[ { "id": "0074qaufB6", "title": "InfoNet: Missing Information Retrieval in Multi-Stream Sensing Systems", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Faulty sensors in a multiple input stream setup are more prone to corrupted input data streams, hindering the performance of Deep Neural Networks (DNN), which focus on deducing information from data. However, the relevant information among multiple input streams has correlations and contains mutual information. This paper utilizes this opportunity to retrieve perturbed information caused by corrupted input streams. We propose InfoNet, which estimates the information entropy at every element of the input feature to the network and retrieves the missing information in the input feature matrix. Finally, using the estimated information entropy and retrieved data, we introduce a novel guided replacement procedure to recover the complete information that is the input to the downstream DNN task. We evaluate the proposed algorithm for sound localization where audio streams from the microphone array are corrupted. We have recovered the performance drop due to the corrupted input stream and reduced the localization error with non-corrupted input streams. Finally, we assess the potential of using the proposed algorithm for retrieving information in other sensing modalities, e.g., wireless signal-based source localization.", "keywords": "Early attention;Feature Recovery;Information theory;Entropy;Multi-stream sensing system", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Subrata Kumar Biswas;Bashima Islam", "authorids": "~Subrata_Kumar_Biswas1;~Bashima_Islam1", "gender": "M;F", "homepage": "https://users.wpi.edu/~sbiswas/;https://users.wpi.edu/~bislam/", "dblp": ";188/6243", "google_scholar": "xuMQYBgAAAAJ;SxAZLx8AAAAJ", "orcid": "0000-0002-2670-0115;", "linkedin": "subrata-biswas-433247142/;bashimaislam", "or_profile": "~Subrata_Kumar_Biswas1;~Bashima_Islam1", "aff": "Meta Facebook;Worcester Polytechnic Institute", "aff_domain": "meta.com;wpi.edu", "position": "Intern;Assistant Professor", "bibtex": "@misc{\nbiswas2024infonet,\ntitle={InfoNet: Missing Information Retrieval in Multi-Stream Sensing Systems},\nauthor={Subrata Kumar Biswas and Bashima Islam},\nyear={2024},\nurl={https://openreview.net/forum?id=0074qaufB6}\n}", "github": "", "project": "", "reviewers": "fMm6;tZQw;9qjF", "site": "https://openreview.net/forum?id=0074qaufB6", "pdf_size": 4860378, "rating": "1;3;5", "confidence": "4;3;4", "soundness": "2;3;2", "contribution": "1;2;2", "presentation": "2;2;3", "wc_summary": "97;80;124", "wc_strengths": "49;53;168", "wc_weaknesses": "350;172;196", "wc_questions": "48;41;182", "wc_review": "544;346;670", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 3.0, 1.632993161855452 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 1.6666666666666667, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 100.33333333333333, 18.116904322268255 ], "wc_strengths_avg": [ 90.0, 55.17849822772152 ], "wc_weaknesses_avg": [ 239.33333333333334, 78.86415887813396 ], "wc_questions_avg": [ 90.33333333333333, 64.88108781112996 ], "wc_review_avg": [ 520.0, 133.3566646253572 ], "wc_reply_reviewers_avg": [ 0, 0 ], 
"wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 3, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Ao_75FN8juAJ:scholar.google.com/&scioq=InfoNet:+Missing+Information+Retrieval+in+Multi-Stream+Sensing+Systems&hl=en&as_sdt=0,31", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Meta;Worcester Polytechnic Institute", "aff_unique_dep": "Meta Platforms, Inc.;", "aff_unique_url": "https://meta.com;https://www.wpi.edu", "aff_unique_abbr": "Meta;WPI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "014CgNPAGy", "title": "On the Role of Momentum in the Implicit Bias of Gradient Descent for Diagonal Linear Networks", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Momentum is a widely adopted and crucial modification to gradient descent when training modern deep neural networks. In this paper, we target on the regularization effect of momentum-based methods in regression settings and analyze a popular proxy model, diagonal linear networks, to precisely characterize the implicit bias of heavy-ball (HB) and Nesterov's method of accelerated gradients (NAG). We show that, HB and NAG exhibit different implicit bias compared to GD for diagonal linear networks, which is different from the one for classic linear regression problem where momentum-based methods share the same implicit bias with GD. Specifically, the role of momentum in the implicit bias of GD is twofold. On one hand, HB and NAG induce extra initialization mitigation effects similar to SGD that are beneficial for generalization of sparse regression. On the other hand, besides the initialization of parameters, the implicit regularization effects of HB and NAG also depend on the initialization of gradients explicitly, which may not be benign for generalization. As a consequence, whether HB and NAG have better generalization properties than GD jointly depends on the aforementioned twofold effects determined by various parameters such as learning rate, momentum factor, data matrix, and integral of gradients. Particularly, the difference between the implicit bias of GD and that of HB and NAG disappears for small learning rate. 
Our findings highlight the potential beneficial role of momentum and can help understand its advantages in practice from the perspective of generalization.", "keywords": "GD;momentum;implicit bias;linear networks", "primary_area": "optimization", "supplementary_material": "", "author": "Bochen Lyu;Zhanxing Zhu", "authorids": "~Bochen_Lyu1;~Zhanxing_Zhu1", "gender": ";M", "homepage": ";https://zhanxingzhu.github.io/", "dblp": ";87/7756.html", "google_scholar": ";a2sHceIAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Bochen_Lyu1;~Zhanxing_Zhu1", "aff": ";University of Southampton", "aff_domain": ";soton.ac.uk", "position": ";Associate Professor", "bibtex": "@misc{\nanonymous2024on,\ntitle={On the Role of Momentum in the Implicit Bias of Gradient Descent for Diagonal Linear Networks},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=014CgNPAGy}\n}", "github": "", "project": "", "reviewers": "oaZ7;YbMr;vt7i;wNfT", "site": "https://openreview.net/forum?id=014CgNPAGy", "pdf_size": 537004, "rating": "3;5;5;6", "confidence": "4;5;4;3", "soundness": "1;1;3;3", "contribution": "1;1;2;3", "presentation": "2;3;3;3", "wc_summary": "47;156;94;104", "wc_strengths": "31;8;15;95", "wc_weaknesses": "219;937;242;65", "wc_questions": "6;53;5;6", "wc_review": "303;1154;356;270", "wc_reply_reviewers": "198;899;68;10", "wc_reply_authors": "514;2139;1249;161", "reply_reviewers": "1;2;1;1", "reply_authors": "1;4;3;1", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.0, 1.0 ], "contribution_avg": [ 1.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 100.25, 38.71934271136327 ], "wc_strengths_avg": [ 37.25, 34.368408458932166 ], "wc_weaknesses_avg": [ 365.75, 336.7590941607962 ], "wc_questions_avg": [ 17.5, 20.5 ], "wc_review_avg": [ 520.75, 366.8919289109533 ], "wc_reply_reviewers_avg": [ 293.75, 356.01009466024976 ], "wc_reply_authors_avg": [ 1015.75, 758.0314554818949 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 1.299038105676658 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.3244428422615251, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:G_DlNajnXBEJ:scholar.google.com/&scioq=On+the+Role+of+Momentum+in+the+Implicit+Bias+of+Gradient+Descent+for+Diagonal+Linear+Networks&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "University of Southampton", "aff_unique_dep": "", "aff_unique_url": "https://www.southampton.ac.uk", "aff_unique_abbr": "Southampton", "aff_country_unique_index": "0", "aff_country_unique": "United Kingdom" }, { "id": "01Yi8rzoNs", "title": "Visual Chain of Thought: Bridging Logical Gaps with Multimodal Infillings", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Recent advances in large language models elicit reasoning in a chain-of-thought that allows models to decompose problems in a human-like fashion. Though this paradigm improves multi-step reasoning ability in language models, it is limited by being unimodal and applied mainly to question-answering tasks. We claim that incorporating visual augmentation into reasoning is essential, especially for complex, imaginative tasks. Consequently, we introduce VCoT, a novel method that leverages chain-of-thought prompting with vision-language grounding to recursively bridge the logical gaps within sequential data. 
Our method uses visual guidance to generate synthetic multimodal infillings that add consistent and novel information to reduce the logical gaps for downstream tasks that can benefit from temporal reasoning, as well as provide interpretability into models' multi-step reasoning. We apply VCoT to the Visual Storytelling and WikiHow summarization datasets and demonstrate through human evaluation that VCoT offers novel and consistent synthetic data augmentation beating chain-of-thought baselines, which can be used to enhance downstream performance.", "keywords": "chain of thought;vision and language;large language models;reasoning", "primary_area": "generative models", "supplementary_material": "", "author": "Daniel Philip Rose;Vaishnavi Himakunthala;Andy Ouyang;Ryan He;Alex Mei;Yujie Lu;Michael Saxon;Chinmay Sonar;Diba Mirza;William Yang Wang", "authorids": "~Daniel_Philip_Rose1;~Vaishnavi_Himakunthala1;~Andy_Ouyang1;~Ryan_He1;~Alex_Mei1;~Yujie_Lu1;~Michael_Saxon1;~Chinmay_Sonar1;~Diba_Mirza1;~William_Yang_Wang2", "gender": "M;F;M;;;;M;M;F;M", "homepage": ";;;;http://sites.cs.ucsb.edu/~alexmei/;https://yujielu10.github.io/;https://saxon.me;https://chinmaysonar.github.io/;https://sites.cs.ucsb.edu/~dimirza/;https://www.cs.ucsb.edu/~william/", "dblp": ";;;;;;222/6656;207/0890;;08/9282", "google_scholar": ";;;;GOrfNGAAAAAJ;pcmr6GMAAAAJ;pAlwjdgAAAAJ;-6Rg0WcAAAAJ;https://scholar.google.com/citations?hl=en;gf8Ms_8AAAAJ", "orcid": ";;;;;;;;;", "linkedin": "danny-rose-2075651a7/;vaishnavihimakunthala/;andy--ouyang/;ryanhe02/;alexmeigz/;;;;diba-mirza-b3150914b/;", "or_profile": "~Daniel_Philip_Rose1;~Vaishnavi_Himakunthala1;~Andy_Ouyang1;~Ryan_He1;~Alex_Mei1;~Yujie_Lu1;~Michael_Saxon1;~Chinmay_Sonar1;~Diba_Mirza1;~William_Wang1", "aff": ", University of California, Santa Barbara;, University of California, Santa Barbara;University of California, Santa Barbara;University of California, Santa Barbara;;UC Santa Barbara;Advanced Micro Devices;University of California, Santa Barbara;University of California, Santa Barbara;UC Santa Barbara", "aff_domain": "cs.ucsb.edu;cs.ucsb.edu;ucsb.edu;ucsb.edu;;ucsb.edu;amd.com;ucsb.edu;ucsb.edu;ucsb.edu", "position": "Undergrad student;Undergrad student;Undergrad student;Intern;;PhD student;Intern;PhD student;Associate Professor;Full Professor", "bibtex": "@misc{\nrose2024visual,\ntitle={Visual Chain of Thought: Bridging Logical Gaps with Multimodal Infillings},\nauthor={Daniel Philip Rose and Vaishnavi Himakunthala and Andy Ouyang and Ryan He and Alex Mei and Yujie Lu and Michael Saxon and Chinmay Sonar and Diba Mirza and William Yang Wang},\nyear={2024},\nurl={https://openreview.net/forum?id=01Yi8rzoNs}\n}", "github": "", "project": "", "reviewers": "E16R;a5oz;dqPG;BRKH", "site": "https://openreview.net/forum?id=01Yi8rzoNs", "pdf_size": 6739734, "rating": "3;3;5;6", "confidence": "4;4;4;4", "soundness": "2;2;2;3", "contribution": "2;2;2;3", "presentation": "2;3;2;3", "wc_summary": "136;55;76;74", "wc_strengths": "27;31;117;55", "wc_weaknesses": "285;261;411;235", "wc_questions": "4;21;109;52", "wc_review": "452;368;713;416", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 85.25, 30.425112982534674 ], "wc_strengths_avg": [ 57.5, 35.98263470064414 ], "wc_weaknesses_avg": [ 
298.0, 67.59437846448475 ], "wc_questions_avg": [ 46.5, 39.978119015281344 ], "wc_review_avg": [ 487.25, 133.69999065071022 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4674612661234009738&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0;0;0;1;0;0;0", "aff_unique_norm": "University of California, Santa Barbara;Advanced Micro Devices, Inc.", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucsb.edu;https://www.amd.com", "aff_unique_abbr": "UCSB;AMD", "aff_campus_unique_index": "0;0;0;0;0;0;0;0", "aff_campus_unique": "Santa Barbara;", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "01ep65umEr", "title": "TeLLMe what you see: Using LLMs to Explain Neurons in Vision Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "As the role of machine learning models continues to expand across diverse fields, the demand for model interpretability grows. This is particularly crucial for deep learning models, which are often referred to as black boxes, due to their highly nonlinear nature. This paper proposes a novel method for generating and evaluating concise explanations for the behavior of specific neurons in trained vision models. Doing so signifies an important step towards better understanding the decision making in neural networks. Our technique draws inspiration from a recently published framework that utilized GPT-4 for interpretability of language models. Here, we extend and expand the method to vision models, offering interpretations based on both neuron activations and weights in the network. We illustrate our approach using an AlexNet model and ViT trained on ImageNet, generating clear, human-readable explanations. Our method outperforms the current state-of-the-art in both quantitative and qualitative assessments, while also demonstrating superior capacity in capturing polysemic neuron behavior. The findings hold promise for enhancing transparency, trust and understanding in the deployment of deep learning vision models across various domains. 
The relevant code can be found in our GitHub repository.", "keywords": "Explainable AI;Explaining Neurons in Vision Models", "primary_area": "visualization or interpretation of learned representations", "supplementary_material": "", "author": "Leon Guertler;M Ganesh Kumar;Anh Tuan Luu;Cheston Tan", "authorids": "~Leon_Guertler1;~M_Ganesh_Kumar1;~Anh_Tuan_Luu2;~Cheston_Tan1", "gender": "M;M;M;M", "homepage": ";https://mgkumar138.github.io/;https://tuanluu.github.io/;", "dblp": ";230/0379;81/8329.html;136/9366", "google_scholar": ";sFfy1q4AAAAJ;https://scholar.google.com.sg/citations?hl=en;Up0UYEYAAAAJ", "orcid": ";0000-0001-5559-6428;;", "linkedin": "leon-gurtler-6b3847165/;m-ganesh-kumar-28682792/;;cheston-tan/", "or_profile": "~Leon_Guertler1;~M_Ganesh_Kumar1;~Anh_Tuan_Luu2;~Cheston_Tan1", "aff": "Nanyang Technological University;Harvard University;Nanyang Technological University;Singapore University of Technology and Design", "aff_domain": "ntu.edu.sg;harvard.edu;ntu.edu.sg;sutd.edu.sg", "position": "Undergrad student;Postdoc;Assistant Professor;Assistant Professor", "bibtex": "@misc{\nguertler2024tellme,\ntitle={Te{LLM}e what you see: Using {LLM}s to Explain Neurons in Vision Models},\nauthor={Leon Guertler and M Ganesh Kumar and Anh Tuan Luu and Cheston Tan},\nyear={2024},\nurl={https://openreview.net/forum?id=01ep65umEr}\n}", "github": "", "project": "", "reviewers": "1NcR;KpfM;xVYG;NqpP", "site": "https://openreview.net/forum?id=01ep65umEr", "pdf_size": 4504693, "rating": "5;5;5;6", "confidence": "3;4;3;5", "soundness": "2;3;2;2", "contribution": "2;3;2;3", "presentation": "2;3;2;3", "wc_summary": "96;37;46;90", "wc_strengths": "104;13;59;51", "wc_weaknesses": "196;184;86;250", "wc_questions": "1;4;98;6", "wc_review": "397;238;289;397", "wc_reply_reviewers": "0;0;0;86", "wc_reply_authors": "499;672;771;668", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 67.25, 26.03243169586737 ], "wc_strengths_avg": [ 56.75, 32.34482184214345 ], "wc_weaknesses_avg": [ 179.0, 59.16924876994806 ], "wc_questions_avg": [ 27.25, 40.88627520330019 ], "wc_review_avg": [ 330.25, 69.14251586397475 ], "wc_reply_reviewers_avg": [ 21.5, 37.239092362730865 ], "wc_reply_authors_avg": [ 652.5, 97.75607398008576 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:0-7pBnGKwxQJ:scholar.google.com/&scioq=TeLLMe+what+you+see:+Using+LLMs+to+Explain+Neurons+in+Vision+Models&hl=en&as_sdt=0,33", "gs_version_total": 2, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Nanyang Technological University;Harvard University;Singapore University of Technology and Design", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ntu.edu.sg;https://www.harvard.edu;https://www.sutd.edu.sg", "aff_unique_abbr": "NTU;Harvard;SUTD", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Singapore;United States" }, { "id": "02Ug9N8DCI", "title": "GateLoop: Fully Data-Controlled Linear Recurrence for Sequence Modeling", "track": "main", "status": "Reject", "tldr": "", "abstract": "Linear Recurrence has proven to be a powerful 
tool for modeling long sequences efficiently. In this work, we show that existing models fail to take full advantage of its potential. Motivated by this finding, we develop GateLoop, a foundational sequence model that generalizes linear recurrent models such as S4, S5, LRU and RetNet, by employing data-controlled state transitions. \nUtilizing this theoretical advance, GateLoop empirically outperforms existing models for auto-regressive language modeling. Our method comes with a low-cost $O(l)$ recurrent mode and an efficient $O(l \\log_{2} l)$ parallel mode making use of highly optimized associative scan implementations. Furthermore, we derive an $O(l^2)$ surrogate-attention mode, revealing remarkable implications for Transformer and recently proposed architectures. \nSpecifically, we prove that our approach can be interpreted as providing data-controlled relative-positional information to Attention. \nWhile many existing models solely rely on data-controlled cumulative sums for context aggregation, our findings suggest that incorporating data-controlled complex cumulative products may be a crucial step towards more powerful sequence models.", "keywords": "Data-controlled;Linear Recurrence;Sequence Modeling;GateLoop;Linear;RNN;State Space Model;SSM;S4;S5;LRU;RetNet;generalization", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Tobias Katsch", "authorids": "~Tobias_Katsch1", "gender": "M", "homepage": "https://github.com/tobiaskatsch", "dblp": "", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "~Tobias_Katsch1", "aff": "", "aff_domain": "", "position": "", "bibtex": "@misc{\nkatsch2024gateloop,\ntitle={GateLoop: Fully Data-Controlled Linear Recurrence for Sequence Modeling},\nauthor={Tobias Katsch},\nyear={2024},\nurl={https://openreview.net/forum?id=02Ug9N8DCI}\n}", "github": "", "project": "", "reviewers": "kCvT;BbYv;3RS5;gLYa", "site": "https://openreview.net/forum?id=02Ug9N8DCI", "pdf_size": 7626103, "rating": "3;3;3;5", "confidence": "4;4;4;5", "soundness": "2;1;1;3", "contribution": "1;2;3;2", "presentation": "2;2;2;2", "wc_summary": "110;38;40;104", "wc_strengths": "72;15;38;94", "wc_weaknesses": "275;9;167;159", "wc_questions": "2;310;33;278", "wc_review": "459;372;278;635", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 1.75, 0.82915619758885 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 73.0, 34.07345007480164 ], "wc_strengths_avg": [ 54.75, 30.408674749156695 ], "wc_weaknesses_avg": [ 152.5, 94.67180150393253 ], "wc_questions_avg": [ 155.75, 139.1444842600669 ], "wc_review_avg": [ 436.0, 131.51996046228115 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12199180100342829511&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3 }, { "title": "Hybrid LLM: Cost-Efficient and Quality-Aware Query Routing", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19625", "id": "02f3mUtqnM", "author_site": "Dujian Ding, Ankur Mallick, Chi Wang, Robert Sim, 
Subhabrata Mukherjee, Victor R\u00fchle, Laks Lakshmanan, Ahmed H Awadallah", "tldr": "", "abstract": "Large language models (LLMs) excel in most NLP tasks but also require expensive cloud servers for deployment due to their size, while smaller models that can be deployed on lower cost (e.g., edge) devices, tend to lag behind in terms of response quality. Therefore in this work we propose a hybrid inference approach which combines their respective strengths to save cost and maintain quality. Our approach uses a router that assigns queries to the small or large model based on the predicted query difficulty and the desired quality level. The desired quality level can be tuned dynamically at test time to seamlessly trade quality for cost as per the scenario requirements. In experiments our approach allows us to make up to 40% fewer calls to the large model, with no drop in response quality.", "keywords": "Large language models;Efficient ML;Query Routing", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Dujian Ding;Ankur Mallick;Chi Wang;Robert Sim;Subhabrata Mukherjee;Victor R\u00fchle;Laks V. S. Lakshmanan;Ahmed Hassan Awadallah", "authorids": "~Dujian_Ding1;~Ankur_Mallick1;~Chi_Wang3;~Robert_Sim1;~Subhabrata_Mukherjee2;~Victor_R\u00fchle1;~Laks_V._S._Lakshmanan1;~Ahmed_Hassan_Awadallah1", "gender": ";M;M;M;;;M;M", "homepage": ";https://ankurmallick.github.io/;http://chiwang.cc;;https://subhomukherjee.com/;https://www.cs.ubc.ca/~laks;https://www.microsoft.com/en-us/research/people/hassanam/publications/;https://www.microsoft.com/en-us/research/people/virueh/", "dblp": "244/8792;180/2636;09/404-1;47/1233;37/11030.html;l/LVSLakshmanan;147/9148;277/8100", "google_scholar": "https://scholar.google.ca/citations?user=1-FsZPQAAAAJ;6SYGK8cAAAAJ;https://scholar.google.com/citations?hl=en;uT8sPt8AAAAJ;T4iBN5cAAAAJ;https://scholar.google.ca/citations?user=_RCsaOsAAAAJ;sNGk-9MAAAAJ;", "orcid": ";;;;;0000-0002-9775-4241;;0000-0002-8957-7628", "linkedin": "dujian-ding-250123133/;ankurmallick/;chi-wang-autogen/;simra/;subho87;laksvslakshmanan/;ahmed-hassan-awadallah-a355a27/;victor-r%C3%BChle-bb752195", "or_profile": "~Dujian_Ding1;~Ankur_Mallick1;~Chi_Wang3;~Robert_Sim1;~Subhabrata_Mukherjee2;~Laks_V._S._Lakshmanan1;~Ahmed_Hassan_Awadallah1;~Victor_Ruehle1", "aff": "Computing Science, University of British Columbia;Microsoft;Microsoft Research;Microsoft;Hippocratic AI;University of British Columbia;Microsoft Research;Microsoft", "aff_domain": "cs.ubc.ca;microsoft.com;microsoft.com;microsoft.com;hippocraticai.com;ubc.ca;microsoft.com;microsoft.com", "position": "PhD student;Researcher;Principal Researcher;Principal Researcher;Chief Scientist;Professor;Principal Researcher;Principal Researcher", "bibtex": "@inproceedings{\nding2024hybrid,\ntitle={Hybrid {LLM}: Cost-Efficient and Quality-Aware Query Routing},\nauthor={Dujian Ding and Ankur Mallick and Chi Wang and Robert Sim and Subhabrata Mukherjee and Victor R{\\\"u}hle and Laks V. S. 
Lakshmanan and Ahmed Hassan Awadallah},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=02f3mUtqnM}\n}", "github": "", "project": "", "reviewers": "uyT6;ZnPu;VA7E;9pHQ", "pdf_size": 4420090, "rating": "3;6;6;8", "confidence": "4;2;2;5", "soundness": "2;2;3;4", "contribution": "2;2;2;3", "presentation": "2;3;3;4", "wc_summary": "85;94;101;59", "wc_strengths": "47;62;136;51", "wc_weaknesses": "189;73;64;39", "wc_questions": "4;34;30;64", "wc_review": "325;263;331;213", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1426;539;953;814", "reply_reviewers": "0;0;0;0", "reply_authors": "3;2;2;2", "rating_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 3.25, 1.299038105676658 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 84.75, 15.911866640969563 ], "wc_strengths_avg": [ 74.0, 36.21463792446364 ], "wc_weaknesses_avg": [ 91.25, 57.79435526069999 ], "wc_questions_avg": [ 33.0, 21.283796653792763 ], "wc_review_avg": [ 283.0, 48.394214530251446 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 933.0, 321.2654665537521 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.13474201390907387, "gs_citation": 80, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14692033895568743922&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=02f3mUtqnM", "pdf": "https://openreview.net/pdf?id=02f3mUtqnM", "email": "cs.ubc.ca;microsoft.com;microsoft.com;microsoft.com;hippocraticai.com;ubc.ca;microsoft.com;microsoft.com", "author_num": 8, "aff_unique_index": "0;1;1;1;2;0;1;1", "aff_unique_norm": "University of British Columbia;Microsoft;Hippocratic AI", "aff_unique_dep": "Department of Computing Science;Microsoft Corporation;", "aff_unique_url": "https://www.ubc.ca;https://www.microsoft.com;https://www.hippocratic.ai", "aff_unique_abbr": "UBC;Microsoft;Hippocratic AI", "aff_campus_unique_index": "0", "aff_campus_unique": "Vancouver;", "aff_country_unique_index": "0;1;1;1;1;0;1;1", "aff_country_unique": "Canada;United States" }, { "id": "030cjlZm4a", "title": "Learning Predictive Checklists with Probabilistic Logic Programming", "track": "main", "status": "Reject", "tldr": "", "abstract": "Checklists have been widely recognized as effective tools for completing complex tasks in a systematic manner. Although originally intended for use in procedural tasks, their interpretability and ease of use have led to their adoption for predictive tasks as well, including in clinical settings. However, designing checklists can be challenging, often requiring expert knowledge and manual rule design based on available data. Recent work has attempted to address this issue by using machine learning to automatically generate predictive checklists from data, although these approaches have been limited to Boolean data. We propose a novel method for learning predictive checklists from diverse data modalities, such as images, time series, and text, by combining the power of dedicated deep learning architectures with the interpretability and conciseness of checklists. Our approach relies on probabilistic logic programming, a learning paradigm that enables matching the discrete nature of a checklist with continuous-valued data. 
We propose a regularization technique to tradeoff between the information captured in discrete concepts of continuous data and permit a tunable level of interpretability for the learned checklist concepts. We demonstrate that our method outperforms various explainable machine learning techniques on prediction tasks involving image sequences, clinical notes, and time series.", "keywords": "Predictive Checklists;Interpretability;Fairness;Probabilistic Logic Programming", "primary_area": "neurosymbolic & hybrid AI systems (physics-informed, logic & formal reasoning, etc.)", "supplementary_material": "/attachment/4a671c91b008d335242a0a7a33b2ee48de06144b.pdf", "author": "Yukti Makhija;Edward De Brouwer;Rahul G Krishnan", "authorids": "~Yukti_Makhija1;~Edward_De_Brouwer1;~Rahul_G_Krishnan1", "gender": ";M;M", "homepage": ";https://edwarddebrouwer.xyz;http://www.cs.toronto.edu/~rahulgk/index.html", "dblp": ";;172/0880", "google_scholar": ";-Pm4XtAAAAAJ;ilJgXHkAAAAJ", "orcid": ";;", "linkedin": ";edwarddebrouwer/;rahulgk/", "or_profile": "~Yukti_Makhija1;~Edward_De_Brouwer1;~Rahul_G_Krishnan1", "aff": ";Yale University;Department of Computer Science, University of Toronto", "aff_domain": ";yale.edu;cs.toronto.edu", "position": ";Postdoc;Assistant Professor", "bibtex": "@misc{\nmakhija2024learning,\ntitle={Learning Predictive Checklists with Probabilistic Logic Programming},\nauthor={Yukti Makhija and Edward De Brouwer and Rahul G Krishnan},\nyear={2024},\nurl={https://openreview.net/forum?id=030cjlZm4a}\n}", "github": "", "project": "", "reviewers": "Ucnh;eMqU;SxKf", "site": "https://openreview.net/forum?id=030cjlZm4a", "pdf_size": 1262744, "rating": "3;6;8", "confidence": "4;4;4", "soundness": "2;2;3", "contribution": "2;2;3", "presentation": "2;2;2", "wc_summary": "34;225;54", "wc_strengths": "51;88;37", "wc_weaknesses": "223;172;861", "wc_questions": "65;28;213", "wc_review": "373;513;1165", "wc_reply_reviewers": "0;40;93", "wc_reply_authors": "636;1034;2045", "reply_reviewers": "0;1;1", "reply_authors": "2;3;3", "rating_avg": [ 5.666666666666667, 2.0548046676563256 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 104.33333333333333, 85.71399470850072 ], "wc_strengths_avg": [ 58.666666666666664, 21.514852750806565 ], "wc_weaknesses_avg": [ 418.6666666666667, 313.4691195140103 ], "wc_questions_avg": [ 102.0, 79.92913528036361 ], "wc_review_avg": [ 683.6666666666666, 345.11962499335726 ], "wc_reply_reviewers_avg": [ 44.333333333333336, 38.09053542402481 ], "wc_reply_authors_avg": [ 1238.3333333333333, 593.0903997049878 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:UK5bP4Z4SfgJ:scholar.google.com/&scioq=Learning+Predictive+Checklists+with+Probabilistic+Logic+Programming&hl=en&as_sdt=0,44", "gs_version_total": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Yale University;University of Toronto", "aff_unique_dep": ";Department of Computer Science", "aff_unique_url": "https://www.yale.edu;https://www.utoronto.ca", "aff_unique_abbr": "Yale;U of T", "aff_campus_unique_index": "1", "aff_campus_unique": ";Toronto", "aff_country_unique_index": "0;1", "aff_country_unique": "United 
States;Canada" }, { "id": "04ARmqba4z", "title": "Adaptive Multi-head Contrastive Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "In contrastive learning, two views of an original image generated by different augmentations are considered as a positive pair whose similarity is required to be high. Moreover, two views of two different images are considered as a negative pair, and their similarity is encouraged to be low. Normally, a single similarity measure given by a single projection head is used to evaluate positive and negative sample pairs, respectively. However, due to the various augmentation strategies and varying intra-sample similarity, augmented views from the same image are often not similar. Moreover, due to inter-sample similarity, augmented views of two different images may be more similar than augmented views from the same image. As such, enforcing a high similarity for positive pairs and a low similarity for negative pairs may not always be achievable, and in the case of some pairs, forcing so may be detrimental to the performance. To address this issue, we propose to use multiple projection heads, each producing a separate set of features. Our loss function for pre-training emerges from a solution to the maximum likelihood estimation over head-wise posterior distributions of positive samples given observations. The loss contains the similarity measure over positive and negative pairs, each re-weighted by an individual adaptive temperature that is regularized to prevent ill solutions. Our adaptive multi-head contrastive learning (AMCL) can be applied to and experimentally improves several popular contrastive learning methods such as SimCLR, MoCo and Barlow Twins. Such improvement is consistent under various backbones and linear probing epoches and is more significant when multiple augmentation methods are used.", "keywords": "adaptive temperature;contrastive learning", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Lei Wang;Piotr Koniusz;Tom Gedeon;Liang Zheng", "authorids": "~Lei_Wang20;~Piotr_Koniusz1;~Tom_Gedeon1;~Liang_Zheng4", "gender": "M;;M;M", "homepage": "https://leiwangr.github.io/;https://www.koniusz.com;https://cs.anu.edu.au/people/Tom.Gedeon/;http://zheng-lab.cecs.anu.edu.au/", "dblp": "181/2817-108;25/8616;g/TamasDGedeon.html;61/7360-1", "google_scholar": "VWCZLXgAAAAJ;https://scholar.google.co.uk/citations?user=wZ7-1tUAAAAJ;https://scholar.google.com.tw/citations?user=lPTjWIkAAAAJ;https://scholar.google.com.au/citations?user=vNHqr3oAAAAJ", "orcid": "0000-0002-8600-7099;0000-0002-6340-5289;0000-0001-8356-4909;", "linkedin": "lei-l-wang/;;tom-gedeon;liang-zheng-76341311a/", "or_profile": "~Lei_Wang20;~Piotr_Koniusz1;~Tom_Gedeon1;~Liang_Zheng4", "aff": "Australian National University;Data61, CSIRO;Curtin University of Technology;Australian National University", "aff_domain": "anu.edu.au;data61.csiro.au;curtin.edu.au;anu.edu.au", "position": "Postdoc;Principal Researcher;Full Professor;Associate Professor", "bibtex": "@misc{\nwang2024adaptive,\ntitle={Adaptive Multi-head Contrastive Learning},\nauthor={Lei Wang and Piotr Koniusz and Tom Gedeon and Liang Zheng},\nyear={2024},\nurl={https://openreview.net/forum?id=04ARmqba4z}\n}", "github": "", "project": "", "reviewers": "HgQy;pd1e;YrZy", "site": "https://openreview.net/forum?id=04ARmqba4z", "pdf_size": 2912051, "rating": "3;3;5", "confidence": "4;4;4", "soundness": "2;2;3", "contribution": "2;2;3", "presentation": "2;1;3", 
"wc_summary": "16;68;94", "wc_strengths": "25;44;31", "wc_weaknesses": "154;249;159", "wc_questions": "12;108;5", "wc_review": "207;469;289", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.0, 0.816496580927726 ], "wc_summary_avg": [ 59.333333333333336, 32.42769735204082 ], "wc_strengths_avg": [ 33.333333333333336, 7.93025150224688 ], "wc_weaknesses_avg": [ 187.33333333333334, 43.65266951236265 ], "wc_questions_avg": [ 41.666666666666664, 46.99172503986444 ], "wc_review_avg": [ 321.6666666666667, 109.42678932611621 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 3, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11122893215011527354&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 9, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Australian National University;CSIRO;Curtin University", "aff_unique_dep": ";Data61;", "aff_unique_url": "https://www.anu.edu.au;https://www.csiro.au;https://www.curtin.edu.au", "aff_unique_abbr": "ANU;CSIRO;Curtin", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Australia" }, { "id": "04UvXg4CvW", "title": "EPIC: Compressing Deep GNNs via Expressive Power Gap-Induced Knowledge Distillation", "track": "main", "status": "Reject", "tldr": "", "abstract": "The teacher-student paradigm-based knowledge distillation (KD) has recently emerged as a promising technique for compressing graph neural networks (GNNs). Despite the great success in compressing moderate-sized GNNs, distilling deep GNNs (e.g., with over 100 layers) remains a tough challenge. A widely recognized reason is the *teacher-student expressive power gap*, i.e., the embeddings of a deep teacher may be extremely hard for a shallow student to approximate. Besides, the theoretical analysis and measurement of this gap are currently missing, resulting in a difficult trade-off between the needs of being \"lightweight'' and being \"expressive'' when selecting a student for the deep teacher. To bridge the theoretical gap and address the challenge of distilling deep GNNs, we propose the *first* GNN KD framework that quantitatively analyzes the teacher-student expressive power gap, namely **E**xpressive **P**ower gap-**I**ndu**C**ed knowledge distillation (**EPIC**). Our key idea is to formulate the estimation of the expressive power gap as an embedding regression problem based on the theory of polynomial approximation. Then, we show that the minimum approximation error has an upper bound, which decreases rapidly with respect to the number of student layers. Furthermore, we empirically demonstrate that the upper bound exponentially converges to zero as the number of student layers increases. Moreover, we propose to select an appropriate value for the number of student layers based on the upper bound, and propose an expressive power gap-induced loss term to further encourage the student to generate embeddings similar to those of the teacher. 
Experiments on large-scale benchmarks demonstrate that EPIC can effectively reduce the numbers of layers of deep GNNs, while achieving comparable or superior performance. Specifically, for the 1,001-layer RevGNN-Deep, we reduce the number of layers by 94\\% and accelerate inference by roughly eight times, while achieving comparable performance in terms of ROC-AUC on the large-scale benchmark ogbn-proteins.", "keywords": "deep graph neural networks;knowledge distillation;expressive power gap", "primary_area": "learning on graphs and other geometries & topologies", "supplementary_material": "/attachment/75d2e1ed2ecc8c5b690aa40be7552716025b268a.zip", "author": "Xize Liang;Jie Wang;Zhihao Shi;Hanzhu Chen;Bin Li;Feng Wu", "authorids": "~Xize_Liang1;~Jie_Wang1;~Zhihao_Shi3;~Hanzhu_Chen1;~Bin_Li8;~Feng_Wu1", "gender": ";M;M;;M;M", "homepage": ";http://staff.ustc.edu.cn/~jwangx;https://miralab.ai/people/zhihao-shi/;;http://staff.ustc.edu.cn/~binli;", "dblp": ";29/5259-5;;;89/6764-25;25/3972-1", "google_scholar": ";OugG4dUAAAAJ;https://scholar.google.com.hk/citations?user=u2Ffj60AAAAJ;;;5bInRDEAAAAJ", "orcid": ";;;;0000-0002-2332-3959;", "linkedin": ";;;;;", "or_profile": "~Xize_Liang1;~Jie_Wang1;~Zhihao_Shi3;~Hanzhu_Chen1;~Bin_Li8;~Feng_Wu1", "aff": ";University of Science and Technology of China;University of Science and Technology of China;;University of Science and Technology of China;University of Science and Technology of China", "aff_domain": ";ustc.edu.cn;ustc.edu.cn;;ustc.edu.cn;ustc.edu.cn", "position": ";Full Professor;PhD student;;Full Professor;Full Professor", "bibtex": "@misc{\nliang2024epic,\ntitle={{EPIC}: Compressing Deep {GNN}s via Expressive Power Gap-Induced Knowledge Distillation},\nauthor={Xize Liang and Jie Wang and Zhihao Shi and Hanzhu Chen and Bin Li and Feng Wu},\nyear={2024},\nurl={https://openreview.net/forum?id=04UvXg4CvW}\n}", "github": "", "project": "", "reviewers": "G7rf;8ZML;ZSeG", "site": "https://openreview.net/forum?id=04UvXg4CvW", "pdf_size": 0, "rating": "3;5;5", "confidence": "4;4;4", "soundness": "2;3;3", "contribution": "2;2;2", "presentation": "2;3;2", "wc_summary": "49;54;116", "wc_strengths": "20;38;79", "wc_weaknesses": "86;171;433", "wc_questions": "4;19;77", "wc_review": "159;282;705", "wc_reply_reviewers": "0;0;134", "wc_reply_authors": "460;1188;1459", "reply_reviewers": "0;0;1", "reply_authors": "2;3;2", "rating_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 73.0, 30.474032661705056 ], "wc_strengths_avg": [ 45.666666666666664, 24.689178916188272 ], "wc_weaknesses_avg": [ 230.0, 147.67757672262456 ], "wc_questions_avg": [ 33.333333333333336, 31.47838764754143 ], "wc_review_avg": [ 382.0, 233.8503795164763 ], "wc_reply_reviewers_avg": [ 44.666666666666664, 63.168205785998246 ], "wc_reply_authors_avg": [ 1035.6666666666667, 421.8248715073854 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:FTkpozQc7hsJ:scholar.google.com/&scioq=EPIC:+Compressing+Deep+GNNs+via+Expressive+Power+Gap-Induced+Knowledge+Distillation&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", 
"aff_unique_norm": "University of Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ustc.edu.cn", "aff_unique_abbr": "USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "05gc31KWpz", "title": "DISPEL: Domain Generalization via Domain-Specific Liberating", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Domain generalization aims to learn a generalization model that can perform well on unseen test domains by only training on limited source domains. However, existing domain generalization approaches often bring in prediction-irrelevant noise or require the collection of domain labels. To address these challenges, we consider the domain generalization problem from a different perspective by categorizing the underlying feature groups into domain-shared and domain-specific features. Nevertheless, domain-specific features are difficult to be identified and distinguished from the input data. In this work, we propose $\\underline{\\mathrm{D}}$oma$\\underline{\\mathrm{I}}$n-$\\underline{\\mathrm{SPE}}$cific $\\underline{\\mathrm{L}}$iberating (DISPEL), a post-processing fine-grained masking approach that can filter out undefined and indistinguishable domain-specific features in the embedding space. Specifically, DISPEL utilizes a mask generator that produces a unique mask for each input data to filter domain-specific features. The DISPEL framework is highly flexible to apply to fine-tuned models. We derive a generalization error bound to guarantee the generalization performance by optimizing a designed objective loss. The experimental results on five benchmarks demonstrate that DISPEL outperforms existing methods and can further generalize various algorithms.", "keywords": "Domain Generalization;Transfer Learning", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/2a61e76f56ef1f9c15a426084797e74fe0060d7e.zip", "author": "Chia-Yuan Chang;Yu-Neng Chuang;Guanchu Wang;Samson Zhou;Vladimir Braverman;Mengnan Du;Na Zou", "authorids": "~Chia-Yuan_Chang3;~Yu-Neng_Chuang1;~Guanchu_Wang1;~Samson_Zhou1;~Vladimir_Braverman1;~Mengnan_Du1;~Na_Zou2", "gender": "Not Specified;M;M;;Unspecified;;F", "homepage": "https://z76316.github.io/;;https://guanchuwang.github.io/home;https://samsonzhou.github.io/;http://www.cs.jhu.edu/~vova/;https://mengnandu.com/;https://nzou1.github.io/", "dblp": "03/1382-2.html;207/7875;213/0985;179/2683;14/4758;183/5606;152/0090-1.html", "google_scholar": "EO595aMAAAAJ;;_QL5218AAAAJ;NpjsgocAAAAJ;https://scholar.google.com.tw/citations?user=DTthB48AAAAJ;0i-Js2gAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0009-0001-1889-612X;;;;;;0000-0003-1984-795X", "linkedin": "chia-yuan-chang/;ync/;;;;;na-zou-a1721535/", "or_profile": "~Chia-Yuan_Chang3;~Yu-Neng_Chuang1;~Guanchu_Wang1;~Samson_Zhou1;~Vladimir_Braverman1;~Mengnan_Du1;~Na_Zou2", "aff": "Texas A&M University - College Station;Rice University;Rice University;Texas A&M University - College Station;Department of Computer Science, Whiting School of Engineering;New Jersey Institute of Technology;University of Houston", "aff_domain": "tamu.edu;rice.edu;rice.edu;tamu.edu;cs.jhu.edu;njit.edu;uh.edu", "position": "PhD student;PhD student;PhD student;Assistant Professor;Full Professor;Assistant Professor;Assistant Professor", "bibtex": "@misc{\nchang2024dispel,\ntitle={{DISPEL}: Domain Generalization via Domain-Specific 
Liberating},\nauthor={Chia-Yuan Chang and Yu-Neng Chuang and Guanchu Wang and Samson Zhou and Vladimir Braverman and Mengnan Du and Na Zou},\nyear={2024},\nurl={https://openreview.net/forum?id=05gc31KWpz}\n}", "github": "", "project": "", "reviewers": "Mwon;StPi;u3X2;zEDH", "site": "https://openreview.net/forum?id=05gc31KWpz", "pdf_size": 3150064, "rating": "3;3;5;6", "confidence": "5;4;3;4", "soundness": "2;3;2;3", "contribution": "2;2;2;3", "presentation": "3;2;3;3", "wc_summary": "75;51;67;172", "wc_strengths": "40;29;68;75", "wc_weaknesses": "59;43;53;236", "wc_questions": "227;270;242;46", "wc_review": "401;393;430;529", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 91.25, 47.415055625824166 ], "wc_strengths_avg": [ 53.0, 19.06567596493762 ], "wc_weaknesses_avg": [ 97.75, 80.02304355621573 ], "wc_questions_avg": [ 196.25, 88.10895243957903 ], "wc_review_avg": [ 438.25, 54.172756067972024 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5443310539518174, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12868005047063878227&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;1;0;2;3;4", "aff_unique_norm": "Texas A&M University;Rice University;Johns Hopkins University;New Jersey Institute of Technology;University of Houston", "aff_unique_dep": ";;Department of Computer Science;;", "aff_unique_url": "https://www.tamu.edu;https://www.rice.edu;https://www.jhu.edu;https://www.njit.edu;https://www.uh.edu", "aff_unique_abbr": "TAMU;Rice;JHU;NJIT;UH", "aff_campus_unique_index": "0;0;2", "aff_campus_unique": "College Station;;Baltimore", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Dropout Enhanced Bilevel Training", "status": "Spotlight", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19624", "id": "06lrITXVAx", "author_site": "Peiran Yu, Junyi Li, Heng Huang", "tldr": "", "abstract": "Bilevel optimization problems appear in many widely used machine learning tasks. Bilevel optimization models are sensitive to small changes, and bilevel training tasks typically involve limited datasets. Therefore, overfitting is a common challenge in bilevel training tasks. This paper considers the use of dropout to address this problem. We propose a bilevel optimization model that depends on the distribution of dropout masks. We investigate how the dropout rate affects the hypergradient of this model. We propose a dropout bilevel method to solve the dropout bilevel optimization model. Subsequently, we analyze the resulting dropout bilevel method from an optimization perspective. Analyzing the optimization properties of methods with dropout is essential because it provides convergence guarantees for methods using dropout. However, there has been limited investigation in this research direction. We provide the complexity of the resulting dropout bilevel method in terms of reaching an $\\epsilon$ stationary point of the proposed stochastic bilevel model. 
Empirically, we demonstrate that overfitting occurs in data cleaning problems, and the method proposed in this work mitigates this issue.", "keywords": "Bilevel Optimization;Overfitting", "primary_area": "optimization", "supplementary_material": "/attachment/3f2da4776df25e60924b0709b573e4fe323b121d.pdf", "author": "Peiran Yu;Junyi Li;Heng Huang", "authorids": "~Peiran_Yu1;~Junyi_Li1;~Heng_Huang1", "gender": "F;M;M", "homepage": "https://sites.google.com/view/yupeiran/;;https://www.cs.umd.edu/~heng/", "dblp": "240/3145;;03/281", "google_scholar": "SXJ4R24AAAAJ;MzvZSs0AAAAJ;4OqLaDwAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Peiran_Yu1;~Junyi_Li1;~Heng_Huang1", "aff": "University of Maryland;University of Maryland, College Park;Department of Computer Science, University of Maryland, College Park", "aff_domain": "umd.edu;umd.edu;cs.umd.edu", "position": "Postdoc;PhD student;Full Professor", "bibtex": "@inproceedings{\nyu2024dropout,\ntitle={Dropout Enhanced Bilevel Training},\nauthor={Peiran Yu and Junyi Li and Heng Huang},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=06lrITXVAx}\n}", "github": "", "project": "", "reviewers": "mkUf;Kt41;fs9w;hA9j", "pdf_size": 837802, "rating": "6;6;8;8", "confidence": "4;4;3;3", "soundness": "3;3;4;3", "contribution": "2;3;3;3", "presentation": "2;3;4;3", "wc_summary": "50;39;89;80", "wc_strengths": "21;38;79;59", "wc_weaknesses": "158;110;28;108", "wc_questions": "23;80;21;2", "wc_review": "252;267;217;249", "wc_reply_reviewers": "17;153;0;0", "wc_reply_authors": "177;758;20;175", "reply_reviewers": "1;2;0;0", "reply_authors": "2;3;1;1", "rating_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 64.5, 20.62159062730128 ], "wc_strengths_avg": [ 49.25, 21.821720830401986 ], "wc_weaknesses_avg": [ 101.0, 46.65833258915282 ], "wc_questions_avg": [ 31.5, 29.176188921790317 ], "wc_review_avg": [ 246.25, 18.21228980661136 ], "wc_reply_reviewers_avg": [ 42.5, 64.17359269980138 ], "wc_reply_authors_avg": [ 282.5, 281.82130863368013 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12300902621179201195&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "openreview": "https://openreview.net/forum?id=06lrITXVAx", "pdf": "https://openreview.net/pdf?id=06lrITXVAx", "email": "umd.edu;umd.edu;cs.umd.edu", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Maryland;University of Maryland, College Park", "aff_unique_dep": ";Department of Computer Science", "aff_unique_url": "https://www/umd.edu;https://www/umd.edu", "aff_unique_abbr": "UMD;UMD", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";College Park", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "06mzMua9Rw", "title": "A Trust Region Approach for Few-Shot Sim-to-Real Reinforcement Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Simulation-to-Reality Reinforcement Learning (Sim-to-Real RL) seeks to use simulations to minimize the need for extensive real-world interactions. 
Specifically, in the few-shot off-dynamics setting, the goal is to acquire a simulator-based policy despite a dynamics mismatch that can be effectively transferred to the real-world using only a handful of real-world transitions. In this context, conventional RL agents tend to exploit simulation inaccuracies resulting in policies that excel in the simulator but underperform in the real environment. To address this challenge, we introduce a novel approach that incorporates a penalty to constrain the trajectories induced by the simulator-trained policy inspired by recent advances in Imitation Learning and Trust Region based RL algorithms. We evaluate our method across various environments representing diverse Sim-to-Real conditions, where access to the real environment is extremely limited. These experiments include high-dimensional systems relevant to real-world applications. Across most tested scenarios, our proposed method demonstrates performance improvements compared to existing baselines.", "keywords": "Reinforcement Learning;Simulation-to-Reality;Off-Dynamics", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/9c8b8be7e3ad45c2c20a21fa323c539b4a8d5f3f.pdf", "author": "Paul Daoudi;Bogdan Robu;CHRISTOPHE PRIEUR;Ludovic Dos Santos;Merwan Barlier", "authorids": "~Paul_Daoudi2;~Bogdan_Robu1;~CHRISTOPHE_PRIEUR1;~Ludovic_Dos_Santos1;~Merwan_Barlier1", "gender": "M;M;;;M", "homepage": ";http://www.gipsa-lab.fr/~bogdan.robu/;http://www.gipsa-lab.grenoble-inp.fr/~christophe.prieur/index.html;;https://scholar.google.com/citations?user=TNPp0cwAAAAJ", "dblp": "347/7716;74/8134;62/3525;185/0155;167/4759", "google_scholar": ";;https://scholar.google.com/citations?hl=en;;TNPp0cwAAAAJ", "orcid": "0009-0004-2784-952X;0000-0001-7568-007X;0000-0002-4456-2019;;", "linkedin": "paul-daoudi-83101a126/;;;;", "or_profile": "~Paul_Daoudi2;~Bogdan_Robu1;~CHRISTOPHE_PRIEUR1;~Merwan_Barlier1;~Ludovic_DOS_SANTOS3", "aff": "Huawei Technologies Ltd.;;CNRS;Huawei Technologies Ltd.;Criteo AI Lab", "aff_domain": "huawei.com;;cnrs.fr;huawei.com;criteo.com", "position": "PhD student;;Full Professor;Researcher;Researcher", "bibtex": "@misc{\nanonymous2024a,\ntitle={A Trust Region Approach for Few-Shot Sim-to-Real Reinforcement Learning},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=06mzMua9Rw}\n}", "github": "", "project": "", "reviewers": "mwPX;u2wg;iBdd;agZp", "site": "https://openreview.net/forum?id=06mzMua9Rw", "pdf_size": 1354416, "rating": "3;3;5;5", "confidence": "4;3;4;3", "soundness": "2;2;2;3", "contribution": "2;1;2;2", "presentation": "3;2;2;3", "wc_summary": "40;101;121;107", "wc_strengths": "60;30;32;61", "wc_weaknesses": "1028;164;49;220", "wc_questions": "2;33;170;7", "wc_review": "1130;328;372;395", "wc_reply_reviewers": "400;189;56;384", "wc_reply_authors": "983;579;559;760", "reply_reviewers": "2;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 92.25, 31.027205803939225 ], "wc_strengths_avg": [ 45.75, 14.771171246722448 ], "wc_weaknesses_avg": [ 365.25, 387.5728157391847 ], "wc_questions_avg": [ 53.0, 68.56748500564973 ], "wc_review_avg": [ 556.25, 332.1282696489415 ], "wc_reply_reviewers_avg": [ 257.25, 142.83097528197447 ], "wc_reply_authors_avg": [ 720.25, 170.71229451917046 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": 
[ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7674579431628756951&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Huawei;Centre National de la Recherche Scientifique;Criteo", "aff_unique_dep": "Huawei Technologies;;Criteo AI Lab", "aff_unique_url": "https://www.huawei.com;https://www.cnrs.fr;https://www.criteo.com", "aff_unique_abbr": "Huawei;CNRS;Criteo", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "China;France" }, { "id": "070DFUdNh7", "title": "GraphGPT: Graph Learning with Generative Pre-trained Transformers", "track": "main", "status": "Reject", "tldr": "", "abstract": "We introduce GraphGPT, a novel model for Graph learning by self-supervised Generative Pre-training Transformers. Our model transforms each graph or sampled subgraph into a sequence of tokens representing the node, edge and attributes reversibly using the Eulerian path first. Then we feed the tokens into a standard transformer decoder and pre-train it with the next-token-prediction (NTP) task. Lastly, we fine-tune the GraphGPT model with the supervised tasks. This intuitive, yet effective model achieves superior or close results to the state-of-the-art methods for the graph-, edge- and node-level tasks on the large scale molecular dataset PCQM4Mv2, the protein-protein association dataset ogbl-ppa and the ogbn-proteins dataset from the Open Graph Benchmark (OGB). Furthermore, the generative pre-training enables us to train GraphGPT up to 400M+ parameters with consistently increasing performance, which is beyond the capability of GNNs and previous graph transformers. 
The source code and pre-trained checkpoints will be released soon to pave the way for the graph foundation model research, and also to assist the scientific discovery in pharmaceutical, chemistry, material and bio-informatics domains, etc.", "keywords": "Graph;GPT;Generative;Pre-train;Fine-tune;Transformer;GraphGPT", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Qifang Zhao;Weidong Ren;Tianyu Li;Xiaoxiao Xu;Hong Liu", "authorids": "~Qifang_Zhao1;~Weidong_Ren2;~Tianyu_Li2;~Xiaoxiao_Xu1;~Hong_Liu10", "gender": ";M;M;M;M", "homepage": ";;;https://orcid.org/0000-0003-0189-8601;https://c.liepin.com/resume/getdefaultresume/", "dblp": ";https://dblp.org/rec/conf/ijcnn/RenZLH21.html;;;", "google_scholar": ";;bsCWGaEAAAAJ;;", "orcid": ";;;0000-0003-0189-8601;", "linkedin": "zhaoqf123/;;;;", "or_profile": "~Qifang_Zhao1;~Weidong_Ren2;~Tianyu_Li2;~Xiaoxiao_Xu1;~Hong_Liu10", "aff": "Alibaba Group;;Alibaba Group;;", "aff_domain": "alibaba-inc.com;;alibaba-inc.com;;", "position": "Researcher;;Applied Scientist;;", "bibtex": "@misc{\nzhao2024graphgpt,\ntitle={Graph{GPT}: Graph Learning with Generative Pre-trained Transformers},\nauthor={Qifang Zhao and Weidong Ren and Tianyu Li and Xiaoxiao Xu and Hong Liu},\nyear={2024},\nurl={https://openreview.net/forum?id=070DFUdNh7}\n}", "github": "", "project": "", "reviewers": "2YEp;kAz1;VKoy;v4XD", "site": "https://openreview.net/forum?id=070DFUdNh7", "pdf_size": 1885052, "rating": "3;5;5;5", "confidence": "3;4;3;3", "soundness": "2;2;3;3", "contribution": "2;3;3;2", "presentation": "3;2;3;3", "wc_summary": "77;75;125;55", "wc_strengths": "51;43;49;48", "wc_weaknesses": "38;203;200;79", "wc_questions": "52;50;2;2", "wc_review": "218;371;376;184", "wc_reply_reviewers": "0;97;73;0", "wc_reply_authors": "975;789;541;415", "reply_reviewers": "0;1;1;0", "reply_authors": "2;2;1;1", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 83.0, 25.729360660537214 ], "wc_strengths_avg": [ 47.75, 2.947456530637899 ], "wc_weaknesses_avg": [ 130.0, 72.96231904209186 ], "wc_questions_avg": [ 26.5, 24.510201957552287 ], "wc_review_avg": [ 287.25, 87.10159298198857 ], "wc_reply_reviewers_avg": [ 42.5, 43.33878170876519 ], "wc_reply_authors_avg": [ 680.0, 217.05529249479267 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17618659319024588280&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Alibaba Group", "aff_unique_dep": "", "aff_unique_url": "https://www.alibaba.com", "aff_unique_abbr": "Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "07xuZw59uB", "title": "Bridging the Fairness Divide: Achieving Group and Individual Fairness in Graph Neural Networks", "track": "main", "status": "Reject", "tldr": "", "abstract": "Graph neural networks (GNNs) have emerged as a powerful tool for analyzing and learning from complex data structured as graphs, demonstrating remarkable effectiveness in various applications, such as social network analysis, recommendation systems, and drug 
discovery. However, despite their impressive performance, the fairness problem has increasingly gained attention as a crucial aspect to consider. Existing research on fairness in graph learning primarily emphasizes either group fairness or individual fairness; however, to the best of our knowledge, none of these studies comprehensively address both individual and group fairness simultaneously. In this paper, we propose a new concept of individual fairness within groups and a novel framework named Fairness for Group and Individual (FairGI), which considers both group fairness and individual fairness within groups in the context of graph learning. FairGI employs the similarity matrix of individuals to achieve individual fairness within groups, while leveraging adversarial learning to address group fairness in terms of both Equal Opportunity and Statistical Parity. The experimental results demonstrate that our approach not only outperforms other state-of-the-art models in terms of group fairness and individual fairness within groups, but also exhibits excellent performance in population-level individual fairness, while maintaining comparable prediction accuracy.", "keywords": "Graph Neural Networks;Fairness in Graph Learning;Individual Fairness;Group Fairness", "primary_area": "learning on graphs and other geometries & topologies", "supplementary_material": "", "author": "Duna Zhan;Dongliang Guo;Pengsheng Ji;Sheng Li", "authorids": "~Duna_Zhan1;~Dongliang_Guo1;~Pengsheng_Ji1;~Sheng_Li3", "gender": "F;M;;M", "homepage": "https://www.stat.uga.edu/directory/people/duna-zhan;https://donglgcn.github.io/;https://www.stat.uga.edu/directory/people/pengsheng-ji;http://sheng-li.org", "dblp": ";48/7696-2.html;153/2224;23/3439-1", "google_scholar": ";;Wwl-gO0AAAAJ;DEncVcYAAAAJ", "orcid": ";0000-0003-2856-4011;0000-0003-1439-5819;0000-0003-1205-8632", "linkedin": "dunazhan;;;sheng-li-15a70022/", "or_profile": "~Duna_Zhan1;~Dongliang_Guo1;~Pengsheng_Ji1;~Sheng_Li3", "aff": ";University of Virginia, Charlottesville;University of Georgia;University of Virginia, Charlottesville", "aff_domain": ";virginia.edu;uga.edu;virginia.edu", "position": ";PhD student;Associate Professor;Associate Professor", "bibtex": "@misc{\nzhan2024bridging,\ntitle={Bridging the Fairness Divide: Achieving Group and Individual Fairness in Graph Neural Networks},\nauthor={Duna Zhan and Dongliang Guo and Pengsheng Ji and Sheng Li},\nyear={2024},\nurl={https://openreview.net/forum?id=07xuZw59uB}\n}", "github": "", "project": "", "reviewers": "uySt;dYzA;n2zH;9gSh", "site": "https://openreview.net/forum?id=07xuZw59uB", "pdf_size": 487658, "rating": "1;3;3;5", "confidence": "4;4;4;4", "soundness": "1;2;2;2", "contribution": "2;1;1;3", "presentation": "2;3;3;2", "wc_summary": "106;51;150;85", "wc_strengths": "89;17;34;63", "wc_weaknesses": "489;94;147;321", "wc_questions": "265;4;2;73", "wc_review": "949;166;333;542", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 3.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 1.75, 0.4330127018922193 ], "contribution_avg": [ 1.75, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 98.0, 35.86781286892191 ], "wc_strengths_avg": [ 50.75, 27.535204738661378 ], "wc_weaknesses_avg": [ 262.75, 155.28743510020377 ], "wc_questions_avg": [ 86.0, 107.2263960039691 ], "wc_review_avg": [ 497.5, 292.73921841803156 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 
0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15362928413863393123&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Virginia;University of Georgia", "aff_unique_dep": ";", "aff_unique_url": "https://www.virginia.edu;https://www.uga.edu", "aff_unique_abbr": "UVA;UGA", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Charlottesville;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Sheared LLaMA: Accelerating Language Model Pre-training via Structured Pruning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19623", "id": "09iOdaeOzp", "author_site": "Mengzhou Xia, Tianyu Gao, Zhiyuan Zeng, Danqi Chen", "tldr": "", "abstract": "The popularity of LLaMA (Touvron et al., 2023a;b) and other recently emerged moderate-sized large language models (LLMs) highlights the potential of building smaller yet powerful LLMs. Regardless, the cost of training such models from scratch on trillions of tokens remains high. In this work, we study structured pruning as an effective means to develop smaller LLMs from pre-trained, larger models. Our approach employs two key techniques: (1) targeted structured pruning, which prunes a larger model to a specified target shape by removing layers, heads, and intermediate and hidden dimensions in an end-to-end manner, and (2) dynamic batch loading, which dynamically updates the composition of sampled data in each training batch based on varying losses across different domains. We demonstrate the efficacy of our approach by presenting the Sheared-LLaMA series, pruning the LLaMA2-7B model down to 1.3B and 2.7B parameters. Sheared-LLaMA models outperform state-of-the-art open-source models of equivalent sizes, such as Pythia, INCITE, OpenLLaMA and the concurrent TinyLlama models, on a wide range of downstream and instruction tuning evaluations, while requiring only 3% of compute compared to training such models from scratch. 
This work provides compelling evidence that leveraging existing LLMs with structured pruning is a far more cost-effective approach for building competitive small-scale LLMs", "keywords": "pruning;efficiency;large language models;pre-training", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Mengzhou Xia;Tianyu Gao;Zhiyuan Zeng;Danqi Chen", "authorids": "~Mengzhou_Xia1;~Tianyu_Gao1;~Zhiyuan_Zeng3;~Danqi_Chen1", "gender": "F;M;M;F", "homepage": "https://xiamengzhou.github.io/;https://gaotianyu.xyz/about/;https://zhiyuan-zeng.github.io/;https://www.cs.princeton.edu/~danqic/", "dblp": "241/9329;207/8893-1.html;;87/7949", "google_scholar": "zyJn1IcAAAAJ;il-F8YYAAAAJ;qLJqCqsAAAAJ;sVR8ktkAAAAJ", "orcid": ";0000-0002-5178-0866;;", "linkedin": ";;;", "or_profile": "~Mengzhou_Xia1;~Tianyu_Gao1;~Zhiyuan_Zeng3;~Danqi_Chen1", "aff": "Princeton University;Princeton University;Tsinghua University;Princeton University", "aff_domain": "princeton.edu;princeton.edu;tsinghua.edu.cn;cs.princeton.edu", "position": "PhD student;PhD student;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\nxia2024sheared,\ntitle={Sheared {LL}a{MA}: Accelerating Language Model Pre-training via Structured Pruning},\nauthor={Mengzhou Xia and Tianyu Gao and Zhiyuan Zeng and Danqi Chen},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=09iOdaeOzp}\n}", "github": "", "project": "", "reviewers": "bT2X;wgpN;urdF;qt4Y", "pdf_size": 739472, "rating": "5;5;6;8", "confidence": "4;4;4;4", "soundness": "3;3;2;4", "contribution": "3;2;2;4", "presentation": "2;3;3;3", "wc_summary": "78;57;138;102", "wc_strengths": "37;54;95;224", "wc_weaknesses": "349;337;386;30", "wc_questions": "5;13;72;398", "wc_review": "469;461;691;754", "wc_reply_reviewers": "0;0;0;15", "wc_reply_authors": "475;436;869;815", "reply_reviewers": "0;0;0;1", "reply_authors": "2;2;3;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 93.75, 30.102948360584218 ], "wc_strengths_avg": [ 102.5, 73.24786686313807 ], "wc_weaknesses_avg": [ 275.5, 142.88544362530425 ], "wc_questions_avg": [ 122.0, 161.43574573185455 ], "wc_review_avg": [ 593.75, 130.6931042557334 ], "wc_reply_reviewers_avg": [ 3.75, 6.49519052838329 ], "wc_reply_authors_avg": [ 648.75, 194.67970490012564 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 270, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9713425200262995197&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 6, "openreview": "https://openreview.net/forum?id=09iOdaeOzp", "pdf": "https://openreview.net/pdf?id=09iOdaeOzp", "email": "princeton.edu;princeton.edu;tsinghua.edu.cn;cs.princeton.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Princeton University;Tsinghua University", "aff_unique_dep": ";", "aff_unique_url": "https://www.princeton.edu;https://www.tsinghua.edu.cn", "aff_unique_abbr": "Princeton;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;China" }, { "title": "AutoLoRa: An 
Automated Robust Fine-Tuning Framework", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19622", "id": "09xFexjhqE", "author_site": "Xilie Xu, Jingfeng Zhang, Mohan Kankanhalli", "tldr": "", "abstract": "Robust Fine-Tuning (RFT) is a low-cost strategy to obtain adversarial robustness in downstream applications, without requiring a lot of computational resources and collecting significant amounts of data. This paper uncovers an issue with the existing RFT, \nwhere optimizing both adversarial and natural objectives through the feature extractor (FE) yields significantly divergent gradient directions. This divergence introduces instability in the optimization process, thereby hindering the attainment of adversarial robustness and rendering RFT highly sensitive to hyperparameters. To mitigate this issue, we propose a low-rank (LoRa) branch that disentangles RFT into two distinct components: optimizing natural objectives via the LoRa branch and adversarial objectives via the FE. Besides, we introduce heuristic strategies for automating the scheduling of the learning rate and the scalars of loss terms. Extensive empirical evaluations demonstrate that our proposed automated RFT disentangled via the LoRa branch (AutoLoRa) achieves new state-of-the-art results across a range of downstream tasks. AutoLoRa holds significant practical utility, as it automatically converts a pre-trained FE into an adversarially robust model for downstream tasks without the need for searching hyperparameters. Our source code is available at [the GitHub](https://github.com/GodXuxilie/RobustSSL_Benchmark/tree/main/Finetuning_Methods/AutoLoRa).", "keywords": "robust fine-tuning;adversarial robustness", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "", "author": "Xilie Xu;Jingfeng Zhang;Mohan Kankanhalli", "authorids": "~Xilie_Xu1;~Jingfeng_Zhang1;~Mohan_Kankanhalli1", "gender": "M;M;M", "homepage": "https://godxuxilie.github.io/;https://zjfheart.github.io;https://www.comp.nus.edu.sg/~mohan", "dblp": "259/2327;227/2664.html;09/3613.html", "google_scholar": "https://scholar.google.com/citations?hl=en;NS0P1FkAAAAJ;6Lx_eowAAAAJ", "orcid": ";0000-0003-3491-8074;0000-0002-4846-2015", "linkedin": ";;mohan-kankanhalli-583417221", "or_profile": "~Xilie_Xu1;~Jingfeng_Zhang1;~Mohan_Kankanhalli1", "aff": "National University of Singapore;University of Auckland;National University of Singapore", "aff_domain": "nus.edu.sg;auckland.ac.nz;nus.edu.sg", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nxu2024autolora,\ntitle={AutoLoRa: An Automated Robust Fine-Tuning Framework},\nauthor={Xilie Xu and Jingfeng Zhang and Mohan Kankanhalli},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=09xFexjhqE}\n}", "github": "", "project": "", "reviewers": "qytY;4DPH;pJnK;kvxv", "pdf_size": 552186, "rating": "5;6;6;8", "confidence": "4;4;3;4", "soundness": "2;2;3;3", "contribution": "2;3;2;2", "presentation": "3;3;3;3", "wc_summary": "83;228;114;104", "wc_strengths": "53;92;41;80", "wc_weaknesses": "116;288;191;48", "wc_questions": "5;164;47;3", "wc_review": "257;772;393;235", "wc_reply_reviewers": "0;128;346;81", "wc_reply_authors": "812;954;1010;298", "reply_reviewers": "0;2;2;1", "reply_authors": "3;3;4;2", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], 
"contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 132.25, 56.40201680791211 ], "wc_strengths_avg": [ 66.5, 20.402205763103165 ], "wc_weaknesses_avg": [ 160.75, 89.19466071464143 ], "wc_questions_avg": [ 54.75, 65.47661796397246 ], "wc_review_avg": [ 414.25, 215.2293834493794 ], "wc_reply_reviewers_avg": [ 138.75, 128.11591431200108 ], "wc_reply_authors_avg": [ 768.5, 281.0671627920985 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.13245323570650439, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18117461636957153722&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "openreview": "https://openreview.net/forum?id=09xFexjhqE", "pdf": "https://openreview.net/pdf?id=09xFexjhqE", "email": "nus.edu.sg;auckland.ac.nz;nus.edu.sg", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "National University of Singapore;University of Auckland", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;https://www.auckland.ac.nz", "aff_unique_abbr": "NUS;UoA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Singapore;New Zealand" }, { "id": "0A5o6dCKeK", "title": "NExT-GPT: Any-to-Any Multimodal LLM", "track": "main", "status": "Reject", "tldr": "", "abstract": "While recently Multimodal Large Language Models (MM-LLMs) have made exciting strides, they mostly fall prey to the limitation of only input-side multimodal understanding, without the ability to produce content in multiple modalities. As we humans always perceive the world and communicate with people through various modalities, developing any-to-any MM-LLMs capable of accepting and delivering content in any modality becomes essential to human-level AI. To fill the gap, we present an end-to-end general-purpose any-to-any MM-LLM system, NExT-GPT. We connect an LLM with multimodal adaptors and different diffusion decoders, enabling NExT-GPT to perceive inputs and generate outputs in arbitrary combinations of text, images, videos, and audio. By leveraging the existing well-trained highly-performing encoders and decoders, NExT-GPT is tuned with only a small amount of parameter (1%) of certain projection layers, which not only benefits low-cost training but also facilitates convenient expansion to more potential modalities. Moreover, we introduce a modality-switching instruction tuning (MosIT) and manually curate a high-quality dataset for MosIT, based on which NExT-GPT is empowered with complex cross-modal semantic understanding and content generation. 
Overall, our research showcases the promising possibility of building a unified AI agent capable of modeling universal modalities, paving the way for more human-like AI research in the community.", "keywords": "Large Language Model;Diffusion Model", "primary_area": "generative models", "supplementary_material": "/attachment/270675d02e3504834cbfd92ecf17614841e9db3f.zip", "author": "Shengqiong Wu;Hao Fei;Leigang Qu;Wei Ji;Tat-Seng Chua", "authorids": "~Shengqiong_Wu2;~Hao_Fei1;~Leigang_Qu1;~Wei_Ji1;~Tat-Seng_Chua2", "gender": "F;M;M;M;M", "homepage": "https://chocowu.github.io/;https://haofei.vip/;https://leigang-qu.github.io/;https://jiwei0523.github.io/;http://www.comp.nus.edu.sg/~chuats/", "dblp": "274/7191;81/3569-1;276/3150;52/3220-8;", "google_scholar": "RJJLKR0AAAAJ;YGDX46AAAAAJ;1W2Tio4AAAAJ;69OFB-AAAAAJ;https://scholar.google.com.tw/citations?user=Z9DWCBEAAAAJ", "orcid": "0000-0001-6192-1194;0000-0003-3026-6347;0009-0004-6555-3834;0000-0002-8106-9768;0000-0001-6097-7807", "linkedin": ";;;;", "or_profile": "~Shengqiong_Wu2;~Hao_Fei1;~Leigang_Qu1;~Wei_Ji1;~Tat-seng_Chua1", "aff": "National University of Singapore;National University of Singapore;National University of Singapore;Nanjing University;National University of Singapore", "aff_domain": "u.nus.edu;nus.edu.sg;u.nus.edu;nju.edu.cn;nus.edu.sg", "position": "PhD student;Postdoc;PhD student;Associate Professor;Full Professor", "bibtex": "@misc{\nwu2024nextgpt,\ntitle={{NE}xT-{GPT}: Any-to-Any Multimodal {LLM}},\nauthor={Shengqiong Wu and Hao Fei and Leigang Qu and Wei Ji and Tat-Seng Chua},\nyear={2024},\nurl={https://openreview.net/forum?id=0A5o6dCKeK}\n}", "github": "", "project": "", "reviewers": "NwS3;ijPL;7uQJ;mDkx", "site": "https://openreview.net/forum?id=0A5o6dCKeK", "pdf_size": 8145815, "rating": "5;5;6;8", "confidence": "4;5;4;4", "soundness": "3;3;3;3", "contribution": "3;2;3;3", "presentation": "4;3;4;3", "wc_summary": "68;49;78;21", "wc_strengths": "39;35;64;22", "wc_weaknesses": "127;200;82;47", "wc_questions": "3;2;11;64", "wc_review": "237;286;235;154", "wc_reply_reviewers": "99;0;0;0", "wc_reply_authors": "1443;1773;771;397", "reply_reviewers": "1;0;0;0", "reply_authors": "4;4;2;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 54.0, 21.714050750608465 ], "wc_strengths_avg": [ 40.0, 15.215124054702938 ], "wc_weaknesses_avg": [ 114.0, 57.17954179599553 ], "wc_questions_avg": [ 20.0, 25.64176280991617 ], "wc_review_avg": [ 228.0, 47.3550419702063 ], "wc_reply_reviewers_avg": [ 24.75, 42.868257487329714 ], "wc_reply_authors_avg": [ 1096.0, 541.5173127426306 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4714045207910316, "gs_citation": 616, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=174615942206434624&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "National University of Singapore;Nanjing University", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;https://www.nju.edu.cn", "aff_unique_abbr": "NUS;Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "Singapore;China" }, { "id": "0AYosSFETw", "title": "Towards human-like spoken dialogue 
generation between AI agents from written dialogue", "track": "main", "status": "Reject", "tldr": "", "abstract": "The advent of large language models (LLMs) has made it possible to generate natural written dialogues between two agents.\nHowever, generating human-like spoken dialogues from these written dialogues remains challenging.\nSpoken dialogues have several unique characteristics: they frequently include backchannels and laughter, and the smoothness of turn-taking significantly influences the fluidity of conversation.\nThis study proposes CHATS \u2015 CHatty Agents Text-to-Speech \u2015 a discrete token-based system designed to generate spoken dialogues based on written dialogues.\nOur system can generate speech for both the speaker side and the listener side simultaneously, using only the transcription from the speaker side, which eliminates the need for transcriptions of backchannels or laughter.\nMoreover, CHATS facilitates natural turn-taking; it determines the appropriate duration of silence after each utterance in the absence of overlap, and it initiates the generation of overlapping speech based on the phoneme sequence of the next utterance in case of overlap.\nExperimental evaluations indicate that CHATS outperforms the text-to-speech baseline, producing spoken dialogues that are more interactive and fluid while retaining clarity and intelligibility.", "keywords": "spoken dialogue modeling;text-to-speech synthesis;backchannel generation;turn-taking", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Kentaro Mitsui;Yukiya Hono;Kei Sawada", "authorids": "~Kentaro_Mitsui1;~Yukiya_Hono1;~Kei_Sawada1", "gender": "M;M;", "homepage": ";;", "dblp": "247/6422;;", "google_scholar": "https://scholar.google.co.jp/citations?user=RMIT5OQAAAAJ;SU5SLdUAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Kentaro_Mitsui1;~Yukiya_Hono1;~Kei_Sawada1", "aff": "rinna Co., Ltd.;Nagoya Institute of Technology;", "aff_domain": "rinna.co.jp;nitech.ac.jp;", "position": "Researcher;Postdoc;", "bibtex": "@misc{\nmitsui2024towards,\ntitle={Towards human-like spoken dialogue generation between {AI} agents from written dialogue},\nauthor={Kentaro Mitsui and Yukiya Hono and Kei Sawada},\nyear={2024},\nurl={https://openreview.net/forum?id=0AYosSFETw}\n}", "github": "", "project": "", "reviewers": "Kha2;FEtd;yc8m", "site": "https://openreview.net/forum?id=0AYosSFETw", "pdf_size": 1229911, "rating": "5;6;8", "confidence": "3;4;4", "soundness": "2;4;4", "contribution": "3;2;3", "presentation": "2;3;3", "wc_summary": "204;50;154", "wc_strengths": "52;57;129", "wc_weaknesses": "175;76;94", "wc_questions": "20;74;17", "wc_review": "451;257;394", "wc_reply_reviewers": "39;0;0", "wc_reply_authors": "1071;519;1443", "reply_reviewers": "1;0;0", "reply_authors": "3;1;3", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.9428090415820634 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 136.0, 64.14566755959959 ], "wc_strengths_avg": [ 79.33333333333333, 35.178907822096406 ], "wc_weaknesses_avg": [ 115.0, 43.05810028322197 ], "wc_questions_avg": [ 37.0, 26.19160170741759 ], "wc_review_avg": [ 367.3333333333333, 81.41389452803632 ], "wc_reply_reviewers_avg": [ 13.0, 18.384776310850235 ], "wc_reply_authors_avg": [ 
1011.0, 379.5997892517855 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.9428090415820634 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7559289460184545, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9835558836471678525&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1", "aff_unique_norm": "rinna Co., Ltd.;Nagoya Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": ";https://www.nitech.ac.jp", "aff_unique_abbr": ";NIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Japan" }, { "title": "One-shot Empirical Privacy Estimation for Federated Learning", "status": "Oral", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19621", "id": "0BqyZSWfzo", "author_site": "Galen Andrew, Peter Kairouz, Sewoong Oh, Alina Oprea, H. Brendan McMahan, Vinith Suriyakumar", "tldr": "", "abstract": "Privacy estimation techniques for differentially private (DP) algorithms are useful for comparing against analytical bounds, or to empirically measure privacy loss in settings where known analytical bounds are not tight. However, existing privacy auditing techniques usually make strong assumptions on the adversary (e.g., knowledge of intermediate model iterates or the training data distribution), are tailored to specific tasks, model architectures, or DP algorithm, and/or require retraining the model many times (typically on the order of thousands). These shortcomings make deploying such techniques at scale difficult in practice, especially in federated settings where model training can take days or weeks. In this work, we present a novel \u201cone-shot\u201d approach that can systematically address these challenges, allowing efficient auditing or estimation of the privacy loss of a model during the same, single training run used to fit model parameters, and without requiring any a priori knowledge about the model architecture, task, or DP algorithm. 
We show that our method provides provably correct estimates for the privacy loss under the Gaussian mechanism, and we demonstrate its performance on a well-established FL benchmark dataset under several adversarial threat models.", "keywords": "differential privacy;federated learning;empirical privacy", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "", "author": "Galen Andrew;Peter Kairouz;Sewoong Oh;Alina Oprea;Hugh Brendan McMahan;Vinith Menon Suriyakumar", "authorids": "~Galen_Andrew1;~Peter_Kairouz1;~Sewoong_Oh3;~Alina_Oprea1;~Hugh_Brendan_McMahan1;~Vinith_Menon_Suriyakumar1", "gender": "M;M;;F;M;M", "homepage": ";https://kairouzp.github.io/;;http://www.ccs.neu.edu/home/alina/;;", "dblp": "31/1971;129/1254;;35/3425;;", "google_scholar": ";m8NUgw0AAAAJ;;https://scholar.google.com.tw/citations?user=16J3izoAAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": ";;;0000-0002-4979-5292;;", "linkedin": ";kayrouzp;;alina-oprea-9588bb1;;vsuriyakumar", "or_profile": "~Galen_Andrew1;~Peter_Kairouz1;~Sewoong_Oh3;~Alina_Oprea1;~Hugh_Brendan_McMahan1;~Vinith_Menon_Suriyakumar1", "aff": "Google;Google;;Northeastern University;Google;Massachusetts Institute of Technology", "aff_domain": "google.com;google.com;;northeastern.edu;google.com;mit.edu", "position": "Researcher;Research Scientist;;Associate Professor;Research Scientist;PhD student", "bibtex": "@inproceedings{\nandrew2024oneshot,\ntitle={One-shot Empirical Privacy Estimation for Federated Learning},\nauthor={Galen Andrew and Peter Kairouz and Sewoong Oh and Alina Oprea and Hugh Brendan McMahan and Vinith Menon Suriyakumar},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=0BqyZSWfzo}\n}", "github": "", "project": "", "reviewers": "ZyjW;zCeQ;LDHu", "pdf_size": 599734, "rating": "8;8;8", "confidence": "4;3;3", "soundness": "4;3;4", "contribution": "3;2;3", "presentation": "3;3;4", "wc_summary": "144;69;66", "wc_strengths": "74;56;116", "wc_weaknesses": "655;142;169", "wc_questions": "112;152;39", "wc_review": "985;419;390", "wc_reply_reviewers": "775;108;24", "wc_reply_authors": "2205;1400;906", "reply_reviewers": "4;1;1", "reply_authors": "5;4;2", "rating_avg": [ 8.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 93.0, 36.08323710533743 ], "wc_strengths_avg": [ 82.0, 25.13961017995307 ], "wc_weaknesses_avg": [ 322.0, 235.7244153667583 ], "wc_questions_avg": [ 101.0, 46.783187863447985 ], "wc_review_avg": [ 598.0, 273.9063100161562 ], "wc_reply_reviewers_avg": [ 302.3333333333333, 335.980488851494 ], "wc_reply_authors_avg": [ 1503.6666666666667, 535.3567865348201 ], "reply_reviewers_avg": [ 2.0, 1.4142135623730951 ], "reply_authors_avg": [ 3.6666666666666665, 1.247219128924647 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 40, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2844245160919175035&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=0BqyZSWfzo", "pdf": "https://openreview.net/pdf?id=0BqyZSWfzo", "email": "google.com;google.com;;northeastern.edu;google.com;mit.edu", "author_num": 6, "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": 
"Google;Northeastern University;Massachusetts Institute of Technology", "aff_unique_dep": "Google;;", "aff_unique_url": "https://www.google.com;https://www.northeastern.edu;https://web.mit.edu", "aff_unique_abbr": "Google;NEU;MIT", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "0Ce3c9l7G1", "title": "Learning Multi-Agent Communication using Regularized Attention Messages", "track": "main", "status": "Reject", "tldr": "", "abstract": "Learning how to communicate in Multi-Agent Reinforcement Learning (MARL) can be key to solve complex cooperative tasks. Recent approaches have shown the advantages of using an efficient communication architecture, tackling problems such as what, when, or whom to communicate. However, these methods still fail to solve some complex scenarios, and some of them do not evaluate the implications of having limited communication channels. In this paper, we propose Attentive Regularized Communication (ARCOMM), a new method for communication in MARL. The proposed method uses an attention module to evaluate the weight of the messages generated by the agents, together with a message regularizer that facilitates learning more meaningful messages, improving the performance of the team. We further analyse how ARCOMM reacts to situations where the messages must be compressed before being sent to other agents. Our results show that the proposed method helps, through the power of communication, to improve the performances of the agents in complex domains when compared to other methods. Furthermore, we show that, although there is a decrease of performance, agents are still capable of learning even with lossy communication. 
The messages learned by the agents also support the motivations for our method.", "keywords": "Multi-Agent Reinforcement Learning;Communication;Attention;Message Compression", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Rafael Moreira Pina;Varuna De SIlva De SIlva;Corentin Artaud", "authorids": "~Rafael_Moreira_Pina1;~Varuna_De_SIlva_De_SIlva1;~Corentin_Artaud1", "gender": "M;Non-Binary;", "homepage": ";https://www.lborolondon.ac.uk/about/staff/dr-varuna-de-silva/;https://corentinartaud.github.io", "dblp": "310/3161;;333/5155", "google_scholar": ";;https://scholar.google.co.uk/citations?user=702BRhkAAAAJ", "orcid": "0000-0003-1304-3539;;0009-0002-0387-235X", "linkedin": "rafael-pina-b26913194/;;corentinartaud/", "or_profile": "~Rafael_Moreira_Pina1;~Varuna_De_SIlva_De_SIlva1;~Corentin_Artaud1", "aff": "Loughborough University London;Loughborough University;Loughborough University London", "aff_domain": "lboro.ac.uk;lboro.ac.uk;lboro.ac.uk", "position": "PhD student;Associate Professor;PhD student", "bibtex": "@misc{\npina2024learning,\ntitle={Learning Multi-Agent Communication using Regularized Attention Messages},\nauthor={Rafael Moreira Pina and Varuna De SIlva De SIlva and Corentin Artaud},\nyear={2024},\nurl={https://openreview.net/forum?id=0Ce3c9l7G1}\n}", "github": "", "project": "", "reviewers": "cGtd;j1DY;eV4L;FWnu", "site": "https://openreview.net/forum?id=0Ce3c9l7G1", "pdf_size": 28872842, "rating": "3;3;5;5", "confidence": "4;5;4;4", "soundness": "2;1;2;2", "contribution": "2;2;2;2", "presentation": "3;2;2;2", "wc_summary": "57;21;71;96", "wc_strengths": "9;10;27;54", "wc_weaknesses": "157;144;252;70", "wc_questions": "58;9;26;70", "wc_review": "281;184;376;290", "wc_reply_reviewers": "0;0;54;37", "wc_reply_authors": "652;530;1205;758", "reply_reviewers": "0;0;1;2", "reply_authors": "2;2;3;3", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 1.75, 0.4330127018922193 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 61.25, 27.11434122378783 ], "wc_strengths_avg": [ 25.0, 18.207141456033124 ], "wc_weaknesses_avg": [ 155.75, 64.72393297691357 ], "wc_questions_avg": [ 40.75, 24.38621536852326 ], "wc_review_avg": [ 282.75, 68.01240695637819 ], "wc_reply_reviewers_avg": [ 22.75, 23.53056522908024 ], "wc_reply_authors_avg": [ 786.25, 254.87092321408497 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:weN9GJto7N4J:scholar.google.com/&scioq=Learning+Multi-Agent+Communication+using+Regularized+Attention+Messages&hl=en&as_sdt=0,31", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Loughborough University", "aff_unique_dep": "", "aff_unique_url": "https://www.lborolondon.ac.uk", "aff_unique_abbr": "Lboro", "aff_campus_unique_index": "0;0", "aff_campus_unique": "London;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "0D6mUZTWoF", "title": "A Topology-aware Graph Coarsening Framework for Continual Graph Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Continual learning on graphs tackles the problem of training a graph neural network (GNN) where graph data arrive in a streaming fashion and the model tends to forget knowledge from previous tasks 
when updating with new data.\nTraditional continual learning strategies such as Experience Replay can be adapted to streaming graphs; however, these methods often face challenges such as inefficiency in preserving graph topology and an inability to capture the correlation between old and new tasks.\nTo address these challenges, we propose TA$\\mathbb{CO}$, a topology-aware graph coarsening and continual learning framework that stores information from previous tasks as a reduced graph. \nAt each time period, this reduced graph expands by combining with a new graph and aligning shared nodes, and then it undergoes a ``zoom out'' process by reduction to maintain a stable size. \nWe design a graph coarsening algorithm based on node representation proximities to efficiently reduce a graph and preserve topological information. We empirically demonstrate that the learning process on the reduced graph can approximate that of the original graph.\nOur experiments validate the effectiveness of the proposed framework on three real-world datasets using different backbone GNN models.", "keywords": "Continual Graph Learning;Catastrophic Forgetting;Graph Coarsening", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "/attachment/660a4731ebe4deeeadf25c4f5443ce61f19453f8.zip", "author": "Xiaoxue Han;Zhuo Feng;Yue Ning", "authorids": "~Xiaoxue_Han1;~Zhuo_Feng3;~Yue_Ning1", "gender": "F;M;F", "homepage": "https://hanxiaoxue114.github.io/;https://web.stevens.edu/facultyprofile/?id=2371;https://yue-ning.github.io/", "dblp": "219/1935;81/4441.html;74/9990-1.html", "google_scholar": ";;https://scholar.google.com/citations?hl=en", "orcid": ";;", "linkedin": ";;", "or_profile": "~Xiaoxue_Han1;~Zhuo_Feng3;~Yue_Ning1", "aff": "Stevens Institute of Technology;;Stevens Institute of Technology", "aff_domain": "stevens.edu;;stevens.edu", "position": "PhD student;;Assistant Professor", "bibtex": "@misc{\nhan2024a,\ntitle={A Topology-aware Graph Coarsening Framework for Continual Graph Learning},\nauthor={Xiaoxue Han and Zhuo Feng and Yue Ning},\nyear={2024},\nurl={https://openreview.net/forum?id=0D6mUZTWoF}\n}", "github": "", "project": "", "reviewers": "9rSM;sD2m;Tw7D;A9ak", "site": "https://openreview.net/forum?id=0D6mUZTWoF", "pdf_size": 937313, "rating": "3;5;6;8", "confidence": "5;4;3;3", "soundness": "3;3;3;3", "contribution": "2;2;3;3", "presentation": "3;2;3;3", "wc_summary": "136;68;88;98", "wc_strengths": "31;26;32;99", "wc_weaknesses": "184;35;66;78", "wc_questions": "36;253;25;156", "wc_review": "387;382;211;431", "wc_reply_reviewers": "0;223;22;0", "wc_reply_authors": "1061;948;432;1186", "reply_reviewers": "0;1;1;0", "reply_authors": "3;3;1;3", "rating_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 97.5, 24.713356712514795 ], "wc_strengths_avg": [ 47.0, 30.108138434649195 ], "wc_weaknesses_avg": [ 90.75, 56.07751331861997 ], "wc_questions_avg": [ 117.5, 93.59620718811206 ], "wc_review_avg": [ 352.75, 84.03087230298159 ], "wc_reply_reviewers_avg": [ 61.25, 93.81730917053633 ], "wc_reply_authors_avg": [ 906.75, 286.73278065125373 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.9198662110077999, "gs_citation": 3, "gs_cited_by_link":
"https://scholar.google.com/scholar?cites=17516262532202830926&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff_unique_index": "0;0", "aff_unique_norm": "Stevens Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.stevens.edu", "aff_unique_abbr": "SIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "0GZ1Bq4Tfr", "title": "Layer-wise Pre-weight Decay", "track": "main", "status": "Reject", "tldr": "", "abstract": "In deep learning, weight decay is a regularization mechanism been widely adopted to improve the generalization performance. Previously, a common understanding of the role of weight decay was that it contributes by pushing the model weights to approach 0 at each time step. \nHowever, our findings challenge this notion and argue the objective of weight decay is to make the weights approach the negative value of the update term instead of 0, thereby indicating a delay defect in certain steps that results in opposing penalties. In addition, we study the negative side effect of weight decay, revealing it will damage the inter-layer connectivity of the network while reducing weight magnitude.\nTo address these issues, we first propose real-time weight decay to fix the delay defect by penalizing both the weights and the gradients at each time step. \nThen, we advance the decay step before the update function as pre-weight decay to mitigate the performance drop raised by the side effect.\nTo further improve the general performance and enhance model robustness towards the decay rate, we finally introduce a layer-wise pre-weight decay to adjust the decay rate based on the layer index. \nExtensive analytical and comparative experiments demonstrate that the proposed $\\textit{layer-wise pre-weight decay}$ (LPWD) (i) exhibits remarkable robustness to the decay rate, and (ii) significantly improves the generalization performance across various conditions.", "keywords": "deep learning;regularization;generalization;weight decay", "primary_area": "optimization", "supplementary_material": "", "author": "Xiaolong Huang;Qiankun Li;Hanguang Xiao;Gao Xuesong;Xueran Li", "authorids": "~Xiaolong_Huang3;~Qiankun_Li1;~Hanguang_Xiao1;~Gao_Xuesong1;~Xueran_Li1", "gender": "M;M;M;M;M", "homepage": ";https://orcid.org/0000-0001-5121-1682;https://ai.cqut.edu.cn/info/1052/1679.htm;https://github.com/Mr-GaoXueSong;https://github.com/lxr-1204", "dblp": ";228/7339;;;", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en;;;", "orcid": ";0000-0001-5121-1682;;;", "linkedin": ";;;;", "or_profile": "~Xiaolong_Huang3;~Qiankun_Li1;~Hanguang_Xiao1;~Gao_Xuesong1;~Xueran_Li1", "aff": "Mila - Quebec Artificial Intelligence Institute;University of Science and Technology of China;Chongqing University of Technology;;Anhui University", "aff_domain": "mila.quebec;ustc.edu.cn;cuqt.edu.cn;;ahu.edu.cn", "position": "MS student;PhD student;Full Professor;;MS student", "bibtex": "@misc{\nhuang2024layerwise,\ntitle={Layer-wise Pre-weight Decay},\nauthor={Xiaolong Huang and Qiankun Li and Hanguang Xiao and Gao Xuesong and Xueran Li},\nyear={2024},\nurl={https://openreview.net/forum?id=0GZ1Bq4Tfr}\n}", "github": "", "project": "", "reviewers": "QiZQ;AMDG;hMNw;51Q7", "site": "https://openreview.net/forum?id=0GZ1Bq4Tfr", "pdf_size": 3289142, "rating": "3;3;3;6", "confidence": "4;4;5;3", "soundness": "2;2;1;3", "contribution": "2;2;1;3", "presentation": 
"2;2;2;2", "wc_summary": "127;107;55;56", "wc_strengths": "22;51;13;58", "wc_weaknesses": "224;149;125;175", "wc_questions": "63;28;79;31", "wc_review": "436;335;272;320", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.0, 0.7071067811865476 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 86.25, 31.554516317002864 ], "wc_strengths_avg": [ 36.0, 18.934096228761486 ], "wc_weaknesses_avg": [ 168.25, 36.72448093574639 ], "wc_questions_avg": [ 50.25, 21.533404282648853 ], "wc_review_avg": [ 340.75, 59.713377898089135 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:6NAF6PFAlzQJ:scholar.google.com/&scioq=Layer-wise+Pre-weight+Decay&hl=en&as_sdt=0,48", "gs_version_total": 0, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Quebec Artificial Intelligence Institute;University of Science and Technology of China;Chongqing University of Technology;Anhui University", "aff_unique_dep": "Artificial Intelligence;;;", "aff_unique_url": "https://mila.quebec;http://www.ustc.edu.cn;http://www.cqut.edu.cn;http://www.ahu.edu.cn/", "aff_unique_abbr": "Mila;USTC;;AHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "Canada;China" }, { "title": "Language Control Diffusion: Efficiently Scaling through Space, Time, and Tasks", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19620", "id": "0H6DFoZZXZ", "author_site": "Edwin Zhang, Yujie Lu, Shinda Huang, William Wang, Amy Zhang", "tldr": "", "abstract": "Training generalist agents is difficult across several axes, requiring us to deal with high-dimensional inputs (space), long horizons (time), and generalization to novel tasks. Recent advances with architectures have allowed for improved scaling along one or two of these axes, but are still computationally prohibitive to use. In this paper, we propose to address all three axes by leveraging Language to Control Diffusion models as a hierarchical planner conditioned on language (LCD). We effectively and efficiently scale diffusion models for planning in extended temporal, state, and task dimensions to tackle long horizon control problems conditioned on natural language instructions, as a step towards generalist agents. Comparing LCD with other state-of-the-art models on the CALVIN language benchmark finds that LCD outperforms other SOTA methods in multi-task success rates, whilst improving inference speed over other comparable diffusion models by 3.3x~15x. 
We show that LCD can successfully leverage the unique strength of diffusion models to produce coherent long range plans while addressing their weakness in generating low-level details and control.", "keywords": "planning;diffusion;language;RL;reinforcement", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/f73f5eb9525f55dc681390f230a073bff76cbce3.zip", "author": "Edwin Zhang;Yujie Lu;Shinda Huang;William Yang Wang;Amy Zhang", "authorids": "~Edwin_Zhang2;~Yujie_Lu1;~Shinda_Huang1;~William_Yang_Wang2;~Amy_Zhang1", "gender": ";;;F;M", "homepage": "https://eddie.win;https://yujielu10.github.io/;;;https://www.cs.ucsb.edu/~william/", "dblp": ";;;43/2754;08/9282", "google_scholar": ";pcmr6GMAAAAJ;;;gf8Ms_8AAAAJ", "orcid": ";;;;", "linkedin": ";;shinda-huang/;;", "or_profile": "~Edwin_Zhang2;~Yujie_Lu1;~Shinda_Huang1;~Amy_Zhang2;~William_Wang1", "aff": "Harvard University;UC Santa Barbara;University of California, Santa Barbara;Meta Facebook;UC Santa Barbara", "aff_domain": "harvard.edu;ucsb.edu;ucsb.edu;facebook.com;ucsb.edu", "position": "PhD student;PhD student;MS student;Research Scientist;Full Professor", "bibtex": "@inproceedings{\nzhang2024language,\ntitle={Language Control Diffusion: Efficiently Scaling through Space, Time, and Tasks},\nauthor={Edwin Zhang and Yujie Lu and Shinda Huang and William Yang Wang and Amy Zhang},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=0H6DFoZZXZ}\n}", "github": "", "project": "", "reviewers": "z6e9;74cS;FYfw;fAWH", "pdf_size": 2758481, "rating": "5;6;6;6", "confidence": "4;3;3;3", "soundness": "3;3;3;3", "contribution": "2;3;2;3", "presentation": "3;3;3;3", "wc_summary": "71;46;34;44", "wc_strengths": "21;82;34;71", "wc_weaknesses": "94;172;223;76", "wc_questions": "2;87;5;73", "wc_review": "188;387;296;264", "wc_reply_reviewers": "0;137;12;0", "wc_reply_authors": "1014;1285;1093;685", "reply_reviewers": "0;1;1;0", "reply_authors": "4;4;2;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 48.75, 13.626720074911644 ], "wc_strengths_avg": [ 52.0, 25.228951623085727 ], "wc_weaknesses_avg": [ 141.25, 59.41117319158072 ], "wc_questions_avg": [ 41.75, 38.58351331851468 ], "wc_review_avg": [ 283.75, 71.35956488096042 ], "wc_reply_reviewers_avg": [ 37.25, 57.798680780792914 ], "wc_reply_authors_avg": [ 1019.25, 216.6868420093846 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12290706867650507132&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=0H6DFoZZXZ", "pdf": "https://openreview.net/pdf?id=0H6DFoZZXZ", "email": "harvard.edu;ucsb.edu;ucsb.edu;facebook.com;ucsb.edu", "author_num": 5, "aff_unique_index": "0;1;1;2;1", "aff_unique_norm": "Harvard University;University of California, Santa Barbara;Meta", "aff_unique_dep": ";;Meta Platforms, Inc.", "aff_unique_url": "https://www.harvard.edu;https://www.ucsb.edu;https://meta.com", "aff_unique_abbr": "Harvard;UCSB;Meta", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Santa Barbara", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": 
"0HIMHjYhYe", "title": "Diffusion Model-Augmented Behavioral Cloning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Imitation learning addresses the challenge of learning by observing an expert\u2019s demonstrations without access to reward signals from environments. Most existing imitation learning methods that do not require interacting with environments either model the expert distribution as the conditional probability p(a|s) (e.g., behavioral cloning, BC) or the joint probability p(s, a) Despite its simplicity, modeling the conditional probability with BC usually struggles with generalization. While modeling the joint probability can lead to improved generalization performance, the inference procedure is often time-consuming and the model can suffer from manifold overfitting. This work proposes an imitation learning framework that benefits from modeling both the conditional and joint probability of the expert distribution. Our proposed diffusion model-augmented behavioral cloning (DBC) employs a diffusion model trained to model expert behaviors and learns a policy to optimize both the BC loss (conditional) and our proposed diffusion model loss (joint). DBC outperforms baselines in various continuous control tasks in navigation, robot arm manipulation, dexterous manipulation, and locomotion. We design additional experiments to verify the limitations of modeling either the conditional probability or the joint probability of the expert distribution as well as compare different generative models. Ablation studies justify the effectiveness of our design choices.", "keywords": "Imitation Learning;Learning from Demonstration;Diffusion Models;Behavioral Cloning", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/ce67a7bb74506963876163cf4b308455c4027d26.pdf", "author": "Hsiang-Chun Wang;Shang-Fu Chen;Ming-Hao Hsu;Chun-Mao Lai;Shao-Hua Sun", "authorids": "~Hsiang-Chun_Wang1;~Shang-Fu_Chen2;~Ming-Hao_Hsu1;~Chun-Mao_Lai1;~Shao-Hua_Sun1", "gender": ";M;M;M;M", "homepage": "https://hsiangchun0205.github.io/;https://www.linkedin.com/in/shang-fu-chen-354914199/;https://qaz159qaz159.github.io/;https://mecoli1219.github.io/;http://shaohua0116.github.io", "dblp": ";203/9102;325/4631;325/4767;158/9680", "google_scholar": "https://scholar.google.com.tw/citations?user=vpJMSjMAAAAJ;https://scholar.google.com.tw/citations?user=ZKOpgs4AAAAJ;;;uXsfnaQAAAAJ", "orcid": ";;;;0000-0001-7579-6734", "linkedin": "https://tw.linkedin.com/in/hsiang-chun-wang-8a4798269;;;;shaohua0116/", "or_profile": "~Hsiang-Chun_Wang1;~Shang-Fu_Chen2;~Ming-Hao_Hsu1;~Chun-Mao_Lai1;~Shao-Hua_Sun1", "aff": "National Taiwan University;National Taiwan University;National Taiwan University;National Taiwan University;National Taiwan University", "aff_domain": "ntu.edu.tw;ntu.edu.tw;ntu.edu.tw;ntu.edu.tw;ntu.edu.tw", "position": "MS student;PhD student;Undergrad student;Undergrad student;Assistant Professor", "bibtex": "@misc{\nwang2024diffusion,\ntitle={Diffusion Model-Augmented Behavioral Cloning},\nauthor={Hsiang-Chun Wang and Shang-Fu Chen and Ming-Hao Hsu and Chun-Mao Lai and Shao-Hua Sun},\nyear={2024},\nurl={https://openreview.net/forum?id=0HIMHjYhYe}\n}", "github": "", "project": "", "reviewers": "dYN2;bGrV;8bWB;Mkdd", "site": "https://openreview.net/forum?id=0HIMHjYhYe", "pdf_size": 2354520, "rating": "3;3;3;6", "confidence": "4;5;4;4", "soundness": "2;2;1;3", "contribution": "2;1;2;3", "presentation": "3;2;2;3", "wc_summary": "54;70;77;58", "wc_strengths": "28;37;45;116", 
"wc_weaknesses": "2;506;153;61", "wc_questions": "98;3;6;36", "wc_review": "182;616;281;271", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.0, 0.7071067811865476 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 64.75, 9.202581159652981 ], "wc_strengths_avg": [ 56.5, 34.87477598494362 ], "wc_weaknesses_avg": [ 180.5, 195.4795385711763 ], "wc_questions_avg": [ 35.75, 38.18622133702155 ], "wc_review_avg": [ 337.5, 165.34584966064313 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=499986354715790633&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "National Taiwan University", "aff_unique_dep": "", "aff_unique_url": "https://www.ntu.edu.tw", "aff_unique_abbr": "NTU", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Taiwan", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "0IaTFNJner", "title": "On the Embedding Collapse When Scaling up Recommendation Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recent advances in deep foundation models have led to a promising trend of developing large recommendation models to leverage vast amounts of available data. However, we experiment to scale up existing recommendation models and observe that the enlarged models do not improve satisfactorily. In this context, we investigate the embedding layers of enlarged models and identify a phenomenon of *embedding collapse*, which ultimately hinders scalability, wherein the embedding matrix tends to reside in a low-dimensional subspace. Through empirical and theoretical analysis, we demonstrate that the feature interaction module specific to recommendation models has a *two-sided effect*. On the one hand, the interaction restricts embedding learning when interacting with collapsed embeddings, exacerbating the collapse issue. On the other hand, feature interaction is crucial in mitigating the fitting of spurious features, thereby improving scalability. Based on this analysis, we propose a simple yet effective *multi-embedding* design incorporating embedding-set-specific interaction modules to capture diverse patterns and reduce collapse. 
Extensive experiments demonstrate that this proposed design provides consistent scalability for various recommendation models.", "keywords": "machine learning;representation learning;recommender system;collapse", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Xingzhuo Guo;Junwei Pan;Ximei Wang;Baixu Chen;Jie Jiang;Mingsheng Long", "authorids": "~Xingzhuo_Guo1;~Junwei_Pan1;~Ximei_Wang1;~Baixu_Chen2;~Jie_Jiang3;~Mingsheng_Long5", "gender": "M;M;M;M;M;M", "homepage": ";https://junwei-pan.github.io/;https://wxm17.github.io/;https://github.com/tsingcbx99;https://baike.baidu.com/item/%E8%92%8B%E6%9D%B0/58674740;http://ise.thss.tsinghua.edu.cn/~mlong", "dblp": ";210/6440;89/8876;279/4076;32/7018-15.html;74/9023", "google_scholar": "Cbinj9QAAAAJ;sUaBkFkAAAAJ;WmOCCVgAAAAJ;;;_MjXpXkAAAAJ", "orcid": ";0009-0003-2697-7012;;;0000-0001-9658-5127;0000-0002-5412-9120", "linkedin": ";;;;;", "or_profile": "~Xingzhuo_Guo1;~Junwei_Pan1;~Ximei_Wang1;~Baixu_Chen2;~jie_jiang3;~Mingsheng_Long2", "aff": "Tsinghua University;Tencent;Tencent;Tsinghua University;Tencent AI Lab;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tencent.com;tencent.com;tsinghua.edu.cn;tencent.com;tsinghua.edu.cn", "position": "PhD student;Researcher;Researcher;MS student;VP;Associate Professor", "bibtex": "@misc{\nguo2024on,\ntitle={On the Embedding Collapse When Scaling up Recommendation Models},\nauthor={Xingzhuo Guo and Junwei Pan and Ximei Wang and Baixu Chen and Jie Jiang and Mingsheng Long},\nyear={2024},\nurl={https://openreview.net/forum?id=0IaTFNJner}\n}", "github": "", "project": "", "reviewers": "DK5H;JdNi;9BGA;eFdv", "site": "https://openreview.net/forum?id=0IaTFNJner", "pdf_size": 2016960, "rating": "5;5;5;6", "confidence": "4;3;4;3", "soundness": "3;3;2;3", "contribution": "3;1;2;2", "presentation": "3;2;3;3", "wc_summary": "132;46;118;166", "wc_strengths": "127;31;155;55", "wc_weaknesses": "140;107;171;180", "wc_questions": "67;46;8;36", "wc_review": "466;230;452;437", "wc_reply_reviewers": "0;0;0;16", "wc_reply_authors": "1019;1126;1191;1016", "reply_reviewers": "0;0;0;1", "reply_authors": "3;3;3;3", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 115.5, 43.75785643744447 ], "wc_strengths_avg": [ 92.0, 50.70502933634887 ], "wc_weaknesses_avg": [ 149.5, 28.674901917879335 ], "wc_questions_avg": [ 39.25, 21.22940178149163 ], "wc_review_avg": [ 396.25, 96.53075934643837 ], "wc_reply_reviewers_avg": [ 4.0, 6.928203230275509 ], "wc_reply_authors_avg": [ 1088.0, 74.15861379502721 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14239355219505619999&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff_unique_index": "0;1;1;0;1;0", "aff_unique_norm": "Tsinghua University;Tencent", "aff_unique_dep": ";Tencent Holdings Limited", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.tencent.com", "aff_unique_abbr": "THU;Tencent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "0JTwZ30qPH", "title": "Task-Oriented Multi-View 
Representation Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Multi-view representation learning aims to learn a high-quality unified representation for an entity from its multiple observable views to facilitate the performance of downstream tasks. A typical multi-view representation learning framework consists of four main components: View-specific encoding, Single-view learning (SVL), Multi-view learning (MVL), and Fusion. Recent studies achieve promising performance by carefully designing SVL and MVL constraints, but almost all of them ignore the basic fact that \\textit{effective representations are different for different tasks, even for the same entity}. To bridge this gap, this work proposes a \\textbf{T}ask-\\textbf{O}riented \\textbf{M}ulti-\\textbf{V}iew \\textbf{R}epresentation \\textbf{L}earning (TOMRL) method, where the key idea is to modulate features in the View-specific encoding and Fusion modules according to the task guidance. To this end, we first design a gradient-based embedding strategy to flexibly represent multi-view tasks. After that, a meta-learner is trained to map the task embedding into a set of view-specific parameters and a view-shared parameter for modulation in the Encoding and Fusion modules, respectively. This whole process is formalized as a nested optimization problem and ultimately solved by a bi-level optimization scheme. Extensive experiments on four multi-view datasets validate that our TOMRL consistently improves the performance of most existing multi-view representation learning approaches.", "keywords": "Multi-view learning; Meta learning; Feature modulation; Task adaptation", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Ren Wang;Haoliang Sun;Yuxiu Lin;Yongshun Gong;Xiushan Nie;Yilong Yin", "authorids": "~Ren_Wang5;~Haoliang_Sun2;~Yuxiu_Lin1;~Yongshun_Gong1;~Xiushan_Nie1;~Yilong_Yin1", "gender": "M;M;F;M;M;M", "homepage": "https://time.sdu.edu.cn/info/1071/2586.htm;https://haolsun.github.io/;;https://faculty.sdu.edu.cn/gongyongshun/en/index.htm;http://niexsh.sdufe.edu.cn;https://faculty.sdu.edu.cn/ylyin", "dblp": "29/50-11;117/5673;275/7309;194/1824;03/8117;", "google_scholar": "hkHRff8AAAAJ;s5FAQ5wAAAAJ;;WIHqungAAAAJ;;", "orcid": "0000-0002-5877-5023;0000-0001-7715-5682;0000-0003-1809-0828;0000-0003-3948-4471;0000-0001-9644-9723;", "linkedin": ";haoliang-sun-357a5a11a;;;;", "or_profile": "~Ren_Wang5;~Haoliang_Sun2;~Yuxiu_Lin1;~Yongshun_Gong1;~Xiushan_Nie1;~Yilong_Yin1", "aff": "Shandong University;Shandong University;Shandong University of Finance and Economics;Shandong University;Shandong Jianzhu University;Shandong University", "aff_domain": "sdu.edu.cn;sdu.edu.cn;sdufe.edu.cn;sdu.edu.cn;sdjzu.edu.cn;sdu.edu.cn", "position": "PhD student;Associate Professor;PhD student;Full Professor;Full Professor;Full Professor", "bibtex": "@misc{\nwang2024taskoriented,\ntitle={Task-Oriented Multi-View Representation Learning},\nauthor={Ren Wang and Haoliang Sun and Yuxiu Lin and Yongshun Gong and Xiushan Nie and Yilong Yin},\nyear={2024},\nurl={https://openreview.net/forum?id=0JTwZ30qPH}\n}", "github": "", "project": "", "reviewers": "d8EV;caBF;aCu2;fqyn;V7F6", "site": "https://openreview.net/forum?id=0JTwZ30qPH", "pdf_size": 681589, "rating": "3;3;3;5;6", "confidence": "5;4;4;3;4", "soundness": "3;2;2;3;3", "contribution": "1;2;1;2;3", "presentation": "2;2;2;3;4", "wc_summary": "46;78;46;73;43", "wc_strengths": "6;68;35;68;30", 
"wc_weaknesses": "148;167;127;175;138", "wc_questions": "26;5;75;35;37", "wc_review": "226;318;283;351;248", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "617;631;0;556;223", "reply_reviewers": "0;0;0;0;0", "reply_authors": "1;1;0;1;1", "rating_avg": [ 4.0, 1.2649110640673518 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "contribution_avg": [ 1.8, 0.7483314773547883 ], "presentation_avg": [ 2.6, 0.8 ], "wc_summary_avg": [ 57.2, 15.065191668213185 ], "wc_strengths_avg": [ 41.4, 23.829393613770367 ], "wc_weaknesses_avg": [ 151.0, 17.81010948871455 ], "wc_questions_avg": [ 35.6, 22.729716232280598 ], "wc_review_avg": [ 285.2, 45.384578878733684 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 405.4, 251.38384991880446 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0.8, 0.4 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.49999999999999994, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:D-bjX91-YE4J:scholar.google.com/&scioq=Task-Oriented+Multi-View+Representation+Learning&hl=en&as_sdt=0,48", "gs_version_total": 0, "aff_unique_index": "0;0;1;0;2;0", "aff_unique_norm": "Shandong University;Shandong University of Finance and Economics;Shandong Jianzhu University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.sdu.edu.cn;http://www.sdufe.edu.cn;http://www.sdjzu.edu.cn", "aff_unique_abbr": "SDU;SDUFE;SDJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "0JWVWUlobv", "title": "4D Tensor Multi-task Continual Learning for Disease Dynamic Prediction", "track": "main", "status": "Reject", "tldr": "", "abstract": "Machine learning techniques for predicting Alzheimer's disease (AD) progression can substantially help researchers and clinicians establish strong AD preventive and treatment strategies. However, current research on AD prediction algorithms encounters challenges with monotonic data form, small dataset and scarcity of time-continuous data. To address all three of these problems at once, we propose a novel machine learning approach that implements the 4D tensor multi-task continual learning algorithm to predict AD progression by quantifying multi-dimensional information on brain structural variation and knowledge sharing between patients. To meet real-world application scenarios, the method can integrate knowledge from all available data as patient data increases to continuously update and optimise prediction results. To evaluate the performance of the proposed approach, we conducted extensive experiments utilising data from the Alzheimer's Disease Neuroimaging Initiative (ADNI). The results demonstrate that the proposed approach has superior accuracy and stability in predicting various cognitive scores of AD progression compared to single-task learning, benchmarks and state-of-the-art multi-task regression methods. 
The proposed approach identifies structural brain variations in patients and utilises it to accurately predict and diagnose AD progression from magnetic resonance imaging (MRI) data alone, and the performance of the model improves as the MRI data increases.", "keywords": "Alzheimer\u2019s disease progression;tensor multi-task learning;continual learning;amalgamated magnitude-direction quantification;brain structure variation", "primary_area": "applications to neuroscience & cognitive science", "supplementary_material": "/attachment/9181459918592259ae5a65200764a826cd796d12.zip", "author": "Yu Zhang;Xulong Wang;Vitaveska Lanfranchi;Po Yang", "authorids": "~Yu_Zhang46;~Xulong_Wang2;~Vitaveska_Lanfranchi1;~Po_Yang1", "gender": ";M;F;M", "homepage": ";https://www.sheffield.ac.uk/cs/people/research-staff/xulong-wang;;https://www.sheffield.ac.uk/dcs/people/academic/pyang/profile", "dblp": ";211/7318-1;15/6230;88/5343-1", "google_scholar": "4gEGS8gAAAAJ;-oNwqtgAAAAJ;BiF44ewAAAAJ;RdK3cwgAAAAJ", "orcid": ";0000-0002-7385-4926;;0000-0002-6604-4298", "linkedin": ";;;po-yang-83a03043/", "or_profile": "~Yu_Zhang46;~Xulong_Wang2;~Vitaveska_Lanfranchi1;~Po_Yang1", "aff": "University of Sheffield;University of Sheffield;University of Sheffield;University of Sheffield", "aff_domain": "sheffield.ac.uk;shef.ac.uk;sheffield.ac.uk;sheffield.ac.uk", "position": "PhD student;PhD student;Full Professor;Associate Professor", "bibtex": "@misc{\nzhang2024d,\ntitle={4D Tensor Multi-task Continual Learning for Disease Dynamic Prediction},\nauthor={Yu Zhang and Xulong Wang and Vitaveska Lanfranchi and Po Yang},\nyear={2024},\nurl={https://openreview.net/forum?id=0JWVWUlobv}\n}", "github": "", "project": "", "reviewers": "ZWzC;6zjs;Zsj5;vwzZ", "site": "https://openreview.net/forum?id=0JWVWUlobv", "pdf_size": 5511212, "rating": "5;5;5;6", "confidence": "2;4;4;4", "soundness": "3;2;3;2", "contribution": "2;2;2;3", "presentation": "1;2;2;2", "wc_summary": "81;93;27;92", "wc_strengths": "18;10;23;58", "wc_weaknesses": "93;192;265;216", "wc_questions": "90;2;270;78", "wc_review": "282;297;585;444", "wc_reply_reviewers": "0;92;1060;187", "wc_reply_authors": "424;742;1652;748", "reply_reviewers": "0;1;2;2", "reply_authors": "1;1;4;4", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_avg": [ 73.25, 27.11434122378783 ], "wc_strengths_avg": [ 27.25, 18.34904629674251 ], "wc_weaknesses_avg": [ 191.5, 62.659795722616266 ], "wc_questions_avg": [ 110.0, 98.34632682515398 ], "wc_review_avg": [ 402.0, 123.16452411307405 ], "wc_reply_reviewers_avg": [ 334.75, 423.9111787863113 ], "wc_reply_authors_avg": [ 891.5, 458.2191069783101 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 2.5, 1.5 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:D1R4C8RTFFwJ:scholar.google.com/&scioq=4D+Tensor+Multi-task+Continual+Learning+for+Disease+Dynamic+Prediction&hl=en&as_sdt=0,48", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Sheffield", "aff_unique_dep": "", "aff_unique_url": "https://www.sheffield.ac.uk", "aff_unique_abbr": "Sheffield", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, 
{ "id": "0JnaN0Crlz", "title": "Enhancing Adversarial Robustness on Categorical Data via Attribution Smoothing", "track": "main", "status": "Reject", "tldr": "", "abstract": "Many efforts have been contributed to alleviate the adversarial risk of deep neural networks on continuous inputs.\nAdversarial robustness on general categorical inputs, especially tabular categorical attributes, has received much less attention. To echo this challenge, our work aims to enhance the robustness of classification over categorical attributes against adversarial perturbations. We establish an information-theoretic upper bound on the expected adversarial risk. Based on it,\nwe propose an adversarially robust learning method, named Integrated Gradient-Smoothed Gradient (IGSG)-based regularization. It is designed to smooth the attributional sensitivity of each feature and the decision boundary of the classifier to achieve lower adversarial risk, i.e., desensitizing the categorical attributes in the classifier. We conduct an extensive empirical study over categorical datasets of various application domains. The experimental results confirm the effectiveness of IGSG, which surpasses the state-of-the-art robust training methods by a margin of approximately 0.4\\% to 12.2\\% on average in terms of adversarial accuracy, especially on high-dimension datasets.", "keywords": "Adversarial Robustness;Categorical Data;Attribution Smoothing;Information Theory", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/506e78e21660053fe2a0625a76f2b36263fd14fc.pdf", "author": "Yujun Zhou;Yufei Han;Hongyan Bao;Xiangliang Zhang", "authorids": "~Yujun_Zhou1;~Yufei_Han1;~Hongyan_Bao1;~Xiangliang_Zhang1", "gender": "M;M;;F", "homepage": "https://yujunzhou.github.io/;;https://mine.kaust.edu.sa/Pages/Hongyan.aspx;https://sites.nd.edu/xiangliang-zhang/", "dblp": "162/3265-2;74/2507;234/6902;74/1890-1", "google_scholar": "t0c7rQQAAAAJ;xdCvBg0AAAAJ;;BhRJe4wAAAAJ", "orcid": "0000-0003-1376-5187;;;0000-0002-3574-5665", "linkedin": "yujun-zhou-zyj/;;;", "or_profile": "~Yujun_Zhou1;~Yufei_Han1;~Hongyan_Bao1;~Xiangliang_Zhang1", "aff": "University of Notre Dame;INRIA;KAUST;University of Notre Dame", "aff_domain": "nd.edu;inria.fr;kaust.edu.sa;nd.edu", "position": "PhD student;Researcher;PhD student;Associate Professor", "bibtex": "@misc{\nzhou2024enhancing,\ntitle={Enhancing Adversarial Robustness on Categorical Data via Attribution Smoothing},\nauthor={Yujun Zhou and Yufei Han and Hongyan Bao and Xiangliang Zhang},\nyear={2024},\nurl={https://openreview.net/forum?id=0JnaN0Crlz}\n}", "github": "", "project": "", "reviewers": "oxAy;KxJ6;eEhS;rFyD;Bx5T;iddk", "site": "https://openreview.net/forum?id=0JnaN0Crlz", "pdf_size": 1463516, "rating": "3;5;5;6;6;6", "confidence": "4;4;4;3;2;4", "soundness": "2;2;2;3;3;3", "contribution": "2;2;2;3;3;3", "presentation": "1;2;3;3;3;3", "wc_summary": "15;20;60;95;31;145", "wc_strengths": "34;9;97;24;40;41", "wc_weaknesses": "119;170;159;15;8;143", "wc_questions": "44;2;30;47;188;1", "wc_review": "212;201;346;181;267;330", "wc_reply_reviewers": "0;0;0;11;0;0", "wc_reply_authors": "1707;1002;1917;706;869;718", "reply_reviewers": "0;0;0;1;0;0", "reply_authors": "7;4;4;4;6;3", "rating_avg": [ 5.166666666666667, 1.0671873729054746 ], "confidence_avg": [ 3.5, 0.7637626158259734 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.7637626158259734 ], "wc_summary_avg": [ 61.0, 46.38606112472438 ], 
"wc_strengths_avg": [ 40.833333333333336, 27.382577591518945 ], "wc_weaknesses_avg": [ 102.33333333333333, 66.14042300708059 ], "wc_questions_avg": [ 52.0, 63.46915261237803 ], "wc_review_avg": [ 256.1666666666667, 63.61712732345661 ], "wc_reply_reviewers_avg": [ 1.8333333333333333, 4.099457958749614 ], "wc_reply_authors_avg": [ 1153.1666666666667, 480.1220041706992 ], "reply_reviewers_avg": [ 0.16666666666666666, 0.372677996249965 ], "reply_authors_avg": [ 4.666666666666667, 1.3743685418725535 ], "replies_avg": [ 40, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5111986324432478, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:IAeeVQInNRYJ:scholar.google.com/&scioq=Enhancing+Adversarial+Robustness+on+Categorical+Data+via+Attribution+Smoothing&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Notre Dame;INRIA;King Abdullah University of Science and Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.nd.edu;https://www.inria.fr;https://www.kaust.edu.sa", "aff_unique_abbr": "Notre Dame;INRIA;KAUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;0", "aff_country_unique": "United States;France;Saudi Arabia" }, { "title": "From Latent Graph to Latent Topology Inference: Differentiable Cell Complex Module", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19619", "id": "0JsRZEGZ7L", "author_site": "Claudio Battiloro, Indro Spinelli, Lev Telyatinkov, Michael Bronstein, Simone Scardapane, Paolo Di Lorenzo", "tldr": "", "abstract": "Latent Graph Inference (LGI) relaxed the reliance of Graph Neural Networks (GNNs) on a given graph topology by dynamically learning it. However, most of LGI methods assume to have a (noisy, incomplete, improvable, ...) input graph to rewire and can solely learn regular graph topologies. In the wake of the success of Topological Deep Learning (TDL), we study Latent Topology Inference (LTI) for learning higher-order cell complexes (with sparse and not regular topology) describing multi-way interactions between data points. To this aim, we introduce the Differentiable Cell Complex Module (DCM), a novel learnable function that computes cell probabilities in the complex to improve the downstream task. We show how to integrate DCM with cell complex message-passing networks layers and train it in an end-to-end fashion, thanks to a two-step inference procedure that avoids an exhaustive search across all possible cells in the input, thus maintaining scalability. Our model is tested on several homophilic and heterophilic graph datasets and it is shown to outperform other state-of-the-art techniques, offering significant improvements especially in cases where an input graph is not provided.", "keywords": "Topological Deep Learning;Geometric Deep Learning;Latent Topology Inference;Latent Graph Inference;Cell Complexes", "primary_area": "learning on graphs and other geometries & topologies", "supplementary_material": "", "author": "Claudio Battiloro;Indro Spinelli;Lev Telyatnikov;Michael M. 
Bronstein;Simone Scardapane;Paolo Di Lorenzo", "authorids": "~Claudio_Battiloro1;~Indro_Spinelli1;~Lev_Telyatnikov1;~Michael_M._Bronstein1;~Simone_Scardapane1;~Paolo_Di_Lorenzo1", "gender": "M;M;M;M;M;M", "homepage": ";;;http://www.inf.usi.ch/bronstein/;http://ispac.diet.uniroma1.it/scardapane/;https://sites.google.com/site/paolodilorenzohp/", "dblp": "243/6640;241/5134;;07/2668;144/2184;42/9879", "google_scholar": "_J11o_IAAAAJ;0glmB_UAAAAJ;MzFz-tcAAAAJ;UU3N6-UAAAAJ;https://scholar.google.it/citations?user=aSuosYoAAAAJ;https://scholar.google.it/citations?user=VZYvspQAAAAJ", "orcid": ";0000-0003-1963-3548;;;0000-0003-0881-8344;", "linkedin": "claudio-battiloro-b4390b175;;;mbronstein/;simonescardapane;", "or_profile": "~Claudio_Battiloro1;~Indro_Spinelli1;~Lev_Telyatnikov1;~Michael_M._Bronstein1;~Simone_Scardapane1;~Paolo_Di_Lorenzo1", "aff": "University of Roma \"La Sapienza\";Sapienza University of Rome;University of Roma \"La Sapienza\";University of Oxford;Sapienza University of Rome;University of Roma \"La Sapienza\"", "aff_domain": "uniroma1.it;uniroma1.it;uniroma1.it;ox.ac.uk;uniroma1.it;uniroma1.it", "position": "PhD student;Assistant Professor;PhD student;Full Professor;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nbattiloro2024from,\ntitle={From Latent Graph to Latent Topology Inference: Differentiable Cell Complex Module},\nauthor={Claudio Battiloro and Indro Spinelli and Lev Telyatnikov and Michael M. Bronstein and Simone Scardapane and Paolo Di Lorenzo},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=0JsRZEGZ7L}\n}", "github": "", "project": "", "reviewers": "EucF;XizW;ims6", "pdf_size": 2416075, "rating": "8;8;8", "confidence": "4;3;2", "soundness": "3;4;2", "contribution": "3;3;3", "presentation": "2;3;3", "wc_summary": "88;191;64", "wc_strengths": "57;68;42", "wc_weaknesses": "90;54;9", "wc_questions": "186;105;11", "wc_review": "421;418;126", "wc_reply_reviewers": "32;37;0", "wc_reply_authors": "766;807;646", "reply_reviewers": "1;1;0", "reply_authors": "1;1;1", "rating_avg": [ 8.0, 0.0 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 114.33333333333333, 55.089825638577665 ], "wc_strengths_avg": [ 55.666666666666664, 10.656244908763853 ], "wc_weaknesses_avg": [ 51.0, 33.13608305156178 ], "wc_questions_avg": [ 100.66666666666667, 71.50912917631955 ], "wc_review_avg": [ 321.6666666666667, 138.36264749643317 ], "wc_reply_reviewers_avg": [ 23.0, 16.391054470858997 ], "wc_reply_authors_avg": [ 739.6666666666666, 68.31463158715627 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14758672841525436027&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "openreview": "https://openreview.net/forum?id=0JsRZEGZ7L", "pdf": "https://openreview.net/pdf?id=0JsRZEGZ7L", "email": "uniroma1.it;uniroma1.it;uniroma1.it;ox.ac.uk;uniroma1.it;uniroma1.it", "author_num": 6, "aff_unique_index": "0;1;0;2;1;0", "aff_unique_norm": "University of Rome La Sapienza;Sapienza University of Rome;University of Oxford", "aff_unique_dep": ";;", "aff_unique_url": 
"https://www.uniroma1.it;https://www.uniroma1.it;https://www.ox.ac.uk", "aff_unique_abbr": "La Sapienza;Sapienza;Oxford", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Rome;", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "Italy;United Kingdom" }, { "id": "0KVkTDB6KZ", "title": "EFFL: Egalitarian Fairness in Federated Learning for Mitigating Matthew Effect", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Recent advances in federated learning (FL) enable collaborative training of machine learning (ML) models from large-scale and widely dispersed clients while protecting their privacy. However, when different clients' datasets are heterogeneous, traditional FL mechanisms produce a global model that does not adequately represent the poorer clients with limited data resources, resulting in lower accuracy and higher bias on their local data. According to the Matthew effect, which describes how the advantaged gain more advantage and the disadvantaged lose more over time, deploying such a global model in client applications may worsen the resource disparity among the clients and harm the principles of social welfare and fairness. To mitigate the Matthew effect, we propose Egalitarian Fairness Federated Learning (EFFL), where egalitarian fairness refers to the global model learned from FL has: (1) equal accuracy among clients; (2) equal decision bias among clients. Besides achieving egalitarian fairness among the clients, EFFL also aims for performance optimality, minimizing the empirical risk loss and the bias for each client; both are essential for any ML model training, whether centralized or decentralized. We formulate EFFL as a multi-constrained multi-objectives optimization (MCMOO) problem, with the decision bias and egalitarian fairness as constraints and the minimization of the empirical risk losses on all clients as multiple objectives to be optimized. We propose a gradient-based three-stage algorithm to obtain the Pareto optimal solutions within the constraint space. 
Extensive experiments demonstrate that EFFL outperforms other state-of-the-art FL algorithms in achieving a high-performance global model with enhanced egalitarian fairness among all clients.", "keywords": "Egalitarian Fairness;Federated Learning;AI Ethics", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/ccf13bad6278e0703c22b8b7c0ac0c638ad97767.zip", "author": "Jiashi Gao;Changwu Huang;Ming Tang;Shin Hwei Tan;Xin Yao;Xuetao Wei", "authorids": "~Jiashi_Gao1;~Changwu_Huang1;~Ming_Tang5;~Shin_Hwei_Tan1;~Xin_Yao1;~Xuetao_Wei2", "gender": "F;M;F;F;;M", "homepage": ";;http://mingtang.site;https://www.shinhwei.com/;http://www.cs.bham.ac.uk/~xin;https://cse.sustech.edu.cn/faculty/~weixt/", "dblp": "221/1810;227/6536;73/4373-6;16/9656;;09/5916", "google_scholar": ";https://scholar.google.fr/citations?user=QRnmYfkAAAAJ;4v9UxPYAAAAJ;1eFjFs8AAAAJ;;8fNwEScAAAAJ", "orcid": ";;0000-0003-4732-5155;;;0000-0002-4450-2251", "linkedin": ";;;;;", "or_profile": "~Jiashi_Gao1;~Changwu_Huang1;~Ming_Tang5;~Shin_Hwei_Tan1;~Xin_Yao1;~Xuetao_Wei2", "aff": "Southern University of Science and Technology;;Southern University of Science and Technology;Concordia University;;Southern University of Science and Technology", "aff_domain": "sustech.edu.cn;;sustech.edu.cn;concordia.ca;;sustech.edu.cn", "position": "PhD student;;Associate Professor;Associate Professor;;Associate Professor", "bibtex": "@misc{\ngao2024effl,\ntitle={{EFFL}: Egalitarian Fairness in Federated Learning for Mitigating Matthew Effect},\nauthor={Jiashi Gao and Changwu Huang and Ming Tang and Shin Hwei Tan and Xin Yao and Xuetao Wei},\nyear={2024},\nurl={https://openreview.net/forum?id=0KVkTDB6KZ}\n}", "github": "", "project": "", "reviewers": "YyVq;ta1u;hFqL;Raor", "site": "https://openreview.net/forum?id=0KVkTDB6KZ", "pdf_size": 725166, "rating": "3;5;6;8", "confidence": "3;4;2;4", "soundness": "2;2;2;3", "contribution": "2;2;3;3", "presentation": "2;2;2;2", "wc_summary": "39;74;102;134", "wc_strengths": "22;30;32;66", "wc_weaknesses": "159;122;330;193", "wc_questions": "48;194;3;8", "wc_review": "268;420;467;401", "wc_reply_reviewers": "0;13;70;0", "wc_reply_authors": "805;887;763;727", "reply_reviewers": "0;1;1;0", "reply_authors": "2;2;2;2", "rating_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 87.25, 35.02409884636577 ], "wc_strengths_avg": [ 37.5, 16.874537030686206 ], "wc_weaknesses_avg": [ 201.0, 78.5970737368765 ], "wc_questions_avg": [ 63.25, 77.47701271990293 ], "wc_review_avg": [ 389.0, 73.87489424696322 ], "wc_reply_reviewers_avg": [ 20.75, 28.925550988702014 ], "wc_reply_authors_avg": [ 795.5, 59.60494945891658 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.25087260300212727, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11222063921364090985&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Southern University of Science and Technology;Concordia University", "aff_unique_dep": ";", "aff_unique_url": "https://www.sustech.edu.cn;https://www.concordia.ca", "aff_unique_abbr": "SUSTech;Concordia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": 
"China;Canada" }, { "id": "0Lqyut1y7M", "title": "On the Optimality of Activations in Implicit Neural Representations", "track": "main", "status": "Reject", "tldr": "", "abstract": "Implicit neural representations (INRs) have recently surged in popularity as a class of neural networks capable of encoding signals as compact, differentiable entities. To capture high-frequency content, INRs often employ techniques such as Fourier positional encodings or non-traditional activation functions like Gaussian, sinusoid, or wavelets. Despite the impressive results achieved with these activations, there has been limited exploration of their properties within a unified theoretical framework. To address this gap, we conduct a comprehensive analysis of these activations from the perspective of sampling theory. Our investigation reveals that, particularly in the context of shallow INRs, sinc activations\u2014previously unused in conjunction with INRs\u2014are theoretically optimal for signal encoding. Additionally, we establish a connection between dynamical systems and INRs and leverage sampling theory to bridge these two paradigms. Notably, we showcase how the implicit architectural regularization inherent to INRs allows for their application in modeling such systems with minimal need for explicit regularizations.", "keywords": "Implicit Neural Representations;Sampling theory", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/ab32a73c3f93974763b7e3d1f0236f2d1247945c.pdf", "author": "Sameera Ramasinghe;Hemanth Saratchandran;Violetta Shevchenko;Alexander Long;Simon Lucey", "authorids": "~Sameera_Ramasinghe1;~Hemanth_Saratchandran1;~Violetta_Shevchenko1;~Alexander_Long1;~Simon_Lucey2", "gender": "M;;F;M;M", "homepage": ";;;https://github.com/AlexanderJLong;https://www.adelaide.edu.au/directory/simon.lucey", "dblp": "181/4514;;231/1762;156/9630;01/3542", "google_scholar": "https://scholar.google.com.au/citations?user=-j0m9aMAAAAJ;;aWqA0BIAAAAJ;;vmAe35UAAAAJ", "orcid": ";;;;", "linkedin": ";;violetta-shevchenko-12b62714a/;;", "or_profile": "~Sameera_Ramasinghe1;~Hemanth_Saratchandran1;~Violetta_Shevchenko1;~Alexander_Long1;~Simon_Lucey2", "aff": "Amazon;;Amazon;Amazon;University of Adelaide", "aff_domain": "amazon.com;;amazon.com;amazon.com;adelaide.edu.au", "position": "Researcher;;Researcher;Researcher;Full Professor", "bibtex": "@misc{\nramasinghe2024on,\ntitle={On the Optimality of Activations in Implicit Neural Representations},\nauthor={Sameera Ramasinghe and Hemanth Saratchandran and Violetta Shevchenko and Alexander Long and Simon Lucey},\nyear={2024},\nurl={https://openreview.net/forum?id=0Lqyut1y7M}\n}", "github": "", "project": "", "reviewers": "REJk;8ETH;hcEo;9zZJ", "site": "https://openreview.net/forum?id=0Lqyut1y7M", "pdf_size": 40474055, "rating": "5;6;6;6", "confidence": "4;3;4;4", "soundness": "3;3;4;3", "contribution": "3;4;3;3", "presentation": "2;2;3;4", "wc_summary": "103;120;114;95", "wc_strengths": "76;41;77;98", "wc_weaknesses": "500;257;182;119", "wc_questions": "90;241;42;136", "wc_review": "769;659;415;448", "wc_reply_reviewers": "74;69;261;58", "wc_reply_authors": "1237;1244;967;936", "reply_reviewers": "1;1;1;1", "reply_authors": "4;2;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], 
"wc_summary_avg": [ 108.0, 9.669539802906858 ], "wc_strengths_avg": [ 73.0, 20.457272545478784 ], "wc_weaknesses_avg": [ 264.5, 144.475776516342 ], "wc_questions_avg": [ 127.25, 73.60494208950918 ], "wc_review_avg": [ 572.75, 146.97002245356023 ], "wc_reply_reviewers_avg": [ 115.5, 84.20362224987711 ], "wc_reply_authors_avg": [ 1096.0, 144.9361928574088 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:wcXPF73uX7EJ:scholar.google.com/&scioq=On+the+Optimality+of+Activations+in+Implicit+Neural+Representations&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Amazon;University of Adelaide", "aff_unique_dep": "Amazon.com, Inc.;", "aff_unique_url": "https://www.amazon.com;https://www.adelaide.edu.au", "aff_unique_abbr": "Amazon;Adelaide", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United States;Australia" }, { "id": "0NruoU6s5Z", "title": "CompoDiff: Versatile Composed Image Retrieval With Latent Diffusion", "track": "main", "status": "Reject", "tldr": "", "abstract": "This paper proposes a novel diffusion-based model, CompoDiff, for solving Composed Image Retrieval (CIR) with latent diffusion and presents a newly created dataset, named SynthTriplets18M, of 18 million reference images, conditions, and corresponding target image triplets to train the model. CompoDiff and SynthTriplets18M tackle the shortages of the previous CIR approaches, such as poor generalizability due to the small dataset scale and the limited types of conditions. CompoDiff not only achieves a new zero-shot state-of-the-art on four CIR benchmarks, including FashionIQ, CIRR, CIRCO, and GeneCIS, but also enables a more versatile and controllable CIR by accepting various conditions, such as negative text and image mask conditions, and the controllability to the importance between multiple queries or the trade-off between inference speed and the performance which are unavailable with existing CIR methods. 
The code and dataset samples are available at Supplementary Materials.", "keywords": "Composed Image Retrieval;Diffusion Models", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/170316c35b69a427d9cda3e6429b12e1c9cdd3a6.zip", "author": "Geonmo Gu;Sanghyuk Chun;Wonjae Kim;HeeJae Jun;Yoohoon Kang;Sangdoo Yun", "authorids": "~Geonmo_Gu1;~Sanghyuk_Chun1;~Wonjae_Kim1;~HeeJae_Jun1;~Yoohoon_Kang1;~Sangdoo_Yun1", "gender": "M;M;M;M;M;M", "homepage": ";https://sanghyukchun.github.io/home/;https://wonjae.kim;;;https://sangdooyun.github.io/", "dblp": "164/5694;213/1095.html;158/3433;238/0444;71/6597;124/3009.html", "google_scholar": "https://scholar.google.co.kr/citations?view_op=list_works;https://scholar.google.co.kr/citations?user=4_uj0xcAAAAJ;https://scholar.google.co.kr/citations?user=UpZ41EwAAAAJ;;;o0qtjzYAAAAJ", "orcid": "0009-0001-5071-0997;0000-0002-4533-2610;0000-0002-6616-7685;;;", "linkedin": "geonmo-gu-0008b013b/;https://kr.linkedin.com/in/sanghyukchun/en;;;;", "or_profile": "~Geonmo_Gu1;~Sanghyuk_Chun1;~Wonjae_Kim1;~HeeJae_Jun1;~Yoohoon_Kang1;~Sangdoo_Yun1", "aff": "NAVER;NAVER AI Lab;NAVER;NAVER;NAVER;NAVER", "aff_domain": "navercorp.com;navercorp.com;navercorp.com;navercorp.com;navercorp.com;navercorp.com", "position": "Researcher;Lead research scientist;Research Scientist;Researcher;Researcher;Research Scientist", "bibtex": "@misc{\ngu2024compodiff,\ntitle={CompoDiff: Versatile Composed Image Retrieval With Latent Diffusion},\nauthor={Geonmo Gu and Sanghyuk Chun and Wonjae Kim and HeeJae Jun and Yoohoon Kang and Sangdoo Yun},\nyear={2024},\nurl={https://openreview.net/forum?id=0NruoU6s5Z}\n}", "github": "", "project": "", "reviewers": "e76K;FhVu;AJF4;wkZo", "site": "https://openreview.net/forum?id=0NruoU6s5Z", "pdf_size": 6435297, "rating": "5;5;5;6", "confidence": "5;3;2;4", "soundness": "4;3;2;3", "contribution": "3;2;2;2", "presentation": "4;2;3;2", "wc_summary": "84;80;47;56", "wc_strengths": "82;81;109;50", "wc_weaknesses": "264;111;149;157", "wc_questions": "177;65;9;41", "wc_review": "607;337;314;304", "wc_reply_reviewers": "0;113;118;0", "wc_reply_authors": "1636;1550;1222;1145", "reply_reviewers": "0;1;1;0", "reply_authors": "4;4;4;4", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 66.75, 15.642490210960657 ], "wc_strengths_avg": [ 80.5, 20.886598574205422 ], "wc_weaknesses_avg": [ 170.25, 56.847933119859334 ], "wc_questions_avg": [ 73.0, 63.245553203367585 ], "wc_review_avg": [ 390.5, 125.56771081770982 ], "wc_reply_reviewers_avg": [ 57.75, 57.777049941996864 ], "wc_reply_authors_avg": [ 1388.25, 208.77784245460532 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 4.0, 0.0 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.2581988897471611, "gs_citation": 66, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12308069164153977775&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "NAVER Corporation", "aff_unique_dep": "", "aff_unique_url": "https://www.naver.com", "aff_unique_abbr": "NAVER", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Making LLaMA SEE and Draw with 
SEED Tokenizer", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19618", "id": "0Nui91LBQS", "author_site": "Yuying Ge, Sijie Zhao, Ziyun Zeng, Yixiao Ge, Chen Li, Xintao Wang, Ying Shan", "tldr": "", "abstract": "The great success of Large Language Models (LLMs) has expanded the potential of multimodality, contributing to the gradual evolution of General Artificial Intelligence (AGI). A true AGI agent should not only possess the capability to perform predefined multi-tasks but also exhibit emergent abilities in an open-world context. However, despite the considerable advancements made by recent multimodal LLMs, they still fall short in effectively unifying comprehension and generation tasks, let alone open-world emergent abilities. We contend that the key to overcoming the present impasse lies in enabling text and images to be represented and processed interchangeably within a unified autoregressive Transformer. To this end, we introduce $\\textbf{SEED}$, an elaborate image tokenizer that empowers LLMs with the ability to $\\textbf{SEE}$ and $\\textbf{D}$raw at the same time. We identify two crucial design principles: (1) Image tokens should be independent of 2D physical patch positions and instead be produced with a $\\textit{1D causal dependency}$, exhibiting intrinsic interdependence that aligns with the left-to-right autoregressive prediction mechanism in LLMs. (2) Image tokens should capture $\\textit{high-level semantics}$ consistent with the degree of semantic abstraction in words, and be optimized for both discriminativeness and reconstruction during the tokenizer training phase. With SEED tokens, LLM is able to perform scalable multimodal autoregression under its original training recipe, i.e., next-word prediction. SEED-LLaMA is therefore produced by large-scale pretraining and instruction tuning on the interleaved textual and visual data, demonstrating impressive performance on a broad range of multimodal comprehension and generation tasks. More importantly, SEED-LLaMA has exhibited compositional emergent abilities such as multi-turn in-context multimodal generation, acting like your AI assistant. 
The code (training and inference) and models are released in https://github.com/AILab-CVC/SEED.", "keywords": "Multimodal Large Language Model;Discrete Visual Tokenizer", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Yuying Ge;Sijie Zhao;Ziyun Zeng;Yixiao Ge;Chen Li;Xintao Wang;Ying Shan", "authorids": "~Yuying_Ge2;~Sijie_Zhao2;~Ziyun_Zeng1;~Yixiao_Ge2;~Chen_Li34;~Xintao_Wang1;~Ying_Shan2", "gender": "F;M;M;F;Not Specified;;M", "homepage": "https://geyuying.github.io/;https://sijeh.github.io/;https://stdkonjac.icu/;https://geyixiao.com/;https://scholar.google.com/citations?user=5fU_DtEAAAAJ&hl=en;;", "dblp": "223/4673;300/5422;282/8373.html;228/6649;164/3294-46.html;;68/5910", "google_scholar": "hv1LiiEAAAAJ;tZ3dS3MAAAAJ;RuanZLEAAAAJ;TtU74NAAAAAJ;5fU_DtEAAAAJ;;4oXBp9UAAAAJ", "orcid": ";;;;;;0000-0001-7673-8325", "linkedin": ";;;;;;YingShanProfile/", "or_profile": "~Yuying_Ge2;~Sijie_Zhao2;~Ziyun_Zeng1;~Yixiao_Ge2;~Chen_Li34;~Xintao_Wang1;~Ying_Shan2", "aff": "Tencent AI Lab;Tencent AI Lab;Tsinghua University;Tencent;Tencent ARC Lab;;Tencent PCG ARC Lab", "aff_domain": "tencent.com;tencent.com;tsinghua.edu.cn;tencent.com;tencent.com;;arc.tencent.com", "position": "Researcher;Researcher;MS student;Researcher;Researcher;;Director", "bibtex": "@inproceedings{\nge2024making,\ntitle={Making {LL}a{MA} {SEE} and Draw with {SEED} Tokenizer},\nauthor={Yuying Ge and Sijie Zhao and Ziyun Zeng and Yixiao Ge and Chen Li and Xintao Wang and Ying Shan},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=0Nui91LBQS}\n}", "github": "", "project": "", "reviewers": "y9Sh;i2L6;TFPg", "pdf_size": 25208002, "rating": "5;6;8", "confidence": "3;5;4", "soundness": "3;2;3", "contribution": "3;2;3", "presentation": "2;3;3", "wc_summary": "82;59;89", "wc_strengths": "27;28;115", "wc_weaknesses": "240;150;361", "wc_questions": "3;2;125", "wc_review": "352;239;690", "wc_reply_reviewers": "0;23;696", "wc_reply_authors": "1626;706;1495", "reply_reviewers": "0;1;2", "reply_authors": "4;1;2", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 76.66666666666667, 12.81492185782739 ], "wc_strengths_avg": [ 56.666666666666664, 41.24991582482994 ], "wc_weaknesses_avg": [ 250.33333333333334, 86.44972848746 ], "wc_questions_avg": [ 43.333333333333336, 57.74849685393455 ], "wc_review_avg": [ 427.0, 191.60549748550187 ], "wc_reply_reviewers_avg": [ 239.66666666666666, 322.8129833544838 ], "wc_reply_authors_avg": [ 1275.6666666666667, 406.34987661155037 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.3333333333333335, 1.247219128924647 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.3273268353539886, "gs_citation": 116, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14781111967367961466&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=0Nui91LBQS", "pdf": "https://openreview.net/pdf?id=0Nui91LBQS", "email": "tencent.com;tencent.com;tsinghua.edu.cn;tencent.com;tencent.com;;arc.tencent.com", "author_num": 7, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": 
"Tencent;Tsinghua University", "aff_unique_dep": "Tencent AI Lab;", "aff_unique_url": "https://ai.tencent.com;https://www.tsinghua.edu.cn", "aff_unique_abbr": "Tencent AI Lab;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "0P12CqfvUU", "title": "3D Morphable Master Face Generation: Towards Controllable Wolf Attacks against 2D and 3D Face Recognition Systems", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Biometric authentication systems are facing increasing threats from Artificial Intelligence-Generated Content (AIGC). Previous research has revealed the vulnerability of face authentication systems against master face attacks. These attacks utilize generative models to create facial samples capable of matching multiple registered user templates in the database. In this paper, we present a systematic approach for generating master faces that can compromise both 2D and 3D face recognition systems. Notably, our approach is the first to enable morphable and controllable master face attacks on face authentication systems.\n\nOur method generates these 3D master faces using the Latent Variable Evolution (LVE) algorithm with the 3D Face Morphable Model (3DMM). Through comprehensive simulations of simultaneous master face attacks in both white-box, gray-box, and black-box scenarios, we demonstrate the significant threat posed by these 3D master faces to mainstream face authentication systems. Furthermore, we explore the realms of face morphing and facial reenactment in our generated samples, enhancing the efficacy of the master face attack. Compared to existing methods, our approach exhibits superior attack success rates and advanced flexibility, highlighting the importance of defending against master face attacks.", "keywords": "Master Attack;Wolf Attack;3D Morphable Face Model", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/13e766c2aa4be2f94bdd89e55e7c67c3acf0355a.zip", "author": "Siyun Liang;Huy H. Nguyen;Junichi Yamagishi;Isao Echizen", "authorids": "~Siyun_Liang1;~Huy_H._Nguyen1;~Junichi_Yamagishi1;~Isao_Echizen1", "gender": "F;;M;", "homepage": "https://siyun-liang.github.io/;;https://researchmap.jp/read0205283?lang=en;", "dblp": ";;87/3979;", "google_scholar": "ivuEYlUAAAAJ;;nRrdjtwAAAAJ;", "orcid": ";;;", "linkedin": "siyun-liang-328b53222/;;;", "or_profile": "~Siyun_Liang1;~Huy_H._Nguyen1;~Junichi_Yamagishi1;~Isao_Echizen1", "aff": "Technische Universit\u00e4t M\u00fcnchen;;National Institute of Informatics;", "aff_domain": "tum.de;;nii.ac.jp;", "position": "MS student;;Full Professor;", "bibtex": "@misc{\nliang2024d,\ntitle={3D Morphable Master Face Generation: Towards Controllable Wolf Attacks against 2D and 3D Face Recognition Systems},\nauthor={Siyun Liang and Huy H. 
Nguyen and Junichi Yamagishi and Isao Echizen},\nyear={2024},\nurl={https://openreview.net/forum?id=0P12CqfvUU}\n}", "github": "", "project": "", "reviewers": "Tr8G;ARdD;SgrS;FSLJ", "site": "https://openreview.net/forum?id=0P12CqfvUU", "pdf_size": 11460884, "rating": "3;3;3;5", "confidence": "5;3;4;4", "soundness": "3;2;2;3", "contribution": "2;2;2;2", "presentation": "2;1;3;3", "wc_summary": "37;136;85;33", "wc_strengths": "14;99;66;13", "wc_weaknesses": "248;132;64;33", "wc_questions": "5;83;188;3", "wc_review": "304;450;403;82", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1290;1109;1462;350", "reply_reviewers": "0;0;0;0", "reply_authors": "2;2;2;1", "rating_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 72.75, 41.85913878712748 ], "wc_strengths_avg": [ 48.0, 36.42114770294862 ], "wc_weaknesses_avg": [ 119.25, 82.5087116612543 ], "wc_questions_avg": [ 69.75, 75.50951926744071 ], "wc_review_avg": [ 309.75, 141.65870075643076 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1052.75, 424.49815959553933 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:hBCi4MDQwAUJ:scholar.google.com/&scioq=3D+Morphable+Master+Face+Generation:+Towards+Controllable+Wolf+Attacks+against+2D+and+3D+Face+Recognition+Systems&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Technische Universit\u00e4t M\u00fcnchen;National Institute of Informatics", "aff_unique_dep": ";", "aff_unique_url": "https://www.tum.de;https://www.nii.ac.jp/", "aff_unique_abbr": "TUM;NII", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Germany;Japan" }, { "id": "0PeEOq5iK8", "title": "DST-Det: Simple Dynamic Self-Training for Open-Vocabulary Object Detection", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Open-vocabulary object detection (OVOD) aims to detect the objects \\textit{beyond} the set of categories observed during training. This work presents a simple yet effective strategy that leverages the zero-shot classification ability of pre-trained vision-language models (VLM), such as CLIP, to classify proposals for all possible novel classes directly. Unlike previous works that ignore novel classes during training and rely solely on the region proposal network (RPN) for novel object detection, our method selectively filters proposals based on specific design criteria. The resulting sets of identified proposals serve as pseudo-labels for novel classes during the training phase. It enables our self-training strategy to improve the recall and accuracy of novel classes in a self-training manner without requiring additional annotations or datasets. We further propose a simple offline pseudo-label generation strategy to refine the object detector. Empirical evaluations on three datasets, including LVIS, V3Det, and COCO, demonstrate significant improvements over the baseline performance without incurring additional parameters or computational costs during inference. 
In particular, compared with previous F-VLM, our method achieves a 1.7-2.0\\% improvement on LVIS dataset and 2.3-3.8\\% improvement on the recent challenging V3Det dataset. Our method also boosts the strong baseline by 6\\% mAP on COCO. The code and models will be publicly available.", "keywords": "open-vocabulary;object detection;self-training", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Shilin Xu;Xiangtai Li;Size Wu;Wenwei Zhang;Yining Li;Guangliang Cheng;Yunhai Tong;Kai Chen;Chen Change Loy", "authorids": "~Shilin_Xu1;~Xiangtai_Li1;~Size_Wu1;~Wenwei_Zhang1;~Yining_Li1;~Guangliang_Cheng2;~Yunhai_Tong1;~Kai_Chen4;~Chen_Change_Loy2", "gender": "M;;M;M;M;M;M;M;M", "homepage": ";;https://wusize.github.io/;https://zhangwenwei.cn;https://liyn.site;https://sites.google.com/view/guangliangcheng;http://www.cis.pku.edu.cn/faculty/system/tongyunhai/tongyunhai.htm;https://chenkai.site/;https://www.mmlab-ntu.com/person/ccloy/index.html", "dblp": "33/4756;;301/9535;;166/3420;;14/1705;181/2839-26;01/5855", "google_scholar": "8bBcL9sAAAAJ;;y2S02IcAAAAJ;QDXADSEAAAAJ;https://scholar.google.com.hk/citations?user=y_cp1sUAAAAJ;FToOC-wAAAAJ;T4gqdPkAAAAJ;https://scholar.google.com.hk/citations?user=eGD0b7IAAAAJ;https://scholar.google.co.uk/citations?user=559LF80AAAAJ", "orcid": ";;;0000-0002-2748-4514;;;;0000-0002-6820-2325;0000-0001-5345-1591", "linkedin": ";;%E6%80%9D%E6%B3%BD-%E5%90%B4-721544198/;wenweizhang-b9769a124/;;;;;", "or_profile": "~Shilin_Xu1;~Xiangtai_Li1;~Size_Wu1;~Wenwei_Zhang1;~Yining_Li1;~Guangliang_Cheng2;~Yunhai_Tong1;~Kai_Chen4;~Chen_Change_Loy2", "aff": "Peking University;;Nanyang Technological University;Shanghai AI Laboratory;Shanghai AI Laboratory;University of Liverpool;Peking University;Shanghai AI Laboratory;Nanyang Technological University", "aff_domain": "pku.edu.cn;;ntu.edu.sg;pjlab.org.cn;pjlab.org.cn;liverpool.ac.uk;pku.edu.cn;pjlab.org.cn;ntu.edu.sg", "position": "PhD student;;PhD student;Researcher;Researcher;Associate Professor;Full Professor;Researcher;Full Professor", "bibtex": "@misc{\nxu2024dstdet,\ntitle={{DST}-Det: Simple Dynamic Self-Training for Open-Vocabulary Object Detection},\nauthor={Shilin Xu and Xiangtai Li and Size Wu and Wenwei Zhang and Yining Li and Guangliang Cheng and Yunhai Tong and Kai Chen and Chen Change Loy},\nyear={2024},\nurl={https://openreview.net/forum?id=0PeEOq5iK8}\n}", "github": "", "project": "", "reviewers": "ht3G;Fu38;Sap3;ULPC;jftV", "site": "https://openreview.net/forum?id=0PeEOq5iK8", "pdf_size": 11566249, "rating": "3;3;3;5;5", "confidence": "4;5;4;3;5", "soundness": "2;3;3;2;3", "contribution": "1;1;2;2;3", "presentation": "3;3;2;3;3", "wc_summary": "49;114;45;76;49", "wc_strengths": "38;19;24;37;84", "wc_weaknesses": "24;147;110;157;147", "wc_questions": "66;46;23;12;217", "wc_review": "177;326;202;282;497", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "0;0;0;0;0", "rating_avg": [ 3.8, 0.9797958971132712 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "contribution_avg": [ 1.8, 0.7483314773547883 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 66.6, 26.157981573508305 ], "wc_strengths_avg": [ 40.4, 23.000869548780106 ], "wc_weaknesses_avg": [ 117.0, 49.189429758841484 ], "wc_questions_avg": [ 72.8, 74.47791618996868 ], "wc_review_avg": [ 296.8, 113.57006647880418 ], "wc_reply_reviewers_avg": 
[ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.21821789023599233, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6953300397849811721&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2;2;3;0;2;1", "aff_unique_norm": "Peking University;Nanyang Technological University;Shanghai AI Laboratory;University of Liverpool", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.pku.edu.cn;https://www.ntu.edu.sg;https://www.shanghai-ai-lab.com;https://www.liverpool.ac.uk", "aff_unique_abbr": "Peking U;NTU;SAIL;Liv Uni", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;2;0;0;1", "aff_country_unique": "China;Singapore;United Kingdom" }, { "id": "0Pu0H7y3gg", "title": "Understanding the Initial Condensation of Convolutional Neural Networks", "track": "main", "status": "Reject", "tldr": "", "abstract": "Previous research has shown that fully-connected neural networks with small initialization and gradient-based training methods exhibit a phenomenon known as condensation during training. This phenomenon refers to the input weights of hidden neurons condensing into isolated orientations during training, revealing an implicit bias towards simple solutions in the parameter space. However, the impact of neural network structure on condensation remains unknown. In this study, we study convolutional neural networks (CNNs) as the starting point to explore the distinctions in the condensation behavior compared to fully-connected neural networks. Theoretically, we firstly demonstrate that under gradient descent (GD) and the small initialization scheme, the convolutional kernels of a two-layer CNN condense towards a specific direction determined by the training samples within a given time period. Subsequently, we conduct a series of systematic experiments to substantiate our theory and confirm condensation in more general settings. 
These findings contribute to a preliminary understanding of the non-linear training behavior exhibited by CNNs.", "keywords": "training dynamics;convolutional neural networks;initialization;gradient-based training methods", "primary_area": "learning theory", "supplementary_material": "/attachment/610c4b13cbb73c3247427afd9d40ea264f618317.zip", "author": "Zhangchen Zhou;Hanxu Zhou;Yuqing Li;Zhi-Qin John Xu", "authorids": "~Zhangchen_Zhou1;~Hanxu_Zhou1;~Yuqing_Li3;~Zhi-Qin_John_Xu1", "gender": "M;;M;M", "homepage": ";;https://math.sjtu.edu.cn/Default/faculty/pages/MDAwMDAwMDAwMLKFbqE;https://ins.sjtu.edu.cn/people/xuzhiqin/", "dblp": ";;;223/4493.html", "google_scholar": "qNf_HecAAAAJ;https://scholar.google.com.hk/citations?user=ypD3aL8AAAAJ;https://scholar.google.com.hk/citations?hl=en;EjLvG5cAAAAJ", "orcid": ";;;0000-0002-0122-0879", "linkedin": "https://www.linkedin.cn/incareer/in/%E7%AB%A0%E8%BE%B0-%E5%91%A8-002a0a275;;;", "or_profile": "~Zhangchen_Zhou1;~Hanxu_Zhou1;~Yuqing_Li3;~Zhiqin_Xu1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;PhD student;Postdoc;Associate Professor", "bibtex": "@misc{\nzhou2024understanding,\ntitle={Understanding the Initial Condensation of Convolutional Neural Networks},\nauthor={Zhangchen Zhou and Hanxu Zhou and Yuqing Li and Zhi-Qin John Xu},\nyear={2024},\nurl={https://openreview.net/forum?id=0Pu0H7y3gg}\n}", "github": "", "project": "", "reviewers": "mFgA;FvGf;P4L3", "site": "https://openreview.net/forum?id=0Pu0H7y3gg", "pdf_size": 0, "rating": "3;3;6", "confidence": "2;3;3", "soundness": "2;3;3", "contribution": "2;1;3", "presentation": "1;1;3", "wc_summary": "67;74;18", "wc_strengths": "20;42;68", "wc_weaknesses": "100;431;36", "wc_questions": "63;241;73", "wc_review": "250;788;195", "wc_reply_reviewers": "133;128;0", "wc_reply_authors": "835;2133;184", "reply_reviewers": "1;1;0", "reply_authors": "2;5;1", "rating_avg": [ 4.0, 1.4142135623730951 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.0, 0.816496580927726 ], "presentation_avg": [ 1.6666666666666667, 0.9428090415820634 ], "wc_summary_avg": [ 53.0, 24.91318258807306 ], "wc_strengths_avg": [ 43.333333333333336, 19.61858529274955 ], "wc_weaknesses_avg": [ 189.0, 173.10305215872614 ], "wc_questions_avg": [ 125.66666666666667, 81.65510122188053 ], "wc_review_avg": [ 411.0, 267.52320771601603 ], "wc_reply_reviewers_avg": [ 87.0, 61.55214591439251 ], "wc_reply_authors_avg": [ 1050.6666666666667, 810.1581464271172 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 1.699673171197595 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14159339468738951420&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "0Q1mBvUgmt", "title": "VIPER: Vibrant Period Representation for Robust and Efficient Time Series Forecasting", "track": "main", "status": "Reject", 
"tldr": "", "abstract": "In a data-driven world teeming with vast volumes of time series data, forecasting models play a pivotal role. The real-world time series data often exhibits intricate periodic patterns and trends, posing challenges for accurate modeling. Existing methods, reliant on fixed parameters and sampling techniques, may struggle to capture these complexities effectively. This paper designs a Vibrant Period Representation Enrichment (VIPER) framework, which effectively and dynamically harnesses the inherent multi-periodic nature of time series data. The VIPER framework adeptly separates the input sequence into trend and seasonal components. A Temporal Aggregation Block is specifically deployed for processing the seasonal component, applying innovative multi-period transformations compounded with global self-attention mechanism. This configuration enables a comprehensive capture of both short-term and long-term period information, culminating in a vibrant period representation true to the essence of the temporal dynamics. Remarkably, experimental results from eight different time series forecasting datasets substantiate the superior performance, simplicity, and computational efficiency of VIPER compared with the state-of-the-arts.", "keywords": "long-term forecasting;deep learning", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/d49b4335ba136b915c9c1481eb598f4c0179bca3.zip", "author": "Guoqi Yu;Muxuan Li;Xiaowei Hu;Angelica I Aviles-Rivero;Shujun Wang", "authorids": "~Guoqi_Yu1;~Muxuan_Li1;~Xiaowei_Hu3;~Angelica_I_Aviles-Rivero1;~Shujun_Wang1", "gender": "M;M;M;F;F", "homepage": "https://github.com/Levi-Ackman;https://cv.transmux.top;https://xw-hu.github.io/;https://angelicaiaviles.wordpress.com/;https://emma-sjwang.github.io/", "dblp": ";;151/5859-1;138/9507;", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-CN;;tUb4J0kAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?user=WFELH6IAAAAJ", "orcid": ";;0000-0002-5708-7018;;0000-0003-1495-3278", "linkedin": ";;;;", "or_profile": "~Guoqi_Yu1;~Muxuan_Li1;~Xiaowei_Hu3;~Angelica_I_Aviles-Rivero1;~Shujun_Wang1", "aff": "University of Electronic Science and Technology of China;University of Electronic Science and Technology of China;Shanghai Artificial Intelligence Laboratory;University of Cambridge;Hong Kong Polytechnic University", "aff_domain": "uestc.edu.cn;uestc.edu.cn;pjlab.org.cn;cam.ac.uk;polyu.edu.hk", "position": "Undergrad student;Undergrad student;Researcher;Senior Research Associate;Assistant Professor", "bibtex": "@misc{\nyu2024viper,\ntitle={{VIPER}: Vibrant Period Representation for Robust and Efficient Time Series Forecasting},\nauthor={Guoqi Yu and Muxuan Li and Xiaowei Hu and Angelica I Aviles-Rivero and Shujun Wang},\nyear={2024},\nurl={https://openreview.net/forum?id=0Q1mBvUgmt}\n}", "github": "", "project": "", "reviewers": "PNtL;juBb;NQNh", "site": "https://openreview.net/forum?id=0Q1mBvUgmt", "pdf_size": 1973289, "rating": "3;3;3", "confidence": "5;4;4", "soundness": "2;2;2", "contribution": "2;2;2", "presentation": "2;2;4", "wc_summary": "22;68;129", "wc_strengths": "18;30;45", "wc_weaknesses": "136;66;155", "wc_questions": "4;123;25", "wc_review": "180;287;354", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 3.0, 0.0 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], 
"soundness_avg": [ 2.0, 0.0 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.9428090415820634 ], "wc_summary_avg": [ 73.0, 43.825411197918804 ], "wc_strengths_avg": [ 31.0, 11.045361017187261 ], "wc_weaknesses_avg": [ 119.0, 38.27096375408734 ], "wc_questions_avg": [ 50.666666666666664, 51.86092513208336 ], "wc_review_avg": [ 273.6666666666667, 71.6581390275305 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:IESzvRhCyg8J:scholar.google.com/&scioq=VIPER:+Vibrant+Period+Representation+for+Robust+and+Efficient+Time+Series+Forecasting&hl=en&as_sdt=0,21", "gs_version_total": 0, "aff_unique_index": "0;0;1;2;3", "aff_unique_norm": "University of Electronic Science and Technology of China;Shanghai Artificial Intelligence Laboratory;University of Cambridge;Hong Kong Polytechnic University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.uestc.edu.cn;http://www.shailab.org/;https://www.cam.ac.uk;https://www.polyu.edu.hk", "aff_unique_abbr": "UESTC;Shanghai AI Lab;Cambridge;PolyU", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Cambridge;Hong Kong SAR", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "China;United Kingdom" }, { "title": "Inner Classifier-Free Guidance and Its Taylor Expansion for Diffusion Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19617", "id": "0QAzIMq32X", "author_site": "Shikun Sun, Longhui Wei, Zhicai Wang, Zixuan Wang, Junliang Xing, Jia Jia, Qi Tian", "tldr": "", "abstract": "Classifier-free guidance (CFG) is a pivotal technique for balancing the diversity and fidelity of samples in conditional diffusion models. This approach involves utilizing a single model to jointly optimize the conditional score predictor and unconditional score predictor, eliminating the need for additional classifiers. It delivers impressive results and can be employed for continuous and discrete condition representations. However, when the condition is continuous, it prompts the question of whether the trade-off can be further enhanced. Our proposed inner classifier-free guidance (ICFG) provides an alternative perspective on the CFG method when the condition has a specific structure, demonstrating that CFG represents a first-order case of ICFG. 
Additionally, we offer a second-order implementation, highlighting that even without altering the training policy, our second-order approach can introduce new valuable information and achieve an improved balance between fidelity and diversity for Stable Diffusion.", "keywords": "diffusion models;classifier-free guidance", "primary_area": "generative models", "supplementary_material": "/attachment/4f440ab4467b49465a78bf5306a0243866f9c038.zip", "author": "Shikun Sun;Longhui Wei;Zhicai Wang;Zixuan Wang;Junliang Xing;Jia Jia;Qi Tian", "authorids": "~Shikun_Sun1;~Longhui_Wei1;~Zhicai_Wang1;~Zixuan_Wang3;~Junliang_Xing1;~Jia_Jia1;~Qi_Tian3", "gender": "M;M;M;M;M;F;M", "homepage": "https://skipper17.github.io;https://joinwei-pku.github.io/longhuiwei.github.io/;;http://hcsi.cs.tsinghua.edu.cn;http://people.ucas.ac.cn/~jlxing?language=en;https://hcsi.cs.tsinghua.edu.cn/;https://www.qitian1987.com/index.html", "dblp": "293/2733.html;206/6179;250/1975;05/10698;43/7659.html;71/2992-1.html;78/1467-1.html", "google_scholar": "C1YFRxAAAAAJ;thhnAhIAAAAJ;https://scholar.google.com/citations?hl=zh-CN;4JKwZRsAAAAJ;jSwNd3MAAAAJ;RYhh3FsAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;0000-0001-7291-6198;0000-0001-6801-0510;;0000-0002-7252-5047", "linkedin": ";;;https://www.linkedin.cn/injobs/in/\u5b50\u8f69-\u738b-b214ba228;https://www.linkedin.cn/incareer/in/ACoAAAvlU14B40ZWH1pxg5JJDtQ6LlgMYkp0e5s;;", "or_profile": "~Shikun_Sun1;~Longhui_Wei1;~Zhicai_Wang1;~Zixuan_Wang3;~Junliang_Xing1;~Jia_Jia1;~Qi_Tian3", "aff": "Tsinghua University;Huawei Technologies Ltd.;University of Science and Technology of China;Tsinghua University;Tsinghua University;Tsinghua University;Huawei Technologies Ltd.", "aff_domain": "tsinghua.edu.cn;huawei.com;ustc.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;huawei.com", "position": "PhD student;Researcher;PhD student;PhD student;Full Professor;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\nsun2024inner,\ntitle={Inner Classifier-Free Guidance and Its Taylor Expansion for Diffusion Models},\nauthor={Shikun Sun and Longhui Wei and Zhicai Wang and Zixuan Wang and Junliang Xing and Jia Jia and Qi Tian},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=0QAzIMq32X}\n}", "github": "", "project": "", "reviewers": "UdAr;cPdv;jYKn;LfUU", "pdf_size": 1821720, "rating": "5;5;6;8", "confidence": "4;3;4;3", "soundness": "1;3;2;4", "contribution": "2;2;2;3", "presentation": "2;2;2;4", "wc_summary": "59;59;66;34", "wc_strengths": "27;50;22;63", "wc_weaknesses": "267;165;102;226", "wc_questions": "8;40;71;11", "wc_review": "361;314;261;334", "wc_reply_reviewers": "213;338;25;354", "wc_reply_authors": "1798;2100;730;1419", "reply_reviewers": "3;2;1;2", "reply_authors": "6;6;3;3", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 1.118033988749895 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 54.5, 12.175795661885921 ], "wc_strengths_avg": [ 40.5, 16.740669042783207 ], "wc_weaknesses_avg": [ 190.0, 62.437969217456136 ], "wc_questions_avg": [ 32.5, 25.5 ], "wc_review_avg": [ 317.5, 36.63673020344474 ], "wc_reply_reviewers_avg": [ 232.5, 131.65200340291065 ], "wc_reply_authors_avg": [ 1511.75, 511.7892022112229 ], "reply_reviewers_avg": [ 2.0, 0.7071067811865476 ], "reply_authors_avg": [ 4.5, 1.5 ], "replies_avg": [ 33, 0 ], "authors#_avg": [ 7, 0 ], 
"corr_rating_confidence": -0.40824829046386296, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3080187669946390848&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=0QAzIMq32X", "pdf": "https://openreview.net/pdf?id=0QAzIMq32X", "email": "tsinghua.edu.cn;huawei.com;ustc.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;huawei.com", "author_num": 7, "aff_unique_index": "0;1;2;0;0;0;1", "aff_unique_norm": "Tsinghua University;Huawei;University of Science and Technology of China", "aff_unique_dep": ";Huawei Technologies;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.huawei.com;http://www.ustc.edu.cn", "aff_unique_abbr": "THU;Huawei;USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "0Qyxw0cCuu", "title": "CONTROL: A Contrastive Learning Framework for Open World Semi-Supervised Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "In recent years, open-world semi-supervised Learning has received tremendous attention. This is largely due to the fact that unlabeled real-world data often encompasses unseen classes -- those that are not represented in labeled datasets. Such classes can adversely affect the performance of traditional semi-supervised learning methods. The open-world semi-supervised learning algorithms are designed to enable models to distinguish between both seen and unseen classes. However, existing algorithms still suffer from the problem of insufficient classification of unseen classes and may face the risk of representation collapse. In order to better address the aforementioned issues, we propose a contrastive learning framework called CONTROL that integrates three optimization objectives: nearest neighbor contrastive learning, supervised contrastive learning, and unsupervised contrastive learning. The significance of the framework is explained by theoretically proving the optimization of contrastive learning at the feature level benefits unseen classification, and the uniformity mechanism in contrastive learning further helps to prevent representation collapse. Serving as a unified and efficient framework, CONTROL is compatible with a broad range of existing open-world semi-supervised learning algorithms. Through empirical studies, we highlight the superiority of CONTROL over prevailing state-of-the-art open-world semi-supervised learning algorithms. 
Remarkably, our method achieves significant improvement in both unseen class classification and all class classification over previous methods on both CIFAR and ImageNet datasets.", "keywords": "Contrastive Learning; Semi-Supervised Learning", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Jingyi Cui;Yi-Ge Zhang;Yisen Wang", "authorids": "~Jingyi_Cui1;~Yi-Ge_Zhang1;~Yisen_Wang1", "gender": "F;M;M", "homepage": "https://zero-lab-pku.github.io/personwise/cuijingyi/;https://www.lamda.nju.edu.cn/zhangyg/;https://yisenwang.github.io/", "dblp": "216/3282;346/0977;172/1346-1", "google_scholar": ";;uMWPDboAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Jingyi_Cui1;~Yi-Ge_Zhang1;~Yisen_Wang1", "aff": "Peking University;Nanjing University;Peking University", "aff_domain": "pku.edu.cn;nju.edu.cn;pku.edu.cn", "position": "PhD student;MS student;Assistant Professor", "bibtex": "@misc{\ncui2024control,\ntitle={{CONTROL}: A Contrastive Learning Framework for Open World Semi-Supervised Learning},\nauthor={Jingyi Cui and Yi-Ge Zhang and Yisen Wang},\nyear={2024},\nurl={https://openreview.net/forum?id=0Qyxw0cCuu}\n}", "github": "", "project": "", "reviewers": "mngU;fiFv;1Kmz;1XMG", "site": "https://openreview.net/forum?id=0Qyxw0cCuu", "pdf_size": 958055, "rating": "5;5;5;5", "confidence": "4;4;4;2", "soundness": "1;1;3;2", "contribution": "1;1;3;3", "presentation": "1;2;2;2", "wc_summary": "30;67;121;53", "wc_strengths": "20;20;124;33", "wc_weaknesses": "544;172;343;109", "wc_questions": "34;16;117;97", "wc_review": "628;275;705;292", "wc_reply_reviewers": "120;70;0;0", "wc_reply_authors": "832;553;286;430", "reply_reviewers": "1;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 1.75, 0.82915619758885 ], "contribution_avg": [ 2.0, 1.0 ], "presentation_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_avg": [ 67.75, 33.46173187388842 ], "wc_strengths_avg": [ 49.25, 43.48203652084387 ], "wc_weaknesses_avg": [ 292.0, 168.8149874863011 ], "wc_questions_avg": [ 66.0, 42.089191011469914 ], "wc_review_avg": [ 475.0, 193.51873294335098 ], "wc_reply_reviewers_avg": [ 47.5, 50.68283733178323 ], "wc_reply_authors_avg": [ 525.25, 200.73536683903015 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:7liTmUjPLd8J:scholar.google.com/&scioq=CONTROL:+A+Contrastive+Learning+Framework+for+Open+World+Semi-Supervised+Learning&hl=en&as_sdt=0,10", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "Peking University;Nanjing University", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;https://www.nju.edu.cn", "aff_unique_abbr": "Peking U;Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "0S0CgZEYxR", "title": "Examining the Achilles' Heel of CLIP Models: The Worst-Performing Categories", "track": "main", "status": "Reject", "tldr": "", "abstract": "Contrastive Language-Image Pre-training (CLIP) provides a foundation model by integrating natural language into visual concepts. 
Although previous studies have demonstrated that satisfactory overall accuracy can be achieved across numerous downstream tasks through well-designed textual prompts, this evaluation mechanism inevitably overlooks certain categories because the impact of some underperforming categories on overall performance remains limited, even if they are highly important. For example, on ImageNet, there are a total of 10 categories with class-wise accuracy as low as 0\\%, which is significantly inferior to the overall performance of 64.1\\%. This phenomenon reveals the potential risks of using CLIP models, especially in risk-sensitive applications. To address this issue, we investigate the alignment between the two modalities in the CLIP model and propose the Class-wise Matching Margin (\\cmm) to measure the inference confusion. \\cmm\\ can effectively identify the worst-performing categories and estimate the potential performance of the candidate prompts. We further query large language models to enrich descriptions of worst-performing categories and build a weighted ensemble to highlight the efficient prompts. Experimental results clearly verify the effectiveness of our proposal, where the accuracy on the worst-10 categories on ImageNet is boosted to 5.2\\%, without manual prompt engineering, laborious optimization, or access to labeled validation data.", "keywords": "vision-language models;worst-class performance;CLIP;prompt ensemble;zero-shot recognition", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/3c17115070f3450c754a665a6f0a558b9b714aee.zip", "author": "Jie-Jing Shao;Jiang-Xin Shi;Xiao-Wen Yang;Lan-Zhe Guo;Yu-Feng Li", "authorids": "~Jie-Jing_Shao1;~Jiang-Xin_Shi1;~Xiao-Wen_Yang4;~Lan-Zhe_Guo2;~Yu-Feng_Li1", "gender": "M;;M;M;M", "homepage": "http://www.lamda.nju.edu.cn/shaojj/;http://www.lamda.nju.edu.cn/shijx;http://www.lamda.nju.edu.cn/guolz;https://www.lamda.nju.edu.cn/yangxw;https://cs.nju.edu.cn/liyf/index.htm", "dblp": "299/4982;299/5485.html;216/4845;165/2864.html;57/413", "google_scholar": "k1tEDpQAAAAJ;KEgtGncAAAAJ;dpunvqgAAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": "0000-0001-8107-114X;0000-0002-0318-0911;;0009-0007-4206-6242;0000-0002-2220-5248", "linkedin": ";;;;", "or_profile": "~Jie-Jing_Shao1;~Jiang-Xin_Shi1;~Lan-Zhe_Guo2;~Xiao-wen_Yang3;~Yu-feng_Li2", "aff": "Nanjing University;Nanjing University;Nanjing University;Nanjing University;Nanjing University", "aff_domain": "nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn", "position": "PhD student;PhD student;Assistant Professor;PhD student;Assistant Professor", "bibtex": "@misc{\nshao2024examining,\ntitle={Examining the Achilles' Heel of {CLIP} Models: The Worst-Performing Categories},\nauthor={Jie-Jing Shao and Jiang-Xin Shi and Xiao-Wen Yang and Lan-Zhe Guo and Yu-Feng Li},\nyear={2024},\nurl={https://openreview.net/forum?id=0S0CgZEYxR}\n}", "github": "", "project": "", "reviewers": "7ik6;PArF;RDxr", "site": "https://openreview.net/forum?id=0S0CgZEYxR", "pdf_size": 8617562, "rating": "3;5;5", "confidence": "5;4;4", "soundness": "1;3;2", "contribution": "1;3;2", "presentation": "2;3;3", "wc_summary": "85;45;103", "wc_strengths": "10;26;29", "wc_weaknesses": "184;120;63", "wc_questions": "3;24;19", "wc_review": "282;215;214", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": 
[ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.0, 0.816496580927726 ], "contribution_avg": [ 2.0, 0.816496580927726 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 77.66666666666667, 24.239545283597124 ], "wc_strengths_avg": [ 21.666666666666668, 8.339997335464536 ], "wc_weaknesses_avg": [ 122.33333333333333, 49.42558941367203 ], "wc_questions_avg": [ 15.333333333333334, 8.9566858950296 ], "wc_review_avg": [ 237.0, 31.822423959633664 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.9999999999999997, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:dBXEmwtk1joJ:scholar.google.com/&scioq=Examining+the+Achilles%27+Heel+of+CLIP+Models:+The+Worst-Performing+Categories&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": "https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "0SOhDO7xI0", "title": "DeepDRK: Deep Dependency Regularized Knockoff for Feature Selection", "track": "main", "status": "Reject", "tldr": "", "abstract": "Model-X knockoff, among various feature selection methods, received much attention recently due to its guarantee on false discovery rate (FDR) control. Subsequent to its introduction in parametric design, knockoff is advanced to handle arbitrary data distributions using deep learning-based generative modeling. However, we observed that current implementations of the deep Model-X knockoff framework exhibit limitations. Notably, the \"swap property\" that knockoffs necessitate frequently encounter challenges on sample level, leading to a diminished selection power. To overcome, we develop \"Deep Dependency Regularized Knockoff (DeepDRK)\", a distribution-free deep learning method that strikes a balance between FDR and power. In DeepDRK, a generative model grounded in a transformer architecture is introduced to better achieve the \"swap property\". Novel efficient regularization techniques are also proposed to reach higher power. 
Our model outperforms other benchmarks in synthetic, semi-synthetic, and real-world data, especially when sample size is small and data distribution is complex.", "keywords": "Feature Selection;Deep Learning;Model-X Knockoff;FDR Control", "primary_area": "generative models", "supplementary_material": "", "author": "Hongyu Shen;Yici Yan;Zhizhen Zhao", "authorids": "~Hongyu_Shen1;~Yici_Yan1;~Zhizhen_Zhao1", "gender": "M;M;", "homepage": ";;", "dblp": "35/7565;;122/3155", "google_scholar": "CVCKr-EAAAAJ;;", "orcid": ";;", "linkedin": "hongyu-shen-61943267/;yici-yan-418b36117;", "or_profile": "~Hongyu_Shen1;~Yici_Yan1;~Zhizhen_Zhao1", "aff": "University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;University of Washington", "aff_domain": "illinois.edu;illinois.edu;uw.edu", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@misc{\nshen2024deepdrk,\ntitle={Deep{DRK}: Deep Dependency Regularized Knockoff for Feature Selection},\nauthor={Hongyu Shen and Yici Yan and Zhizhen Zhao},\nyear={2024},\nurl={https://openreview.net/forum?id=0SOhDO7xI0}\n}", "github": "", "project": "", "reviewers": "CLhf;Zneh;cU77;e4Vy", "site": "https://openreview.net/forum?id=0SOhDO7xI0", "pdf_size": 847574, "rating": "5;6;6;6", "confidence": "2;2;3;2", "soundness": "3;3;3;3", "contribution": "3;2;3;3", "presentation": "3;2;3;3", "wc_summary": "70;78;34;76", "wc_strengths": "49;28;41;69", "wc_weaknesses": "194;148;150;9", "wc_questions": "34;19;35;27", "wc_review": "347;273;260;181", "wc_reply_reviewers": "0;0;0;23", "wc_reply_authors": "1202;1084;830;30", "reply_reviewers": "0;0;0;1", "reply_authors": "3;3;2;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 2.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 64.5, 17.853571071357123 ], "wc_strengths_avg": [ 46.75, 14.872373717735847 ], "wc_weaknesses_avg": [ 125.25, 69.589420891397 ], "wc_questions_avg": [ 28.75, 6.417748826496718 ], "wc_review_avg": [ 265.25, 58.88282856656939 ], "wc_reply_reviewers_avg": [ 5.75, 9.959292143521045 ], "wc_reply_authors_avg": [ 786.5, 456.98222066071673 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5293816254708543531&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Illinois Urbana-Champaign;University of Washington", "aff_unique_dep": ";", "aff_unique_url": "https://illinois.edu;https://www.washington.edu", "aff_unique_abbr": "UIUC;UW", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "0SSiZ5vYO8", "title": "Multi-Prompt Denoised Self-Training for Open-Vocabulary Model Adaptation", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Traditional model adaptation assumes the same vocabulary across source and target domains, which often struggles with limited transfer flexibility and efficiency while handling target domains with different vocabularies. 
Inspired by recent vision-language models (VLMs) that enable open-vocabulary visual recognition by reasoning on both images and texts, we study open-vocabulary model adaptation (OVMA), a new unsupervised model adaptation framework that positions a pre-trained VLM as the source model and transfers it towards arbitrary unlabelled target domains. To this end, we design a Multi-prompt denOised Self-Training (MOST) technique that exploits the synergy between vision and language to mitigate the domain discrepancies in image and text distributions simultaneously. Specifically, MOST makes use of the complementary property of multiple prompts within and across vision and language modalities, which enables joint exploitation of vision and language information and effective learning of image-text correspondences in the unlabelled target domains. Additionally, MOST captures temporal information via multi-temporal prompt learning which helps memorize previously learnt target information. Extensive experiments show that MOST outperforms the state-of-the-art consistently across 11 image recognition tasks. Codes will be released", "keywords": "vision language models;model adaptation;transductive transfer learning", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "", "author": "Jiaxing Huang;Jingyi Zhang;Han Qiu;Sheng Jin;Lewei Lu;Shijian Lu", "authorids": "~Jiaxing_Huang2;~Jingyi_Zhang7;~Han_Qiu2;~Sheng_Jin3;~Lewei_Lu1;~Shijian_Lu1", "gender": "M;M;M;M;M;F", "homepage": "https://jxhuang0508.github.io/;https://qhqk.github.io/hanqiu.github.io/;;;https://personal.ntu.edu.sg/shijian.lu/;", "dblp": "62/6016-1.html;;70/6780-2;247/6438;42/2718;15/91-5", "google_scholar": "czirNcwAAAAJ;YThp3g8AAAAJ;https://scholar.google.com/citations?view_op=list_works;https://scholar.google.com.hk/citations?user=zdgKJXIAAAAJ;https://scholar.google.com.sg/scholar?hl=en;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0001-7254-1664;;;", "linkedin": ";;;lewei-lu-94015977/;;jingyi-zhang-6510871b0/", "or_profile": "~Jiaxing_Huang2;~Han_Qiu2;~Sheng_Jin3;~Lewei_Lu1;~Shijian_Lu1;~JINGYI_ZHANG4", "aff": "Nanyang Technological University;Nanyang Technological University;Nanyang Technological University;SenseTime;Nanyang Technological University;Nanyang Technological University", "aff_domain": "ntu.edu.sg;ntu.edu.sg;ntu.edu.sg;sensetime.com;ntu.edu.sg;ntu.edu.sg", "position": "Postdoc;PhD student;Postdoc;Researcher;Associate Professor;PhD student", "bibtex": "@misc{\nhuang2024multiprompt,\ntitle={Multi-Prompt Denoised Self-Training for Open-Vocabulary Model Adaptation},\nauthor={Jiaxing Huang and Jingyi Zhang and Han Qiu and Sheng Jin and Lewei Lu and Shijian Lu},\nyear={2024},\nurl={https://openreview.net/forum?id=0SSiZ5vYO8}\n}", "github": "", "project": "", "reviewers": "Zzqn;5Vkc;vh13;yo5V", "site": "https://openreview.net/forum?id=0SSiZ5vYO8", "pdf_size": 6765376, "rating": "5;5;5;6", "confidence": "4;5;5;3", "soundness": "3;3;2;3", "contribution": "2;3;2;3", "presentation": "3;3;2;3", "wc_summary": "61;81;265;65", "wc_strengths": "33;92;23;79", "wc_weaknesses": "131;317;16;267", "wc_questions": "53;98;2;61", "wc_review": "278;588;306;472", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 
0.4330127018922193 ], "wc_summary_avg": [ 118.0, 85.19976525789258 ], "wc_strengths_avg": [ 56.75, 29.328953271468794 ], "wc_weaknesses_avg": [ 182.75, 117.90329723972947 ], "wc_questions_avg": [ 53.5, 34.23813663153998 ], "wc_review_avg": [ 411.0, 126.25767303415662 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:-BhvtsjAY5QJ:scholar.google.com/&scioq=Multi-Prompt+Denoised+Self-Training+for+Open-Vocabulary+Model+Adaptation&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "Nanyang Technological University;SenseTime", "aff_unique_dep": ";", "aff_unique_url": "https://www.ntu.edu.sg;https://www.sensetime.com", "aff_unique_abbr": "NTU;SenseTime", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "Singapore;China" }, { "id": "0SgPbbyrWh", "title": "Optimal spherical codes for locality-sensitive hashing", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "In the realm of Locality-Sensitive Hashing (LSH), striking the right balance between computational efficiency and accuracy has been a persistent challenge. Most existing unsupervised methods rely on dense representations, which can lead to inefficiencies. To tackle this, we advocate for the adoption of sparse representations and introduce the use of quasi-Optimal Spherical Codes (OSCs) to minimise space distortion. OSCs strive to maximise the minimum angle between any pair of points on the hypersphere, ensuring that the relative angular information between data points is preserved in the representation, which is particularly valuable in tasks involving cosine similarity. We employ Adam-based optimisation to obtain these codes and use them to partition the space into a $k^\\text{th}$-order Voronoi diagram. 
This approach consistently outperforms existing methods across four datasets on $K$-nearest neighbors search with cosine similarity, while capping the query time for a given embedding size.", "keywords": "Optimal spherical codes;locality sensitive hashing;similarity search;sparse coding", "primary_area": "metric learning, kernel learning, and sparse coding", "supplementary_material": "/attachment/cac03604aac528266d8d2b827d896c6871ceaf33.pdf", "author": "Rana Alkhoury Maroun;Henry Gouk;Yihe Lu;Barbara Webb", "authorids": "~Rana_Alkhoury_Maroun1;~Henry_Gouk1;~Yihe_Lu1;~Barbara_Webb1", "gender": ";M;M;", "homepage": ";https://www.henrygouk.com;;", "dblp": ";172/0943;181/4875-1;", "google_scholar": ";https://scholar.google.co.nz/citations?user=i1bzlyAAAAAJ;22zfD-gAAAAJ;https://scholar.google.com/scholar?hl=en", "orcid": " 0000-0003-4454-5320;;0000-0002-1615-4915;", "linkedin": ";;yihe-lu-5ba52856/?originalSubdomain=uk;", "or_profile": "~Rana_Alkhoury_Maroun1;~Henry_Gouk1;~Yihe_Lu1;~Barbara_Webb1", "aff": "University of Edinburgh, University of Edinburgh;University of Edinburgh;University of Edinburgh;University of Edinburgh", "aff_domain": "ed.ac.uk;ed.ac.uk;ed.ac.uk;ed.ac.uk", "position": "PhD student;RAEng Research Fellow;Postdoc;Full Professor", "bibtex": "@misc{\nmaroun2024optimal,\ntitle={Optimal spherical codes for locality-sensitive hashing},\nauthor={Rana Alkhoury Maroun and Henry Gouk and Yihe Lu and Barbara Webb},\nyear={2024},\nurl={https://openreview.net/forum?id=0SgPbbyrWh}\n}", "github": "", "project": "", "reviewers": "RRUE;KUjb;88TD;d26L", "site": "https://openreview.net/forum?id=0SgPbbyrWh", "pdf_size": 3883, "rating": "1;3;3;3", "confidence": "5;4;4;3", "soundness": "1;2;3;2", "contribution": "1;3;2;2", "presentation": "2;1;1;2", "wc_summary": "39;55;93;80", "wc_strengths": "16;17;91;23", "wc_weaknesses": "497;13;327;74", "wc_questions": "14;26;93;37", "wc_review": "566;111;604;214", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 2.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.0, 0.7071067811865476 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 1.5, 0.5 ], "wc_summary_avg": [ 66.75, 21.05201890555868 ], "wc_strengths_avg": [ 36.75, 31.43544973433655 ], "wc_weaknesses_avg": [ 227.75, 195.00176281254485 ], "wc_questions_avg": [ 42.5, 30.26962173533062 ], "wc_review_avg": [ 373.75, 214.78637643016376 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:NE_wzqmTiMwJ:scholar.google.com/&scioq=Optimal+spherical+codes+for+locality-sensitive+hashing&hl=en&as_sdt=0,14", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Edinburgh", "aff_unique_dep": "", "aff_unique_url": "https://www.ed.ac.uk", "aff_unique_abbr": "Edinburgh", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "0TZs6WOs16", "title": "Hyperbolic Embeddings in Sequential Self-Attention for Improved Next-Item Recommendations", "track": "main", "status": "Reject", "tldr": "", "abstract": "In recent years, self-attentive sequential learning models have surpassed 
conventional collaborative filtering techniques in next-item recommendation tasks. However, Euclidean geometry utilized in these models may not be optimal for capturing a complex structure of the behavioral data. Building on recent advances in the application of hyperbolic geometry to collaborative filtering tasks, we propose a novel approach that leverages hyperbolic geometry in the sequential learning setting. Our approach involves transitioning the learned parameters to a Poincar\\'e ball, which enables a linear predictor in a non-linear space. Our experimental results demonstrate that under certain conditions hyperbolic models may simultaneously improve recommendation quality and gain representational capacity. We identify several determining factors that affect the results, which include the ability of a loss function to preserve hyperbolic structure and the general compatibility of data with hyperbolic geometry. For the latter, we propose an empirical approach based on Gromov delta-hyperbolicity estimation that allows categorizing datasets as either compatible or not.", "keywords": "recommender systems;sequential self-attention;hyperbolic geometry;Gromov product", "primary_area": "learning on graphs and other geometries & topologies", "supplementary_material": "", "author": "Evgeny Frolov;Lina Bashaeva;Leyla Mirvakhabova;Ivan Oseledets", "authorids": "~Evgeny_Frolov1;~Lina_Bashaeva1;~Leyla_Mirvakhabova2;~Ivan_Oseledets1", "gender": "M;F;;M", "homepage": ";;;http://oseledets.github.io", "dblp": ";;;56/7175", "google_scholar": "https://scholar.google.ru/citations?user=l6cMdUEAAAAJ;;;https://scholar.google.ru/citations?user=5kMqBQEAAAAJ", "orcid": "0000-0003-3679-5311;;;", "linkedin": "evgenyfrolov/;lina-bashaeva/;;", "or_profile": "~Evgeny_Frolov1;~Lina_Bashaeva1;~Leyla_Mirvakhabova2;~Ivan_Oseledets1", "aff": "Skolkovo Institute of Science and Technology;;;Institute of Numerical Mathematics", "aff_domain": "skoltech.ru;;;inm.ras.ru", "position": "Researcher;;;Researcher", "bibtex": "@misc{\nfrolov2024hyperbolic,\ntitle={Hyperbolic Embeddings in Sequential Self-Attention for Improved Next-Item Recommendations},\nauthor={Evgeny Frolov and Lina Bashaeva and Leyla Mirvakhabova and Ivan Oseledets},\nyear={2024},\nurl={https://openreview.net/forum?id=0TZs6WOs16}\n}", "github": "", "project": "", "reviewers": "WdM2;9PVb;vqza;hnY1", "site": "https://openreview.net/forum?id=0TZs6WOs16", "pdf_size": 1089586, "rating": "3;3;3;5", "confidence": "5;4;5;3", "soundness": "3;2;2;3", "contribution": "2;1;2;3", "presentation": "3;2;2;3", "wc_summary": "36;56;48;107", "wc_strengths": "26;68;65;48", "wc_weaknesses": "166;254;308;63", "wc_questions": "5;4;7;6", "wc_review": "233;382;428;224", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1682;1808;2035;844", "reply_reviewers": "0;0;0;0", "reply_authors": "3;3;4;2", "rating_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 61.75, 27.07743525520835 ], "wc_strengths_avg": [ 51.75, 16.708904811506947 ], "wc_weaknesses_avg": [ 197.75, 92.85035002626539 ], "wc_questions_avg": [ 5.5, 1.118033988749895 ], "wc_review_avg": [ 316.75, 89.79246906060663 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1592.25, 450.14129726120444 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], 
"corr_rating_confidence": -0.8703882797784891, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:pyxtiaLGQ6AJ:scholar.google.com/&scioq=Hyperbolic+Embeddings+in+Sequential+Self-Attention+for+Improved+Next-Item+Recommendations&hl=en&as_sdt=0,11", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Skolkovo Institute of Science and Technology;Institute of Numerical Mathematics", "aff_unique_dep": ";", "aff_unique_url": "https://www.skoltech.ru;", "aff_unique_abbr": "Skoltech;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0", "aff_country_unique": "Russian Federation;" }, { "id": "0V311Uh8q1", "title": "Algorithmic Stability Unleashed: Generalization Bounds with Unbounded Losses", "track": "main", "status": "Reject", "tldr": "", "abstract": "One of the central problems of statistical learning theory is quantifying the generalization ability of learning algorithms within a probabilistic framework. Algorithmic stability is a powerful tool for deriving generalization bounds, however, it typically builds on a critical assumption that losses are bounded. In this paper, we relax this condition to unbounded loss functions with subweibull diameter. This gives new generalization bound for algorithmic stability and also includes existing results of subgaussian and subexponential diameters as specific cases. Our main probabilistic result is a general concentration inequality for subweibull random variables, which may be of independent interest.", "keywords": "algorithmic stability; generalization bound", "primary_area": "learning theory", "supplementary_material": "/attachment/49261c5dd8086a8c2eb6339570401c6dfe49287b.pdf", "author": "Shaojie Li;Bowei Zhu;Yong Liu", "authorids": "~Shaojie_Li2;~Bowei_Zhu1;~Yong_Liu7", "gender": "M;;M", "homepage": ";;https://iie-liuyong.github.io", "dblp": ";304/1543;29/4867-18", "google_scholar": ";;vVhmzbAAAAAJ", "orcid": ";;0000-0002-6739-621X", "linkedin": ";;", "or_profile": "~Shaojie_Li2;~Bowei_Zhu1;~Yong_Liu7", "aff": "Renmin University of China;Renmin University of China;Renmin University of China", "aff_domain": "ruc.edu.cn;ruc.edu.cn;ruc.edu.cn", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@misc{\nli2024algorithmic,\ntitle={Algorithmic Stability Unleashed: Generalization Bounds with Unbounded Losses},\nauthor={Shaojie Li and Bowei Zhu and Yong Liu},\nyear={2024},\nurl={https://openreview.net/forum?id=0V311Uh8q1}\n}", "github": "", "project": "", "reviewers": "K21p;wVug;wNWA;LoHq", "site": "https://openreview.net/forum?id=0V311Uh8q1", "pdf_size": 247036, "rating": "3;5;5;6", "confidence": "3;4;3;5", "soundness": "2;2;3;3", "contribution": "2;2;2;2", "presentation": "1;3;2;4", "wc_summary": "84;85;161;373", "wc_strengths": "35;38;35;13", "wc_weaknesses": "181;178;44;9", "wc_questions": "67;153;302;14", "wc_review": "367;454;542;409", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1030;1431;985;678", "reply_reviewers": "0;0;0;0", "reply_authors": "2;2;2;1", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 175.75, 118.08762636279891 ], "wc_strengths_avg": [ 30.25, 10.034316120194738 ], "wc_weaknesses_avg": [ 103.0, 77.50161288644256 ], "wc_questions_avg": [ 134.0, 108.94264546081116 ], "wc_review_avg": [ 443.0, 64.91147818375421 ], "wc_reply_reviewers_avg": [ 0, 0 ], 
"wc_reply_authors_avg": [ 1031.0, 267.7340097933021 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7608859102526822, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3002706169684833497&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0", "aff_unique_norm": "Renmin University of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ruc.edu.cn", "aff_unique_abbr": "RUC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Q-Bench: A Benchmark for General-Purpose Foundation Models on Low-level Vision", "status": "Spotlight", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19616", "id": "0V5TVt9bk0", "author_site": "Haoning Wu, Zicheng Zhang, Erli Zhang, Chaofeng Chen, Liang Liao, Annan Wang, Chunyi Li, Wenxiu Sun, Qiong Yan, Guangtao Zhai, Weisi Lin", "tldr": "", "abstract": "The rapid evolution of Multi-modality Large Language Models (MLLMs) has catalyzed a shift in computer vision from specialized models to general-purpose foundation models. Nevertheless, there is still an inadequacy in assessing the abilities of MLLMs on **low-level visual perception and understanding**. To address this gap, we present **Q-Bench**, a holistic benchmark crafted to systematically evaluate potential abilities of MLLMs on three realms: low-level visual perception, low-level visual description, and overall visual quality assessment. **_a)_** To evaluate the low-level **_perception_** ability, we construct the **LLVisionQA** dataset, consisting of 2,990 diverse-sourced images, each equipped with a human-asked question focusing on its low-level attributes. We then measure the correctness of MLLMs on answering these questions. **_b)_** To examine the **_description_** ability of MLLMs on low-level information, we propose the **LLDescribe** dataset consisting of long expert-labelled *golden* low-level text descriptions on 499 images, and a GPT-involved comparison pipeline between outputs of MLLMs and the *golden* descriptions. **_c)_** Besides these two tasks, we further measure their visual quality **_assessment_** ability to align with human opinion scores. Specifically, we design a softmax-based strategy that enables MLLMs to predict *quantifiable* quality scores, and evaluate them on various existing image quality assessment (IQA) datasets. Our evaluation across the three abilities confirms that MLLMs possess preliminary low-level visual skills. However, these skills are still unstable and relatively imprecise, indicating the need for specific enhancements on MLLMs towards these abilities. 
We hope that our benchmark can encourage the research community to delve deeper to discover and enhance these untapped potentials of MLLMs.", "keywords": "Benchmark;Vision-Language;Large Language Models;Low-level Vision;Image Quality Assessment", "primary_area": "datasets and benchmarks", "supplementary_material": "/attachment/f30a94400274185c561b708ad935be64375bbe64.pdf", "author": "Haoning Wu;Zicheng Zhang;Erli Zhang;Chaofeng Chen;Liang Liao;Annan Wang;Chunyi Li;Wenxiu Sun;Qiong Yan;Guangtao Zhai;Weisi Lin", "authorids": "~Haoning_Wu1;~Zicheng_Zhang7;~Erli_Zhang1;~Chaofeng_Chen1;~Liang_Liao3;~Annan_Wang1;~Chunyi_Li1;~Wenxiu_Sun1;~Qiong_Yan1;~Guangtao_Zhai1;~Weisi_Lin1", "gender": "M;M;M;M;M;M;F;;M;M;M", "homepage": "https://teowu.github.io;;https://chaofengc.github.io/;https://liaoliang92.github.io/homepage/;;https://lcysyzxdxc.github.io;http://wenxiusun.com/;;https://faculty.sjtu.edu.cn/zhaiguangtao/en/index.htm;http://www.ntu.edu.sg/home/wslin/;", "dblp": "264/5802-1;32/749;198/2537;;;192/6758;16/9879;122/4814;19/3230;14/3737.html;", "google_scholar": "https://scholar.google.com.hk/citations?user=wth-VbMAAAAJ;gfjYZKMAAAAJ;lxiqnI0AAAAJ;kqTUHSIAAAAJ;;https://scholar.google.com/citations?hl=en;X9lE6O4AAAAJ;uT9CtPYAAAAJ;E6zbSYgAAAAJ;https://scholar.google.com.tw/citations?user=D_S41X4AAAAJ;QICTEckAAAAJ", "orcid": "0000-0001-8642-8101;;0000-0001-6137-5162;0000-0002-2238-2420;0009-0004-2998-9817;;;;;;", "linkedin": ";zhang-erli/;;;annan-wang-1026241a4;;;;;;", "or_profile": "~Haoning_Wu1;~Erli_Zhang1;~Chaofeng_Chen1;~Liang_Liao3;~Annan_Wang1;~Chunyi_Li1;~Wenxiu_Sun1;~Qiong_Yan1;~Guangtao_Zhai1;~Weisi_Lin1;~zicheng_zhang6", "aff": "Nanyang Technological University;Nanyang Technological University;Nanyang Technological University;Nanyang Technological University;Nanyang Technological University;Shanghai Artificial Intelligence Laboratory;SenseTime Group Limited;SenseTime Research;Shanghai Jiaotong University;Nanyang Technological University;Shanghai Jiaotong University", "aff_domain": "ntu.edu.sg;ntu.edu.sg;ntu.edu.sg;ntu.edu.sg;ntu.edu.sg;pjlab.org.cn;sensetime.com;sensetime.com;sjtu.edu.cn;ntu.edu.sg;sjtu.edu.cn", "position": "PhD student;Undergrad student;Postdoc;Postdoc;Researcher;Intern;Principal Researcher;Research Director;Full Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nwu2024qbench,\ntitle={Q-Bench: A Benchmark for General-Purpose Foundation Models on Low-level Vision},\nauthor={Haoning Wu and Zicheng Zhang and Erli Zhang and Chaofeng Chen and Liang Liao and Annan Wang and Chunyi Li and Wenxiu Sun and Qiong Yan and Guangtao Zhai and Weisi Lin},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=0V5TVt9bk0}\n}", "github": "", "project": "", "reviewers": "gz7s;rhgp;1bay", "pdf_size": 5992170, "rating": "6;8;8", "confidence": "5;4;5", "soundness": "3;3;3", "contribution": "3;3;3", "presentation": "3;3;3", "wc_summary": "43;91;100", "wc_strengths": "67;45;44", "wc_weaknesses": "75;27;114", "wc_questions": "4;23;2", "wc_review": "189;186;260", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1389;717;3118", "reply_reviewers": "0;0;0", "reply_authors": "4;1;5", "rating_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 78.0, 25.019992006393608 ], "wc_strengths_avg": [ 52.0, 10.614455552060438 ], 
"wc_weaknesses_avg": [ 72.0, 35.58089374931439 ], "wc_questions_avg": [ 9.666666666666666, 9.46337971105226 ], "wc_review_avg": [ 211.66666666666666, 34.19876540981495 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1741.3333333333333, 1011.3701377614209 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 3.3333333333333335, 1.699673171197595 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 151, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11683847823892452807&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "openreview": "https://openreview.net/forum?id=0V5TVt9bk0", "pdf": "https://openreview.net/pdf?id=0V5TVt9bk0", "email": "ntu.edu.sg;ntu.edu.sg;ntu.edu.sg;ntu.edu.sg;ntu.edu.sg;pjlab.org.cn;sensetime.com;sensetime.com;sjtu.edu.cn;ntu.edu.sg;sjtu.edu.cn", "author_num": 11, "aff_unique_index": "0;0;0;0;0;1;2;3;4;0;4", "aff_unique_norm": "Nanyang Technological University;Shanghai Artificial Intelligence Laboratory;SenseTime Group Limited;SenseTime;Shanghai Jiao Tong University", "aff_unique_dep": ";;;SenseTime Research;", "aff_unique_url": "https://www.ntu.edu.sg;http://www.shailab.org/;https://www.sensetime.com;https://www.sensetime.com;https://www.sjtu.edu.cn", "aff_unique_abbr": "NTU;Shanghai AI Lab;SenseTime;SenseTime;SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1;1;1;1;0;1", "aff_country_unique": "Singapore;China" }, { "title": "MOFDiff: Coarse-grained Diffusion for Metal-Organic Framework Design", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19615", "id": "0VBsoluxR2", "author_site": "Xiang Fu, Tian Xie, Andrew Rosen, Tommi Jaakkola, Jake Smith", "tldr": "", "abstract": "Metal-organic frameworks (MOFs) are of immense interest in applications such as gas storage and carbon capture due to their exceptional porosity and tunable chemistry. Their modular nature has enabled the use of template-based methods to generate hypothetical MOFs by combining molecular building blocks in accordance with known network topologies. However, the ability of these methods to identify top-performing MOFs is often hindered by the limited diversity of the resulting chemical space. In this work, we propose MOFDiff: a coarse-grained (CG) diffusion model that generates CG MOF structures through a denoising diffusion process over the coordinates and identities of the building blocks. The all-atom MOF structure is then determined through a novel assembly algorithm. As the diffusion model generates 3D MOF structures by predicting scores in E(3), we employ equivariant graph neural networks that respect the permutational and roto-translational symmetries. We comprehensively evaluate our model's capability to generate valid and novel MOF structures and its effectiveness in designing outstanding MOF materials for carbon capture applications with molecular simulations.", "keywords": "Materials design;diffusion model;metal-organic framework;carbon capture;generative model;AI for Science", "primary_area": "applications to physical sciences (physics, chemistry, biology, etc.)", "supplementary_material": "/attachment/a45d7028d2af95b6f15c66409ef120981a054831.zip", "author": "Xiang Fu;Tian Xie;Andrew Scott Rosen;Tommi S. 
Jaakkola;Jake Allen Smith", "authorids": "~Xiang_Fu4;~Tian_Xie2;~Andrew_Scott_Rosen1;~Tommi_S._Jaakkola1;~Jake_Allen_Smith1", "gender": "M;M;M;;M", "homepage": "https://xiangfu.co/;http://www.txie.me;https://rosen.cbe.princeton.edu/;;", "dblp": "97/374-5.html;;;;", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;xFbOAf8AAAAJ;lHBjgLsAAAAJ;;lSQbg6kAAAAJ", "orcid": ";;0000-0002-0141-7006;;", "linkedin": ";txie-93/;andrew-s-rosen/;;jakeallensmith", "or_profile": "~Xiang_Fu4;~Tian_Xie2;~Andrew_Scott_Rosen1;~Tommi_S._Jaakkola1;~Jake_Allen_Smith1", "aff": "Massachusetts Institute of Technology;Microsoft Research AI for Science;University of California, Berkeley;;Microsoft", "aff_domain": "mit.edu;microsoft.com;berkeley.edu;;microsoft.com", "position": "PhD student;Senior Researcher;Postdoc;;Researcher", "bibtex": "@inproceedings{\nfu2024mofdiff,\ntitle={{MOFD}iff: Coarse-grained Diffusion for Metal-Organic Framework Design},\nauthor={Xiang Fu and Tian Xie and Andrew Scott Rosen and Tommi S. Jaakkola and Jake Allen Smith},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=0VBsoluxR2}\n}", "github": "", "project": "", "reviewers": "1BzN;yqBG;nQJ6;e39L", "pdf_size": 18740242, "rating": "8;8;8;8", "confidence": "4;3;3;4", "soundness": "3;3;2;3", "contribution": "3;3;2;2", "presentation": "3;3;3;4", "wc_summary": "65;184;159;182", "wc_strengths": "65;61;84;75", "wc_weaknesses": "75;91;251;105", "wc_questions": "149;73;68;211", "wc_review": "354;409;562;573", "wc_reply_reviewers": "0;12;229;43", "wc_reply_authors": "791;626;1125;1319", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;3;4", "rating_avg": [ 8.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 147.5, 48.63383595810637 ], "wc_strengths_avg": [ 71.25, 8.954747344286158 ], "wc_weaknesses_avg": [ 130.5, 70.37577708274347 ], "wc_questions_avg": [ 125.25, 59.00158896165424 ], "wc_review_avg": [ 474.5, 95.090746132313 ], "wc_reply_reviewers_avg": [ 71.0, 92.56079083499665 ], "wc_reply_authors_avg": [ 965.25, 272.081214897317 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12303585458809639061&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=0VBsoluxR2", "pdf": "https://openreview.net/pdf?id=0VBsoluxR2", "email": "mit.edu;microsoft.com;berkeley.edu;;microsoft.com", "author_num": 5, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Massachusetts Institute of Technology;Microsoft;University of California, Berkeley", "aff_unique_dep": ";AI for Science;", "aff_unique_url": "https://web.mit.edu;https://www.microsoft.com/en-us/research/group/ai-for-science;https://www.berkeley.edu", "aff_unique_abbr": "MIT;Microsoft Research;UC Berkeley", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "0VKEJKKLvr", "title": "A GRAPH-BASED REPRESENTATION LEARNING APPROACH FOR BREAST CANCER RISK PREDICTION USING GENOTYPE DATA", "track": "main", "status": "Reject", "tldr": "", "abstract": "Breast cancer risk prediction using genotype data is a 
critical task in personalized medicine. However, the high dimensionality and potential redundancy of genetic features pose challenges for accurate risk prediction. We present a graph-based representation learning pipeline for breast cancer risk prediction. Our method addresses the issue of feature redundancy by developing an ensemble-based feature selection approach. We evaluated the performance of the graph-based approach in a breast cancer risk prediction task using a dataset of 644,585 genetic variants from Biobank of Eastern Finland, consisting of 168 cases and 1558 controls and compared it with the classical machine learning models. Using 200 top-ranked genetic variants selected by the ensemble approach, the graph convolutional network\n(GCN) achieved area under the ROC curve (AUC) of 0.986 \u00b1 0.001 in discriminating cases and controls, which is better than an XGBoost model with AUC of 0.955 \u00b1 0.0034", "keywords": "Graph representation;Deep learning;Single nucleotide polymorphism;Breast cancer", "primary_area": "applications to physical sciences (physics, chemistry, biology, etc.)", "supplementary_material": "", "author": "Naga Raju Gudhe;veli-matti Kosma;Hamid Behravan;Arto Mannermaa", "authorids": "~Naga_Raju_Gudhe1;~veli-matti_Kosma1;~Hamid_Behravan1;~Arto_Mannermaa1", "gender": "M;M;M;Not Specified", "homepage": ";https://www.uef.fi;;https://www.uef.fi/fi/", "dblp": ";;;", "google_scholar": "ijCl-MMAAAAJ;;JARPOGcAAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Naga_Raju_Gudhe1;~veli-matti_Kosma1;~Hamid_Behravan1;~Arto_Mannermaa1", "aff": "University of Eastern Finland ;uef;;", "aff_domain": "uef.fi;uef.fi;;", "position": "Researcher;Full Professor;;", "bibtex": "@misc{\ngudhe2024a,\ntitle={A {GRAPH}-{BASED} {REPRESENTATION} {LEARNING} {APPROACH} {FOR} {BREAST} {CANCER} {RISK} {PREDICTION} {USING} {GENOTYPE} {DATA}},\nauthor={Naga Raju Gudhe and veli-matti Kosma and Hamid Behravan and Arto Mannermaa},\nyear={2024},\nurl={https://openreview.net/forum?id=0VKEJKKLvr}\n}", "github": "", "project": "", "reviewers": "fYgW;6KR2;etdw;VT6g", "site": "https://openreview.net/forum?id=0VKEJKKLvr", "pdf_size": 1157317, "rating": "3;3;3;3", "confidence": "4;5;5;4", "soundness": "1;2;1;2", "contribution": "2;2;1;1", "presentation": "2;3;2;2", "wc_summary": "54;173;92;73", "wc_strengths": "44;7;37;29", "wc_weaknesses": "280;351;123;144", "wc_questions": "7;61;97;161", "wc_review": "385;592;349;407", "wc_reply_reviewers": "0;19;0;0", "wc_reply_authors": "1784;955;751;916", "reply_reviewers": "0;1;0;0", "reply_authors": "5;5;2;5", "rating_avg": [ 3.0, 0.0 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 1.5, 0.5 ], "contribution_avg": [ 1.5, 0.5 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 98.0, 45.33762234612662 ], "wc_strengths_avg": [ 29.25, 13.899190623917638 ], "wc_weaknesses_avg": [ 224.5, 94.69028461251978 ], "wc_questions_avg": [ 81.5, 55.97097462078001 ], "wc_review_avg": [ 433.25, 93.96375631061159 ], "wc_reply_reviewers_avg": [ 4.75, 8.227241335952167 ], "wc_reply_authors_avg": [ 1101.5, 401.41281743362407 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 4.25, 1.299038105676658 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4674983461646892121&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "University of Eastern Finland", 
"aff_unique_dep": "", "aff_unique_url": "https://www.uef.fi", "aff_unique_abbr": "UEF", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Finland" }, { "id": "0VZP2Dr9KX", "title": "Baseline Defenses for Adversarial Attacks Against Aligned Language Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "As large language models (LLMs) quickly become ubiquitous, it becomes critical to understand their security vulnerabilities.\nRecent work shows that text optimizers can produce jailbreaking prompts that bypass moderation and alignment. \nDrawing from the rich body of work on adversarial machine learning, we approach these attacks with three questions: \nWhat threat models are practically useful in this domain? How do baseline defense techniques perform in this new domain? How does LLM security differ from computer vision?\nWe evaluate several baseline defense strategies against leading adversarial attacks on LLMs, discussing the various settings in which each is feasible and effective. \nIn particular, we look at three types of defenses: detection (perplexity based), input preprocessing (paraphrase and retokenization), and adversarial training. \nWe discuss white-box and gray-box settings and discuss the robustness-performance trade-off for each of the defenses considered. \nWe find that the weakness of existing discrete optimizers for text, combined with the relatively high costs of optimization, makes standard adaptive attacks more challenging for LLMs. Future research will be needed to uncover whether more powerful optimizers can be developed, or whether the strength of filtering and preprocessing defenses is greater in the LLMs domain than it has been in computer vision.", "keywords": "baseline defenses;attacks", "primary_area": "generative models", "supplementary_material": "/attachment/842cb8f7c9593337cbb293b35d290955a1feadff.zip", "author": "Neel Jain;Avi Schwarzschild;Yuxin Wen;Gowthami Somepalli;John Kirchenbauer;Ping-yeh Chiang;Micah Goldblum;Aniruddha Saha;Jonas Geiping;Tom Goldstein", "authorids": "~Neel_Jain1;~Avi_Schwarzschild1;~Yuxin_Wen2;~Gowthami_Somepalli1;~John_Kirchenbauer1;~Ping-yeh_Chiang1;~Micah_Goldblum1;~Aniruddha_Saha1;~Jonas_Geiping1;~Tom_Goldstein1", "gender": ";M;;F;M;;;M;M;M", "homepage": ";https://cs.umd.edu/~avi1;https://yuxinwenrick.github.io/;https://somepago.github.io/;https://jwkirchenbauer.notion.site/;;;https://ani0075saha.github.io/;https://jonasgeiping.github.io/;https://www.cs.umd.edu/~tomg/", "dblp": ";249/9334.html;;286/5012;321/0678;236/4288;241/7231;221/8102;190/7229;25/8184", "google_scholar": "https://scholar.google.com/citations?hl=en;WNvQ7AcAAAAJ;oUYfjg0AAAAJ;T2ezBDsAAAAJ;48GJrbsAAAAJ;WUoMq1IAAAAJ;pGDKzuUAAAAJ;xfjALj0AAAAJ;https://scholar.google.de/citations?user=206vNCEAAAAJ;KmSuVtgAAAAJ", "orcid": ";;;;;;;;;", "linkedin": "neel-jain-0a6a239/;;;;johnkirchenbauer/;;;;;", "or_profile": "~Neel_Jain1;~Avi_Schwarzschild1;~Yuxin_Wen2;~Gowthami_Somepalli1;~John_Kirchenbauer1;~Ping-yeh_Chiang1;~Micah_Goldblum1;~Aniruddha_Saha1;~Jonas_Geiping1;~Tom_Goldstein1", "aff": "University of Maryland, College Park;Carnegie Mellon University;University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park;New York University;University of Maryland, College Park;Max Planck Institute for Intelligent Systems, Max-Planck Institute;University of Maryland, College Park", "aff_domain": 
"umd.edu;cmu.edu;umd.edu;umd.edu;umd.edu;umd.edu;nyu.edu;umd.edu;tuebingen.mpg.de;umd.edu", "position": "PhD student;Postdoc;PhD student;PhD student;PhD student;PhD student;Postdoc;Postdoc;Principal Researcher;Full Professor", "bibtex": "@misc{\njain2024baseline,\ntitle={Baseline Defenses for Adversarial Attacks Against Aligned Language Models},\nauthor={Neel Jain and Avi Schwarzschild and Yuxin Wen and Gowthami Somepalli and John Kirchenbauer and Ping-yeh Chiang and Micah Goldblum and Aniruddha Saha and Jonas Geiping and Tom Goldstein},\nyear={2024},\nurl={https://openreview.net/forum?id=0VZP2Dr9KX}\n}", "github": "", "project": "", "reviewers": "9hm2;UELn;hCXb;Ywox", "site": "https://openreview.net/forum?id=0VZP2Dr9KX", "pdf_size": 544029, "rating": "3;5;5;8", "confidence": "4;4;4;3", "soundness": "2;3;3;4", "contribution": "2;2;3;3", "presentation": "2;3;3;4", "wc_summary": "32;64;59;93", "wc_strengths": "11;56;40;50", "wc_weaknesses": "1088;124;439;81", "wc_questions": "83;14;39;4", "wc_review": "1214;258;577;228", "wc_reply_reviewers": "928;47;0;0", "wc_reply_authors": "1569;379;290;427", "reply_reviewers": "2;1;0;0", "reply_authors": "3;1;1;1", "rating_avg": [ 5.25, 1.7853571071357126 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 62.0, 21.644860821913362 ], "wc_strengths_avg": [ 39.25, 17.282577932704367 ], "wc_weaknesses_avg": [ 433.0, 402.6307241132996 ], "wc_questions_avg": [ 35.0, 30.504098085339287 ], "wc_review_avg": [ 569.25, 396.57620642191836 ], "wc_reply_reviewers_avg": [ 243.75, 395.5176197086547 ], "wc_reply_authors_avg": [ 666.25, 523.5156993825495 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.8892972917998875, "gs_citation": 111, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14062897331071035133&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;0;0;0;0;2;0;3;0", "aff_unique_norm": "University of Maryland;Carnegie Mellon University;New York University;Max Planck Institute for Intelligent Systems", "aff_unique_dep": ";;;Intelligent Systems", "aff_unique_url": "https://www/umd.edu;https://www.cmu.edu;https://www.nyu.edu;https://www.mpi-is.mpg.de", "aff_unique_abbr": "UMD;CMU;NYU;MPI-IS", "aff_campus_unique_index": "0;0;0;0;0;0;0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;0;0;0;0;0;0;0;1;0", "aff_country_unique": "United States;Germany" }, { "id": "0XVLzHp6Fd", "title": "Towards Well-distributed Generative Networks Using Adversarial Autoencoders", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "In addition to perceptual quality, the usefulness of a generative model depends on how closely the generated distribution matches the training distribution. Previous efforts in adversarial generative models have focused on reducing \"mode collapse\", but this term, roughly meaning being unable to generate certain parts of the data distribution, is not clearly defined. In addition, being able to generate every image in the data distribution does not imply reproducing the correct distribution, which additionally requires that each image occur at the same frequency in the generated images as in the training data. 
Due to the lack of a precise definition and measurement, it has been difficult to evaluate the success of these efforts in producing the correct distribution. In this work we propose an autoencoder-based adversarial training framework, which ensures that the density of the encoder's aggregate output distribution closely matches the prior latent distribution, which in turn ensures that the distribution of images generated from randomly sampled latent code will closely match the training data. To evaluate our method, we introduce the 3DShapeHD dataset, which has a moderate complexity that goes beyond simplistic toy datasets, but also an exactly known generating process and distribution of features, which enables precise measurements. Using the reduced chi-square statistic, we show significant improvement in the accuracy of the distribution of generated samples. The results also demonstrate that the enhanced diversity of our model improves the ability to generate uncommon features in real-world datasets.", "keywords": "Generative Networks;Adversarial Autoencoders", "primary_area": "generative models", "supplementary_material": "/attachment/eacc6662cae5bb13d789ff08640d7368b3ac5710.zip", "author": "Sitao Xiang;Pengda Xiang;Yajie Zhao", "authorids": "~Sitao_Xiang1;~Pengda_Xiang1;~Yajie_Zhao1", "gender": "M;M;F", "homepage": ";;https://www.yajie-zhao.com/", "dblp": "199/1894;262/3792;54/7467", "google_scholar": "vPMqq2AAAAAJ;;", "orcid": "0000-0001-9296-6889;;", "linkedin": ";pengda-xiang-20a81410a/;", "or_profile": "~Sitao_Xiang1;~Pengda_Xiang1;~Yajie_Zhao1", "aff": "University of Southern California;University of Southern California;USC Institute for Creative Technologies, University of Southern California", "aff_domain": "usc.edu;usc.edu;ict.usc.edu", "position": "PhD student;PhD student;Director", "bibtex": "@misc{\nxiang2024towards,\ntitle={Towards Well-distributed Generative Networks Using Adversarial Autoencoders},\nauthor={Sitao Xiang and Pengda Xiang and Yajie Zhao},\nyear={2024},\nurl={https://openreview.net/forum?id=0XVLzHp6Fd}\n}", "github": "", "project": "", "reviewers": "EFy9;p6Dj;e5hk;Jjz9", "site": "https://openreview.net/forum?id=0XVLzHp6Fd", "pdf_size": 539437, "rating": "3;3;3;3", "confidence": "4;4;4;4", "soundness": "2;2;2;2", "contribution": "1;2;2;1", "presentation": "2;1;2;2", "wc_summary": "102;28;79;74", "wc_strengths": "81;17;72;13", "wc_weaknesses": "324;81;527;44", "wc_questions": "124;7;14;460", "wc_review": "631;133;692;591", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 3.0, 0.0 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.0, 0.0 ], "contribution_avg": [ 1.5, 0.5 ], "presentation_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_avg": [ 70.75, 26.845623479442605 ], "wc_strengths_avg": [ 45.75, 30.946526460977815 ], "wc_weaknesses_avg": [ 244.0, 195.61313861803865 ], "wc_questions_avg": [ 151.25, 184.19741447696816 ], "wc_review_avg": [ 511.75, 221.60931275557894 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:8PQCC3f1At0J:scholar.google.com/&scioq=Towards+Well-distributed+Generative+Networks+Using+Adversarial+Autoencoders&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": 
"University of Southern California", "aff_unique_dep": "", "aff_unique_url": "https://www.usc.edu", "aff_unique_abbr": "USC", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "0Y26tFG3WF", "title": "Inducing Precision in Lagrangian Neural Networks : Proof of concept application on Chaotic systems", "track": "main", "status": "Reject", "tldr": "", "abstract": "Solutions of dynamic systems that exhibit chaotic behavior are particularly sensitive to errors in initial/intermediate state estimates when long term dynamics is of interest. Lagrangian Neural Networks (LNN) are a class of physics induced learning methods that seamlessly integrate physical conservation laws into functional solutions, by forming a parametric Lagrangian for the system of interest. However it has been seen that the function approximation error associated with the parametric Lagrangian modelling could prove to be catastrophic for the prediction of long term dynamics of chaotic systems. This makes improving the precision of the parametric Lagrangian particularly crucial. Considering the same in this work a modified Lagrangian Neural Network approach is proposed, where a customized neural network architecture is designed to directly emphasize the relative importance of each significant bit in the Lagrangian estimates produced. We evaluate our method on two dynamic systems that are well known in the literature in exhibiting deterministic chaos, namely the double pendulum and Henon-Helies systems. Further, we compare the obtained solutions with those estimated by Finite Element solvers (under optimal conditions) to validate the relative accuracy. We observe that the trajectory deviations as a result of chaotic behavior can be significantly reduced by the process of explicitly enforcing the precision requirement for the parametric Lagrangian, as modelled using the proposed approach.", "keywords": "Physics Informed Learning;Deep Learning;Neural Networks;Chaotic systems.", "primary_area": "neurosymbolic & hybrid AI systems (physics-informed, logic & formal reasoning, etc.)", "supplementary_material": "", "author": "Hrithwik Shalu;Bharath Govindarajan", "authorids": "~Hrithwik_Shalu1;~Bharath_Govindarajan1", "gender": "M;", "homepage": ";https://mgbharath.com", "dblp": "271/7953;", "google_scholar": ";", "orcid": "0000-0002-8944-434X;0009-0000-0430-9775", "linkedin": ";", "or_profile": "~Hrithwik_Shalu1;~Bharath_Govindarajan1", "aff": ";Indian Institute of Technology Madras", "aff_domain": ";iitm.ac.in", "position": ";Assistant Professor", "bibtex": "@misc{\nshalu2024inducing,\ntitle={Inducing Precision in Lagrangian Neural Networks : Proof of concept application on Chaotic systems},\nauthor={Hrithwik Shalu and Bharath Govindarajan},\nyear={2024},\nurl={https://openreview.net/forum?id=0Y26tFG3WF}\n}", "github": "", "project": "", "reviewers": "2inG;HA5A;88d6", "site": "https://openreview.net/forum?id=0Y26tFG3WF", "pdf_size": 1890347, "rating": "3;3;5", "confidence": "4;3;3", "soundness": "2;3;3", "contribution": "1;1;2", "presentation": "2;2;3", "wc_summary": "107;61;53", "wc_strengths": "151;20;40", "wc_weaknesses": "363;105;101", "wc_questions": "76;2;182", "wc_review": "697;188;376", "wc_reply_reviewers": "214;112;1509", "wc_reply_authors": "495;624;2332", "reply_reviewers": "2;1;11", "reply_authors": "3;2;12", "rating_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 
0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 1.3333333333333333, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 73.66666666666667, 23.79542439676633 ], "wc_strengths_avg": [ 70.33333333333333, 57.62136949276911 ], "wc_weaknesses_avg": [ 189.66666666666666, 122.5760534887989 ], "wc_questions_avg": [ 86.66666666666667, 73.87075800943761 ], "wc_review_avg": [ 420.3333333333333, 210.14968210513402 ], "wc_reply_reviewers_avg": [ 611.6666666666666, 635.8754245570083 ], "wc_reply_authors_avg": [ 1150.3333333333333, 837.2225245151707 ], "reply_reviewers_avg": [ 4.666666666666667, 4.496912521077347 ], "reply_authors_avg": [ 5.666666666666667, 4.496912521077347 ], "replies_avg": [ 36, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:o33sgZ2O4TAJ:scholar.google.com/&scioq=Inducing+Precision+in+Lagrangian+Neural+Networks+:+Proof+of+concept+application+on+Chaotic+systems&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Indian Institute of Technology Madras", "aff_unique_dep": "", "aff_unique_url": "https://www.iitm.ac.in", "aff_unique_abbr": "IIT Madras", "aff_campus_unique_index": "0", "aff_campus_unique": "Madras", "aff_country_unique_index": "0", "aff_country_unique": "India" }, { "id": "0Z6lN4GYrO", "title": "S4G: Breaking the Bottleneck on Graphs with Structured State Spaces", "track": "main", "status": "Reject", "tldr": "", "abstract": "The majority of GNNs are based on message-passing mechanisms, however, message-passing neural networks (MPNN) have inherent limitations in capturing long-range interactions. The exponentially growing node information is compressed into fixed-size representations through multiple rounds of message passing, bringing the over-squashing problem, which severely hinders the flow of information on the graph and creates a bottleneck in graph learning. The natural idea of introducing global attention to point-to-point communication, as adopted in graph Transformers (GT), lacks inductive biases on graph structures and relies on complex positional encodings to enhance their performance in practical tasks. In this paper, we observe that the sensitivity between nodes in MPNN decreases exponentially with the shortest path distance. Contrarily, GT has a constant sensitivity, which leads to its loss of inductive bias. To address these issues, we introduce structured state spaces to capture the hierarchical structure of rooted-trees, achieving linear sensitivity with theoretical guarantees. We further propose a novel graph convolution based on the state-space model, resulting in a new paradigm that retains both the strong inductive biases from MPNN and the long-range modeling capabilities from GT. 
Extensive experimental results on long-range and general graph benchmarks demonstrate the superiority of our approach.", "keywords": "GNN;over-squashing;state-space models", "primary_area": "learning on graphs and other geometries & topologies", "supplementary_material": "", "author": "Yunchong Song;Siyuan Huang;Jiacheng Cai;Xinbing Wang;Chenghu Zhou;Zhouhan Lin", "authorids": "~Yunchong_Song1;~Siyuan_Huang8;~Jiacheng_Cai1;~Xinbing_Wang1;~Chenghu_Zhou3;~Zhouhan_Lin1", "gender": "M;M;;M;M;M", "homepage": "https://github.com/realCrush;https://github.com/SiyuanHuangSJTU;https://github.com/imjccai;http://www.cs.sjtu.edu.cn/~wang-xb/;http://www.igsnrr.cas.cn/gkjj/ysfc/ysfc_zhouchenghu/;https://hantek.github.io", "dblp": "339/6816;62/885-3;;96/1149.html;85/1324.html;121/7919.html", "google_scholar": "C-TqDNsAAAAJ;https://scholar.google.com/citations?view_op=list_works;;https://scholar.google.com.tw/citations?user=CT5yZbwAAAAJ;;https://scholar.google.ca/citations?user=LNZ4efwAAAAJ", "orcid": ";;0009-0004-1304-5651;0000-0002-0357-8356;;0009-0009-7204-0689", "linkedin": ";siyuan-huang-885863235/;;;;https://ca.linkedin.com/in/zhouhan-lin-34b98975", "or_profile": "~Yunchong_Song1;~Siyuan_Huang8;~Jiacheng_Cai1;~Xinbing_Wang1;~Chenghu_Zhou3;~Zhouhan_Lin1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;IGSNRR, Chinese Academy of Sciences, Beijing, China;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;cs.sjtu.edu.cn;lreis.ac.cn;sjtu.edu.cn", "position": "PhD student;MS student;Undergrad student;Full Professor;Full Professor;Assistant Professor", "bibtex": "@misc{\nsong2024sg,\ntitle={S4G: Breaking the Bottleneck on Graphs with Structured State Spaces},\nauthor={Yunchong Song and Siyuan Huang and Jiacheng Cai and Xinbing Wang and Chenghu Zhou and Zhouhan Lin},\nyear={2024},\nurl={https://openreview.net/forum?id=0Z6lN4GYrO}\n}", "github": "", "project": "", "reviewers": "rkT5;SDke;qfiX", "site": "https://openreview.net/forum?id=0Z6lN4GYrO", "pdf_size": 297226, "rating": "3;3;8", "confidence": "4;4;3", "soundness": "2;2;3", "contribution": "1;2;3", "presentation": "2;3;3", "wc_summary": "121;87;79", "wc_strengths": "29;45;36", "wc_weaknesses": "473;175;31", "wc_questions": "5;45;5", "wc_review": "628;352;151", "wc_reply_reviewers": "121;33;0", "wc_reply_authors": "1925;1410;204", "reply_reviewers": "1;1;0", "reply_authors": "5;4;2", "rating_avg": [ 4.666666666666667, 2.357022603955158 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.0, 0.816496580927726 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 95.66666666666667, 18.208667044996883 ], "wc_strengths_avg": [ 36.666666666666664, 6.548960901462833 ], "wc_weaknesses_avg": [ 226.33333333333334, 184.06037656764212 ], "wc_questions_avg": [ 18.333333333333332, 18.856180831641268 ], "wc_review_avg": [ 377.0, 195.5351630781533 ], "wc_reply_reviewers_avg": [ 51.333333333333336, 51.07075701634177 ], "wc_reply_authors_avg": [ 1179.6666666666667, 721.2259624340272 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 3.6666666666666665, 1.247219128924647 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.9999999999999998, "gs_citation": 1, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=3235799037450276266&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Shanghai Jiao Tong University;Chinese Academy of Sciences", "aff_unique_dep": ";IGSNRR", "aff_unique_url": "https://www.sjtu.edu.cn;http://www.cas.cn", "aff_unique_abbr": "SJTU;CAS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "0ZUKLCxwBo", "title": "A simple and interpretable model of grokking modular arithmetic tasks", "track": "main", "status": "Reject", "tldr": "", "abstract": "We present a simple neural network that can generalize on various modular arithmetic tasks such as modular addition or multiplication, and exhibits a sudden jump in generalization known as \\emph{grokking}. Concretely, we present (i) fully-connected two-layer networks that exhibit grokking on various modular arithmetic tasks under vanilla gradient descent with the MSE loss function in the absence of any regularization; (ii) evidence that grokking modular arithmetic corresponds to learning specific representations whose structure is determined by the task; (iii) \\emph{analytic} expressions for the weights -- and thus for the embedding -- that solve a large class of modular arithmetic tasks; and (iv) evidence that these representations are also found by gradient descent as well as AdamW, establishing complete (\"mechanistic\") interpretability of the representations learnt by the network.", "keywords": "grokking;mechanistic interpretability;emergent capabilities;emergence;physics of AI;phase transition;circuits;pattern formation;solvable model;superposition", "primary_area": "visualization or interpretation of learned representations", "supplementary_material": "/attachment/1973960be8120f5bc7f29ceecc4b0319e7610e40.pdf", "author": "Andrey Gromov", "authorids": "~Andrey_Gromov1", "gender": "M", "homepage": "", "dblp": "", "google_scholar": "D056qfMAAAAJ", "orcid": "", "linkedin": "andrey-gromov-2329a241", "or_profile": "~Andrey_Gromov1", "aff": "University of Maryland, College Park", "aff_domain": "umd.edu", "position": "Assistant Professor", "bibtex": "@misc{\ngromov2024a,\ntitle={A simple and interpretable model of grokking modular arithmetic tasks},\nauthor={Andrey Gromov},\nyear={2024},\nurl={https://openreview.net/forum?id=0ZUKLCxwBo}\n}", "github": "", "project": "", "reviewers": "QK7D;UC6H;PaA4;9nDi;5GzG", "site": "https://openreview.net/forum?id=0ZUKLCxwBo", "pdf_size": 3005370, "rating": "5;5;6;6;8", "confidence": "3;4;3;3;4", "soundness": "2;3;2;4;4", "contribution": "2;2;2;4;4", "presentation": "2;3;3;3;4", "wc_summary": "59;46;65;35;34", "wc_strengths": "26;26;36;32;32", "wc_weaknesses": "202;83;43;241;50", "wc_questions": "1;52;41;47;44", "wc_review": "288;207;185;355;160", "wc_reply_reviewers": "0;10;0;447;7", "wc_reply_authors": "586;637;309;968;408", "reply_reviewers": "0;1;0;2;1", "reply_authors": "1;1;1;2;1", "rating_avg": [ 6.0, 1.0954451150103321 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.8944271909999159 ], "contribution_avg": [ 2.8, 0.9797958971132712 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 47.8, 12.480384609458156 ], "wc_strengths_avg": [ 30.4, 3.8781438859330635 ], "wc_weaknesses_avg": [ 123.8, 81.84228735806448 ], "wc_questions_avg": [ 37.0, 18.36300628982085 ], "wc_review_avg": [ 239.0, 72.16370278748174 ], "wc_reply_reviewers_avg": [ 92.8, 
177.1433317965991 ], "wc_reply_authors_avg": [ 581.6, 226.64562647445902 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.372677996249965, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:1q11UVO1wBYJ:scholar.google.com/&scioq=A+simple+and+interpretable+model+of+grokking+modular+arithmetic+tasks&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "University of Maryland", "aff_unique_dep": "", "aff_unique_url": "https://www/umd.edu", "aff_unique_abbr": "UMD", "aff_campus_unique_index": "0", "aff_campus_unique": "College Park", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "0aEUd9UtiA", "title": "DiffCPS: Diffusion Model based Constrained Policy Search for Offline Reinforcement Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Constrained policy search (CPS) is a fundamental problem in offline reinforcement learning, which is generally solved by advantage weighted regression (AWR). However, previous methods may still encounter out-of-distribution actions due to the limited expressivity of Gaussian-based policies. On the other hand, directly applying the state-of-the-art models with distribution expression capabilities (i.e., diffusion models) in the AWR framework is insufficient since AWR requires exact policy probability densities, which is intractable in diffusion models. In this paper, we propose a novel approach called $\\textbf{Diffusion Model based Constrained Policy Search (DiffCPS)}$, which tackles the diffusion-based constrained policy search without resorting to AWR. The theoretical analysis reveals our key insights by leveraging the action distribution of the diffusion model to eliminate the policy distribution constraint in the CPS and then utilizing the Evidence Lower Bound (ELBO) of diffusion-based policy to approximate the KL constraint. Consequently, DiffCPS admits the high expressivity of diffusion models while circumventing the cumbersome density calculation brought by AWR. Extensive experimental results based on the D4RL benchmark demonstrate the efficacy of our approach. We empirically show that DiffCPS achieves better or at least competitive performance compared to traditional AWR-based baselines as well as recent diffusion-based offline RL methods. 
Code will be made publicly available upon acceptance.", "keywords": "Offline Reinforcement Learning;Diffusion Model;Constrained Policy Search;Advantage Weighted Regression", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/2582bad1d6b9e644523f98a64d91c81c19286f6b.zip", "author": "Longxiang He;Linrui Zhang;Junbo Tan;Xueqian Wang", "authorids": "~Longxiang_He2;~Linrui_Zhang1;~Junbo_Tan1;~Xueqian_Wang1", "gender": "M;M;M;M", "homepage": "https://say-hello2y.github.io/;;;", "dblp": "358/7083;;192/2867;43/3563-1", "google_scholar": "KTGh2zYAAAAJ;;https://scholar.google.com/citations?hl=en;h9dN_ykAAAAJ", "orcid": ";;;0000-0003-3542-0593", "linkedin": ";%E9%BA%9F%E7%9D%BF-%E5%BC%A0-bb5312222/;;", "or_profile": "~Longxiang_He2;~Linrui_Zhang1;~Junbo_Tan1;~Xueqian_Wang1", "aff": "Tsinghua University;;Tsinghua University;Tsinghua University", "aff_domain": "mail.tsinghua.edu.cn;;tsinghua.edu.cn;tsinghua.edu.cn", "position": "MS student;;Associate Professor;Full Professor", "bibtex": "@misc{\nhe2024diffcps,\ntitle={Diff{CPS}: Diffusion Model based Constrained Policy Search for Offline Reinforcement Learning},\nauthor={Longxiang He and Linrui Zhang and Junbo Tan and Xueqian Wang},\nyear={2024},\nurl={https://openreview.net/forum?id=0aEUd9UtiA}\n}", "github": "", "project": "", "reviewers": "4x5N;UbgT;hHF8", "site": "https://openreview.net/forum?id=0aEUd9UtiA", "pdf_size": 2207264, "rating": "3;5;8", "confidence": "5;4;4", "soundness": "2;2;4", "contribution": "2;2;3", "presentation": "2;3;4", "wc_summary": "49;58;92", "wc_strengths": "22;74;65", "wc_weaknesses": "165;123;259", "wc_questions": "97;114;71", "wc_review": "333;369;487", "wc_reply_reviewers": "188;88;24", "wc_reply_authors": "1175;1332;594", "reply_reviewers": "1;1;1", "reply_authors": "3;3;1", "rating_avg": [ 5.333333333333333, 2.0548046676563256 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.9428090415820634 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 66.33333333333333, 18.517259216441534 ], "wc_strengths_avg": [ 53.666666666666664, 22.691163233490013 ], "wc_weaknesses_avg": [ 182.33333333333334, 56.858498827254394 ], "wc_questions_avg": [ 94.0, 17.682382946499793 ], "wc_review_avg": [ 396.3333333333333, 65.7740239169098 ], "wc_reply_reviewers_avg": [ 100.0, 67.4882705858334 ], "wc_reply_authors_avg": [ 1033.6666666666667, 317.4296072447489 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.3333333333333335, 0.9428090415820634 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8029550685469661, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17102329719247376787&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Revisiting Plasticity in Visual Reinforcement Learning: Data, Modules and Training Stages", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19614", "id": "0aR1s9YxoL", "author_site": "Guozheng Ma, Lu Li, Sen Zhang, Zixuan Liu, Zhen Wang, Yixin Chen, Li Shen, Xueqian Wang, Dacheng Tao", "tldr": "", "abstract": "Plasticity, the ability of a 
neural network to evolve with new data, is crucial for high-performance and sample-efficient visual reinforcement learning (VRL). Although methods like resetting and regularization can potentially mitigate plasticity loss, the influences of various components within the VRL framework on the agent's plasticity are still poorly understood. In this work, we conduct a systematic empirical exploration focusing on three primary underexplored facets and derive the following insightful conclusions: (1) data augmentation is essential in maintaining plasticity; (2) the critic's plasticity loss serves as the principal bottleneck impeding efficient training; and (3) without timely intervention to recover critic's plasticity in the early stages, its loss becomes catastrophic. These insights suggest a novel strategy to address the high replay ratio (RR) dilemma, where exacerbated plasticity loss hinders the potential improvements of sample efficiency brought by increased reuse frequency. Rather than setting a static RR for the entire training process, we propose Adaptive RR, which dynamically adjusts the RR based on the critic\u2019s plasticity level. Extensive evaluations indicate that Adaptive RR not only avoids catastrophic plasticity loss in the early stages but also benefits from more frequent reuse in later phases, resulting in superior sample efficiency.", "keywords": "Plasticity;Visual Reinforcement Learning;Deep Reinforcement Learning;Sample Efficiency", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Guozheng Ma;Lu Li;Sen Zhang;Zixuan Liu;Zhen Wang;Yixin Chen;Li Shen;Xueqian Wang;Dacheng Tao", "authorids": "~Guozheng_Ma2;~Lu_Li5;~Sen_Zhang3;~Zixuan_Liu2;~Zhen_Wang9;~Yixin_Chen1;~Li_Shen1;~Xueqian_Wang1;~Dacheng_Tao1", "gender": "M;M;M;;;M;M;M;", "homepage": "https://guozheng-ma.github.io/;https://github.com/lilucse;https://github.com/SenZHANG-GitHub;https://panda-shawn.github.io;;https://www.cse.wustl.edu/~yixin.chen/;https://sites.google.com/site/mathshenli/home;;", "dblp": ";;57/6221-6;254/3346-2;;59/983;91/3680-8;43/3563-1;", "google_scholar": "jDvVglUAAAAJ;QPsrZx8AAAAJ;-bJJNV0AAAAJ;D4XufdkAAAAJ;;NByrsK0AAAAJ;yVhgENIAAAAJ;h9dN_ykAAAAJ;", "orcid": ";;;0000-0003-0667-9053;;;;0000-0003-3542-0593;", "linkedin": ";;;;;;;;", "or_profile": "~Guozheng_Ma2;~Lu_Li5;~Sen_Zhang3;~Zixuan_Liu2;~Zhen_Wang9;~Yixin_Chen1;~Li_Shen1;~Xueqian_Wang1;~Dacheng_Tao1", "aff": "Tsinghua University;Tsinghua University;University of Sydney, University of Sydney;Tsinghua University;;Washington University, Saint Louis;JD Explore Academy;Tsinghua University;", "aff_domain": "tsinghua.edu.cn;mails.tsinghua.edu.cn;sydney.edu.au;mails.tsinghua.edu.cn;;wustl.edu;jd.com;tsinghua.edu.cn;", "position": "MS student;MS student;Postdoc;MS student;;Full Professor;Researcher;Full Professor;", "bibtex": "@inproceedings{\nma2024revisiting,\ntitle={Revisiting Plasticity in Visual Reinforcement Learning: Data, Modules and Training Stages},\nauthor={Guozheng Ma and Lu Li and Sen Zhang and Zixuan Liu and Zhen Wang and Yixin Chen and Li Shen and Xueqian Wang and Dacheng Tao},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=0aR1s9YxoL}\n}", "github": "", "project": "", "reviewers": "c6tM;m8KG;Kom1;7W3D", "pdf_size": 6954013, "rating": "6;6;6;6", "confidence": "4;4;4;5", "soundness": "3;3;3;3", "contribution": "2;3;3;3", "presentation": "3;4;3;3", "wc_summary": "89;89;127;87", "wc_strengths": "107;256;42;202", 
"wc_weaknesses": "217;348;398;387", "wc_questions": "3;84;1;1", "wc_review": "416;777;568;677", "wc_reply_reviewers": "729;288;297;107", "wc_reply_authors": "4341;2617;2195;1018", "reply_reviewers": "4;2;1;1", "reply_authors": "11;5;5;3", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 98.0, 16.76305461424021 ], "wc_strengths_avg": [ 151.75, 82.82624885868006 ], "wc_weaknesses_avg": [ 337.5, 72.00868003234055 ], "wc_questions_avg": [ 22.25, 35.66072769868837 ], "wc_review_avg": [ 609.5, 133.95614954155707 ], "wc_reply_reviewers_avg": [ 355.25, 228.7098325389619 ], "wc_reply_authors_avg": [ 2542.75, 1192.1628192071753 ], "reply_reviewers_avg": [ 2.0, 1.224744871391589 ], "reply_authors_avg": [ 6.0, 3.0 ], "replies_avg": [ 38, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17017605233893666116&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=0aR1s9YxoL", "pdf": "https://openreview.net/pdf?id=0aR1s9YxoL", "email": "tsinghua.edu.cn;mails.tsinghua.edu.cn;sydney.edu.au;mails.tsinghua.edu.cn;;wustl.edu;jd.com;tsinghua.edu.cn;", "author_num": 9, "aff_unique_index": "0;0;1;0;2;3;0", "aff_unique_norm": "Tsinghua University;University of Sydney;Washington University in St. Louis;JD", "aff_unique_dep": ";;;JD Explore Academy", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.sydney.edu.au;https://wustl.edu;", "aff_unique_abbr": "THU;USYD;WUSTL;", "aff_campus_unique_index": "1", "aff_campus_unique": ";Saint Louis", "aff_country_unique_index": "0;0;1;0;2;0", "aff_country_unique": "China;Australia;United States;" }, { "title": "Contrastive Difference Predictive Coding", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19613", "id": "0akLDTFR9x", "author_site": "Chongyi Zheng, Ruslan Salakhutdinov, Benjamin Eysenbach", "tldr": "", "abstract": "Predicting and reasoning about the future lie at the heart of many time-series questions. For example, goal-conditioned reinforcement learning can be viewed as learning representations to predict which states are likely to be visited in the future. While prior methods have used contrastive predictive coding to model time series data, learning representations that encode long-term dependencies usually requires large amounts of data. In this paper, we introduce a temporal difference version of contrastive predictive coding that stitches together pieces of different time series data to decrease the amount of data required to learn predictions of future events. We apply this representation learning method to derive an off-policy algorithm for goal-conditioned RL. Experiments demonstrate that, compared with prior RL methods, ours achieves $2 \\times$ median improvement in success rates and can better cope with stochastic environments. 
In tabular settings, we show that our method is about $20\\times$ more sample efficient than the successor representation and $1500 \\times$ more sample efficient than the standard (Monte Carlo) version of contrastive predictive coding.", "keywords": "contrastive learning;reinforcement learning;goal-reaching;goal-conditioned RL;temporal difference", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Chongyi Zheng;Ruslan Salakhutdinov;Benjamin Eysenbach", "authorids": "~Chongyi_Zheng1;~Ruslan_Salakhutdinov1;~Benjamin_Eysenbach1", "gender": "M;M;M", "homepage": "https://chongyi-zheng.github.io;https://ben-eysenbach.github.io/;https://www.cs.cmu.edu/~rsalakhu/", "dblp": "250/9267;192/1863;", "google_scholar": "bezWXYcAAAAJ;DRnOvU8AAAAJ;", "orcid": ";0009-0000-7136-6307;", "linkedin": ";benjamin-eysenbach-a7235775/;", "or_profile": "~Chongyi_Zheng1;~Benjamin_Eysenbach1;~Russ_Salakhutdinov1", "aff": "Princeton University;Princeton University;School of Computer Science, Carnegie Mellon University", "aff_domain": "princeton.edu;princeton.edu;cs.cmu.edu", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nzheng2024contrastive,\ntitle={Contrastive Difference Predictive Coding},\nauthor={Chongyi Zheng and Ruslan Salakhutdinov and Benjamin Eysenbach},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=0akLDTFR9x}\n}", "github": "", "project": "", "reviewers": "Jmo8;2Mg8;a2XQ;yDjK", "pdf_size": 2384774, "rating": "6;6;8;8", "confidence": "4;4;3;3", "soundness": "3;3;3;3", "contribution": "2;3;3;3", "presentation": "3;3;2;3", "wc_summary": "104;47;150;99", "wc_strengths": "83;27;38;179", "wc_weaknesses": "50;916;36;154", "wc_questions": "90;2;109;38", "wc_review": "327;992;333;470", "wc_reply_reviewers": "23;50;22;0", "wc_reply_authors": "528;1593;499;623", "reply_reviewers": "1;1;1;0", "reply_authors": "1;3;1;1", "rating_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 100.0, 36.489724581037876 ], "wc_strengths_avg": [ 81.75, 59.93903152370749 ], "wc_weaknesses_avg": [ 289.0, 364.8575064323057 ], "wc_questions_avg": [ 59.75, 42.2751404492049 ], "wc_review_avg": [ 530.5, 272.5165132611233 ], "wc_reply_reviewers_avg": [ 23.75, 17.725334975678173 ], "wc_reply_authors_avg": [ 810.75, 453.955050087561 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4196335089185927383&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "openreview": "https://openreview.net/forum?id=0akLDTFR9x", "pdf": "https://openreview.net/pdf?id=0akLDTFR9x", "email": "princeton.edu;princeton.edu;cs.cmu.edu", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Princeton University;Carnegie Mellon University", "aff_unique_dep": ";School of Computer Science", "aff_unique_url": "https://www.princeton.edu;https://www.cmu.edu", "aff_unique_abbr": "Princeton;CMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "0b328CMwn1", "title": "Visual Prompting Reimagined: The Power of Activation Prompts", "track": 
"main", "status": "Reject", "tldr": "", "abstract": "Visual prompting (VP) has emerged as a popular method to repurpose large pretrained models for downstream vision tasks. Unlike many parameter-efficient finetuning (PEFT) techniques that modify model parameters, VP introduces a universal perturbation directly into the input data to facilitate task-specific finetuning while keeping the pretrained model intact. However, there exists a noticeable performance gap between VP and conventional finetuning methods, highlighting an unexplored realm in theory and practice to understand and advance VP to close its performance gap. Towards this end, we introduce a novel concept, termed activation prompt (AP), which extends the scope of input-level VP by enabling universal perturbations to be applied to activation maps within the intermediate layers of the model. With the aid of AP, we show that VP, by its input perturbation design, has intrinsic limitations in both performance and efficiency. By contrast, AP shares a natural connection to normalization tuning, e.g., batch normalization for convolutional neural networks (CNNs) and layer normalization for vision transformers (ViTs). This illuminates the reason behind the observed better accuracy of normalization tuning than VP in the literature. Furthermore, we show that the choice of prompting exhibits a distinct preference for layer depth, with conclusions varying significantly between CNNs and ViTs. We theoretically elucidate the rationale behind such preference by analyzing global features across layers. By conducting extensive experiments across 29 datasets and various model architectures, we provide a thorough performance analysis of AP, comparing it with VP and PEFT baselines. Our experimental results demonstrate that AP significantly surpasses the input-level VP in terms of both accuracy and efficiency, considering factors like time, parameters, memory usage, and throughout. 
These results further support our new insights into the incapabilities of VP and the capabilities of AP.", "keywords": "parameter-efficient fine-tuning;transfer learning", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "/attachment/6eed97249d815d388daa515edac5e1984c1012d1.zip", "author": "Yihua Zhang;Hongkang Li;Yuguang Yao;Aochuan Chen;Shuai Zhang;Pin-Yu Chen;Meng Wang;Sijia Liu", "authorids": "~Yihua_Zhang1;~Hongkang_Li1;~Yuguang_Yao1;~Aochuan_Chen1;~Shuai_Zhang6;~Pin-Yu_Chen1;~Meng_Wang4;~Sijia_Liu1", "gender": "M;;M;M;M;M;F;M", "homepage": "https://yihua-zhang.com;https://lohek330.github.io/lihongkang.github.io/;https://www.cse.msu.edu/~yaoyugua/;https://scholar.google.com/citations?hl=en&view_op=list_works&gmla=AJsN-F6N4cEX-_kViGgRpnUVo_iBHlVXwMpnhlyB-Cdrndwj6B0jaDy088r7K9gHPGqSwsQ9tNxpijGpb1IoIB2B5KVS3Scvtdz9Mt_WR9GSou_saurFpSA&user=7pY-Ie8AAAAJ;https://inchs708.github.io/shuaizhang.github.io/index.html;http://www.pinyuchen.com;https://www.ecse.rpi.edu/~wang/index.html;https://lsjxjtu.github.io/", "dblp": ";318/8643;238/9467;331/2356;71/208-15;39/8969;93/6765-3;128/6972-1", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=DVlDPjMAAAAJ;-chIdAkAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?view_op=list_works;jxwlCUUAAAAJ;;C7dO_UgAAAAJ", "orcid": ";;;0009-0002-2300-1498;0000-0001-8280-6988;0000-0003-1039-8369;;", "linkedin": "zhangyihua/;hongkang-li-b7a341173/;tonyyaomsu/;;;pin-yu-chen-940062a2;;", "or_profile": "~Yihua_Zhang1;~Hongkang_Li1;~Yuguang_Yao1;~Aochuan_Chen1;~Shuai_Zhang6;~Pin-Yu_Chen1;~Meng_Wang4;~Sijia_Liu1", "aff": "Michigan State University;Rensselaer Polytechnic Institute;Michigan State University;Hong Kong University of Science and Technology;New Jersey Institute of Technology;International Business Machines;Rensselaer Polytechnic Institute;Michigan State University", "aff_domain": "msu.edu;rpi.edu;msu.edu;ust.hk;njit.edu;ibm.com;rpi.edu;msu.edu", "position": "PhD student;PhD student;PhD student;PhD student;Assistant Professor;Principal Researcher;Associate Professor;Assistant Professor", "bibtex": "@misc{\nzhang2024visual,\ntitle={Visual Prompting Reimagined: The Power of Activation Prompts},\nauthor={Yihua Zhang and Hongkang Li and Yuguang Yao and Aochuan Chen and Shuai Zhang and Pin-Yu Chen and Meng Wang and Sijia Liu},\nyear={2024},\nurl={https://openreview.net/forum?id=0b328CMwn1}\n}", "github": "", "project": "", "reviewers": "Z13f;jt12;Yfja;TRtL", "site": "https://openreview.net/forum?id=0b328CMwn1", "pdf_size": 3550602, "rating": "5;5;5;6", "confidence": "4;4;4;2", "soundness": "3;3;2;4", "contribution": "3;3;2;3", "presentation": "2;3;3;4", "wc_summary": "96;56;39;100", "wc_strengths": "39;28;34;70", "wc_weaknesses": "105;270;331;106", "wc_questions": "65;216;27;149", "wc_review": "305;570;431;425", "wc_reply_reviewers": "249;0;0;91", "wc_reply_authors": "1952;1143;1534;1186", "reply_reviewers": "1;0;0;1", "reply_authors": "4;3;3;3", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 72.75, 25.9939896899264 ], "wc_strengths_avg": [ 42.75, 16.20763708873073 ], "wc_weaknesses_avg": [ 203.0, 99.85739832380973 ], "wc_questions_avg": [ 114.25, 73.48256595955262 ], "wc_review_avg": [ 432.75, 93.83596059080975 ], 
"wc_reply_reviewers_avg": [ 85.0, 101.71283104898811 ], "wc_reply_authors_avg": [ 1453.75, 325.1725503482728 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.25, 0.4330127018922193 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=210525781623516819&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1;0;2;3;4;1;0", "aff_unique_norm": "Michigan State University;Rensselaer Polytechnic Institute;Hong Kong University of Science and Technology;New Jersey Institute of Technology;International Business Machines Corporation", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.msu.edu;https://www.rpi.edu;https://www.ust.hk;https://www.njit.edu;https://www.ibm.com", "aff_unique_abbr": "MSU;RPI;HKUST;NJIT;IBM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;1;0;0;0;0", "aff_country_unique": "United States;China" }, { "title": "NEFTune: Noisy Embeddings Improve Instruction Finetuning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19612", "id": "0bMmZ3fkCk", "author_site": "Neel Jain, Ping-yeh Chiang, Yuxin Wen, John Kirchenbauer, Hong-Min Chu, Gowthami Somepalli, Brian Bartoldson, Bhavya Kailkhura, Avi Schwarzschild, Aniruddha Saha, Micah Goldblum, Jonas Geiping, Tom Goldstein", "tldr": "", "abstract": "We show that language model finetuning can be improved, sometimes dramatically, with a simple augmentation. \nNEFTune adds noise to the embedding vectors during training.\nStandard finetuning of LLaMA-2-7B using Alpaca achieves $29.79$\\% on AlpacaEval, which rises to $64.69$\\% using noisy embeddings. NEFTune also improves over strong baselines on modern instruction datasets.\nModels trained with Evol-Instruct see a $10$\\% improvement, with ShareGPT an $8$\\% improvement, and with OpenPlatypus an $8$\\% improvement. \nEven powerful models further refined with RLHF such as LLaMA-2-Chat benefit from additional training with NEFTune. Particularly, we see these improvements on the conversational abilities of the instruction model and not on traditional tasks like those on the OpenLLM Leaderboard, where performance is the same.", "keywords": "Instruction Finetuning", "primary_area": "generative models", "supplementary_material": "/attachment/d78c3905a8b9202b58358079384a6a098d702065.zip", "author": "Neel Jain;Ping-yeh Chiang;Yuxin Wen;John Kirchenbauer;Hong-Min Chu;Gowthami Somepalli;Brian R. 
Bartoldson;Bhavya Kailkhura;Avi Schwarzschild;Aniruddha Saha;Micah Goldblum;Jonas Geiping;Tom Goldstein", "authorids": "~Neel_Jain1;~Ping-yeh_Chiang1;~Yuxin_Wen2;~John_Kirchenbauer1;~Hong-Min_Chu1;~Gowthami_Somepalli1;~Brian_R._Bartoldson1;~Bhavya_Kailkhura1;~Avi_Schwarzschild1;~Aniruddha_Saha1;~Micah_Goldblum1;~Jonas_Geiping1;~Tom_Goldstein1", "gender": ";;;M;;F;M;M;M;;M;M;M", "homepage": ";;https://yuxinwenrick.github.io/;https://jwkirchenbauer.notion.site/;;https://somepago.github.io/;https://people.llnl.gov/kailkhura1;https://cs.umd.edu/~avi1;https://ani0075saha.github.io/;;https://jonasgeiping.github.io/;https://www.cs.umd.edu/~tomg/;https://brianbartoldson.wordpress.com/", "dblp": ";236/4288;;321/0678;185/0720;286/5012;132/8938;249/9334.html;221/8102;241/7231;190/7229;25/8184;220/5475", "google_scholar": "https://scholar.google.com/citations?hl=en;WUoMq1IAAAAJ;oUYfjg0AAAAJ;48GJrbsAAAAJ;;T2ezBDsAAAAJ;SQpJmOgAAAAJ;WNvQ7AcAAAAJ;xfjALj0AAAAJ;pGDKzuUAAAAJ;https://scholar.google.de/citations?user=206vNCEAAAAJ;KmSuVtgAAAAJ;YdiZoJgAAAAJ", "orcid": ";;;;;;;;;;;;", "linkedin": "neel-jain-0a6a239/;;;johnkirchenbauer/;;;;;;;;;", "or_profile": "~Neel_Jain1;~Ping-yeh_Chiang1;~Yuxin_Wen2;~John_Kirchenbauer1;~Hong-Min_Chu1;~Gowthami_Somepalli1;~Bhavya_Kailkhura1;~Avi_Schwarzschild1;~Aniruddha_Saha1;~Micah_Goldblum1;~Jonas_Geiping1;~Tom_Goldstein1;~Brian_R_Bartoldson1", "aff": "University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park;Department of Computer Science, University of Maryland, College Park;University of Maryland, College Park;Lawrence Livermore National Laboratory;Carnegie Mellon University;University of Maryland, College Park;New York University;Max Planck Institute for Intelligent Systems, Max-Planck Institute;University of Maryland, College Park;Lawrence Livermore National Labs", "aff_domain": "umd.edu;umd.edu;umd.edu;umd.edu;cs.umd.edu;umd.edu;llnl.gov;cmu.edu;umd.edu;nyu.edu;tuebingen.mpg.de;umd.edu;llnl.gov", "position": "PhD student;PhD student;PhD student;PhD student;PhD student;PhD student;Research Staff;Postdoc;Postdoc;Postdoc;Principal Researcher;Full Professor;Researcher", "bibtex": "@inproceedings{\njain2024neftune,\ntitle={{NEFT}une: Noisy Embeddings Improve Instruction Finetuning},\nauthor={Neel Jain and Ping-yeh Chiang and Yuxin Wen and John Kirchenbauer and Hong-Min Chu and Gowthami Somepalli and Brian R. 
Bartoldson and Bhavya Kailkhura and Avi Schwarzschild and Aniruddha Saha and Micah Goldblum and Jonas Geiping and Tom Goldstein},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=0bMmZ3fkCk}\n}", "github": "", "project": "", "reviewers": "nFQe;kT9q;Agi9;h2qY", "pdf_size": 705584, "rating": "5;6;6;6", "confidence": "4;4;3;4", "soundness": "2;3;3;3", "contribution": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "51;49;65;145", "wc_strengths": "47;119;148;255", "wc_weaknesses": "342;199;156;344", "wc_questions": "33;86;97;48", "wc_review": "473;453;466;792", "wc_reply_reviewers": "73;26;121;1159", "wc_reply_authors": "1050;375;632;1251", "reply_reviewers": "1;1;1;2", "reply_authors": "4;2;2;3", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 77.5, 39.455671328720285 ], "wc_strengths_avg": [ 142.25, 74.76421269564737 ], "wc_weaknesses_avg": [ 260.25, 84.13790762789387 ], "wc_questions_avg": [ 66.0, 26.334388164527386 ], "wc_review_avg": [ 546.0, 142.2093527163386 ], "wc_reply_reviewers_avg": [ 344.75, 471.3058322363516 ], "wc_reply_authors_avg": [ 827.0, 343.4508698489494 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 53, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7229125578067553201&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "openreview": "https://openreview.net/forum?id=0bMmZ3fkCk", "pdf": "https://openreview.net/pdf?id=0bMmZ3fkCk", "email": "umd.edu;umd.edu;umd.edu;umd.edu;cs.umd.edu;umd.edu;llnl.gov;cmu.edu;umd.edu;nyu.edu;tuebingen.mpg.de;umd.edu;llnl.gov", "author_num": 13, "aff_unique_index": "0;0;0;0;1;0;2;3;0;4;5;0;2", "aff_unique_norm": "University of Maryland;University of Maryland, College Park;Lawrence Livermore National Laboratory;Carnegie Mellon University;New York University;Max Planck Institute for Intelligent Systems", "aff_unique_dep": ";Department of Computer Science;;;;Intelligent Systems", "aff_unique_url": "https://www/umd.edu;https://www/umd.edu;https://www.llnl.gov;https://www.cmu.edu;https://www.nyu.edu;https://www.mpi-is.mpg.de", "aff_unique_abbr": "UMD;UMD;LLNL;CMU;NYU;MPI-IS", "aff_campus_unique_index": "0;0;0;0;0;0;0;0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;1;0;0", "aff_country_unique": "United States;Germany" }, { "id": "0bjIoHD45G", "title": "Closing the gap on tabular data with Fourier and Implicit Categorical Features", "track": "main", "status": "Reject", "tldr": "", "abstract": "While Deep Learning has demonstrated impressive results in applications on various data types, it continues to lag behind tree-based methods when applied to tabular data, often referred to as the last \u201cunconquered castle\u201d for neural networks. We hypothesize that a significant advantage of tree-based methods lies in their intrinsic capability to model and exploit non-linear interactions induced by features with categorical characteristics. In contrast, neural-based methods exhibit biases toward a uniform numerical processing of features and smooth solutions, making it challenging for them to effectively leverage such patterns. 
We aim to address this performance gap by using simple, statistical-based feature processing techniques to identify and explicitly encode features that are strongly correlated with the target once discretized, as well as mitigate the bias of deep models for overly-smooth solutions, a bias that does not align with the inherent properties of the data, using Learned Fourier Features. Our proposed feature processing and method achieves a performance that closely matches or surpasses XGBoost on a comprehensive tabular data benchmark.", "keywords": "tabular data;neural networks;feature processing;deep learning;tree-based methods;xgboost", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Marius Dragoi;Florin Gogianu;Elena Burceanu", "authorids": "~Marius_Dragoi1;~Florin_Gogianu1;~Elena_Burceanu1", "gender": ";M;F", "homepage": ";;http://ilarele.github.io/", "dblp": ";230/0696;139/7814", "google_scholar": ";Zfk931sAAAAJ;bL34yDkAAAAJ", "orcid": ";;", "linkedin": ";;elena-burceanu-97016539/", "or_profile": "~Marius_Dragoi1;~Florin_Gogianu1;~Elena_Burceanu1", "aff": ";Bitdefender;Bitdefender", "aff_domain": ";bitdefender.com;bitdefender.com", "position": ";Researcher;Principal Researcher", "bibtex": "@misc{\ndragoi2024closing,\ntitle={Closing the gap on tabular data with Fourier and Implicit Categorical Features},\nauthor={Marius Dragoi and Florin Gogianu and Elena Burceanu},\nyear={2024},\nurl={https://openreview.net/forum?id=0bjIoHD45G}\n}", "github": "", "project": "", "reviewers": "tmhZ;xxW1;d92g;AFsk;HCF3", "site": "https://openreview.net/forum?id=0bjIoHD45G", "pdf_size": 386366, "rating": "3;3;5;5;5", "confidence": "5;4;5;4;4", "soundness": "2;1;4;3;3", "contribution": "2;1;3;2;2", "presentation": "1;2;3;3;3", "wc_summary": "97;126;84;18;72", "wc_strengths": "51;40;41;13;48", "wc_weaknesses": "536;610;26;37;514", "wc_questions": "1;107;11;83;129", "wc_review": "685;883;162;151;763", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "316;710;112;355;705", "reply_reviewers": "0;0;0;0;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 4.2, 0.9797958971132712 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "soundness_avg": [ 2.6, 1.019803902718557 ], "contribution_avg": [ 2.0, 0.6324555320336759 ], "presentation_avg": [ 2.4, 0.8 ], "wc_summary_avg": [ 79.4, 35.57302348690648 ], "wc_strengths_avg": [ 38.6, 13.45511055324333 ], "wc_weaknesses_avg": [ 344.6, 257.63974848613714 ], "wc_questions_avg": [ 66.2, 51.35912771844943 ], "wc_review_avg": [ 528.8, 310.47730996000337 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 439.6, 233.79871684848914 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.16666666666666666, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6422472616623222499&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Bitdefender", "aff_unique_dep": "", "aff_unique_url": "https://www.bitdefender.com", "aff_unique_abbr": "Bitdefender", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Romania" }, { "id": "0cJ8ERfnrM", "title": "Antibody DomainBed: Out-of-Distribution Generalization in Therapeutic Protein Design", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recently, there has been an increased interest in accelerating drug design with machine 
learning (ML). Active ML-guided design of biological sequences with favorable properties involves multiple design cycles in which (1) candidate sequences are proposed, (2) a subset of the candidates is selected using ML surrogate models trained to predict target properties of interest, and (3) sequences are experimentally validated. The returned experimental results from one cycle provide valuable feedback for the next one, but the modifications they inspire in the candidate proposals or experimental protocol can lead to distribution shifts that impair the performance of surrogate models in the upcoming cycle. For the surrogate models to achieve consistent performance across cycles, we must explicitly account for the distribution shifts in their training. We apply domain generalization (DG) methods to develop robust classifiers for predicting properties of therapeutic antibodies. We adapt a recent benchmark of DG algorithms, ``DomainBed,'' to deploy DG algorithms across 5 domains, or design cycles. Our results suggest that foundational models and ensembling (in both output and weight space) lead to better predictive performance on out-of-distribution domains. We publicly release our codebase and the associated dataset of antibody-antigen binding that emulates distribution shifts across design cycles.", "keywords": "domain generalization;invariance;benchmarks;drug discovery", "primary_area": "applications to physical sciences (physics, chemistry, biology, etc.)", "supplementary_material": "", "author": "Natasa Tagasovska;Ji Won Park;Matthieu Kirchmeyer;Nathan C. Frey;Andrew Martin Watkins;Aya Abdelsalam Ismail;Arian Rokkum Jamasb;Edith Lee;Tyler Bryson;Stephen Ra;Kyunghyun Cho", "authorids": "~Natasa_Tagasovska2;~Ji_Won_Park1;~Matthieu_Kirchmeyer1;~Nathan_C._Frey1;~Andrew_Martin_Watkins1;~Aya_Abdelsalam_Ismail1;~Arian_Rokkum_Jamasb1;leee76@gene.com;brysont1@gene.com;~Stephen_Ra1;~Kyunghyun_Cho1", "gender": "F;F;;;M;F;;;;M;M", "homepage": "https://datascience.ch/team_member/natasa-tagasovska-computer-scientist/;;https://mkirchmeyer.github.io;https://ncfrey.github.io/;;https://ayaismail.com/;https://www.jamasb.io;;;https://www.stephenra.com;http://kyunghyuncho.me", "dblp": ";83/10554;241/9725;306/1335;;218/6661;296/2021;;;255/5897;41/9736", "google_scholar": "S2ZUSL0AAAAJ;URG3MMYAAAAJ;oJkKtrkAAAAJ;IMUja60AAAAJ;zglcuwEAAAAJ;VDhTJHUAAAAJ;https://scholar.google.co.uk/citations?user=hYm9a-UAAAAJ;;;bxl__-MAAAAJ;https://scholar.google.fi/citations?user=0RAmmIAAAAAJ", "orcid": ";0000-0002-0692-1092;;0000-0001-5291-6131;;;0000-0002-6727-7579;;;;", "linkedin": "natasha-tagasovska/;;;ncfrey;;;jamasb/;;;;", "or_profile": "~Natasa_Tagasovska2;~Ji_Won_Park1;~Matthieu_Kirchmeyer1;~Nathan_C._Frey1;~Andrew_Martin_Watkins1;~Aya_Abdelsalam_Ismail1;~Arian_Rokkum_Jamasb1;leee76@gene.com;brysont1@gene.com;~Stephen_Ra1;~Kyunghyun_Cho1", "aff": "Prescient Design - Genentech, Roche;Genentech;Genentech;Prescient Design, Genentech;Prescient Design, Genentech;Genentech;Prescient Design / Roche / Genentech;;;Prescient Design, Genentech;Genentech", "aff_domain": "roche.com;gene.com;gene.com;gene.com;gene.com;gene.com;roche.com;;;gene.com;gene.com", "position": "Senior Machine Learning Scientis;Researcher;Researcher;Researcher;Researcher;Researcher;Researcher;;;Director of Frontier Research;Senior Director of Frontier Research", "bibtex": "@misc{\ntagasovska2024antibody,\ntitle={Antibody DomainBed: Out-of-Distribution Generalization in Therapeutic Protein Design},\nauthor={Natasa Tagasovska and Ji Won Park and Matthieu 
Kirchmeyer and Nathan C. Frey and Andrew Martin Watkins and Aya Abdelsalam Ismail and Arian Rokkum Jamasb and Edith Lee and Tyler Bryson and Stephen Ra and Kyunghyun Cho},\nyear={2024},\nurl={https://openreview.net/forum?id=0cJ8ERfnrM}\n}", "github": "", "project": "", "reviewers": "keC4;nWam;PsDP;6GLG", "site": "https://openreview.net/forum?id=0cJ8ERfnrM", "pdf_size": 8990069, "rating": "5;5;6;6", "confidence": "3;3;4;3", "soundness": "2;2;3;3", "contribution": "3;2;3;3", "presentation": "2;2;4;3", "wc_summary": "67;127;110;69", "wc_strengths": "75;49;129;69", "wc_weaknesses": "205;172;233;322", "wc_questions": "59;48;128;211", "wc_review": "406;396;600;671", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "726;659;388;1149", "reply_reviewers": "0;0;0;0", "reply_authors": "2;2;1;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 93.25, 25.96512083545925 ], "wc_strengths_avg": [ 80.5, 29.609964538985857 ], "wc_weaknesses_avg": [ 233.0, 55.73598478541489 ], "wc_questions_avg": [ 111.5, 65.11720202834272 ], "wc_review_avg": [ 518.25, 119.95910761588718 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 730.5, 272.75492662828293 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15368648657494412834&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;0;0;0;1;0;0", "aff_unique_norm": "Genentech;Roche", "aff_unique_dep": "Prescient Design;", "aff_unique_url": "https://www.gene.com;https://www.roche.com", "aff_unique_abbr": "Genentech;Roche", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;1;0;0", "aff_country_unique": "United States;Switzerland" }, { "id": "0cZDnlw0WL", "title": "Causal Discovery with Unobserved Variables: A Proxy Variable Approach", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Discovering causal relations from observational data is important. The existence of unobserved variables, such as latent confounders or mediators, can mislead the causal identification. To address this issue, proximal causal discovery methods were proposed to adjust for the bias with the proxy of the unobserved variable. However, these methods only focused on discrete variables, which limits their real-world application. Besides, the extension to the continuous case is not easy as the naive discretization method can introduce biases due to the discretization error. To tackle this challenge, we propose a new method based on a comprehensive analysis regarding discretization error. We begin by identifying the source of discretization error and how it introduces the bias. We then introduce smoothness conditions under which the discretization error can be reduced to an infinitesimal level, provided the proxy is discretized with sufficiently fine bins. We also find that such conditions can hold for a broad family of causal models, e.g., Additive Noise Model. Based on this, we design a proxy-based hypothesis test that is provable to be consistent for identifying causal relationships within continuous variables. 
We demonstrate the utility of our method on synthetic and real-world data.", "keywords": "causal discovery;unobserved variables;proxy variables;discretization", "primary_area": "causal reasoning", "supplementary_material": "", "author": "Mingzhou Liu;Xinwei Sun;Yu QIAO;Yizhou Wang", "authorids": "~Mingzhou_Liu1;~Xinwei_Sun1;~Yu_QIAO3;~Yizhou_Wang1", "gender": "M;M;M;M", "homepage": ";https://sunxinwei0625.github.io/sunxw.github.io/;http://www.pami.sjtu.edu.cn/yuqiao;https://cfcs.pku.edu.cn/wangyizhou/", "dblp": "159/6544-1;145/6592-1;q/YuQiao3;71/3387-1", "google_scholar": ";;hO33bVgAAAAJ;831z_VcAAAAJ", "orcid": "0000-0002-0297-0938;;0000-0001-8258-3868;", "linkedin": ";;;", "or_profile": "~Mingzhou_Liu1;~Xinwei_Sun1;~Yu_QIAO3;~Yizhou_Wang1", "aff": "Peking University;Fudan University;Shanghai Jiaotong University;Peking University", "aff_domain": "pku.edu.cn;fudan.edu.cn;sjtu.edu.cn;pku.edu.cn", "position": "PhD student;Assistant Professor;Associate Professor;Full Professor", "bibtex": "@misc{\nliu2024causal,\ntitle={Causal Discovery with Unobserved Variables: A Proxy Variable Approach},\nauthor={Mingzhou Liu and Xinwei Sun and Yu QIAO and Yizhou Wang},\nyear={2024},\nurl={https://openreview.net/forum?id=0cZDnlw0WL}\n}", "github": "", "project": "", "reviewers": "4NAf;eeri;iFkV;iE3i", "site": "https://openreview.net/forum?id=0cZDnlw0WL", "pdf_size": 976510, "rating": "3;3;6;6", "confidence": "5;4;3;3", "soundness": "2;3;3;3", "contribution": "2;2;3;2", "presentation": "2;1;3;3", "wc_summary": "49;82;56;50", "wc_strengths": "19;17;37;23", "wc_weaknesses": "135;92;81;62", "wc_questions": "4;180;29;15", "wc_review": "207;371;203;150", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.5, 1.5 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 59.25, 13.40475661845451 ], "wc_strengths_avg": [ 24.0, 7.810249675906654 ], "wc_weaknesses_avg": [ 92.5, 26.781523481684157 ], "wc_questions_avg": [ 57.0, 71.56465608105722 ], "wc_review_avg": [ 232.75, 82.92880983108343 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17204668074576853571&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Peking University;Fudan University;Shanghai Jiao Tong University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.pku.edu.cn;https://www.fudan.edu.cn;https://www.sjtu.edu.cn", "aff_unique_abbr": "Peking U;Fudan;SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "LiDAR-PTQ: Post-Training Quantization for Point Cloud 3D Object Detection", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19611", "id": "0d1gQI114C", "author_site": "Sifan Zhou, Liang Li, Xinyu Zhang, Bo Zhang, Shipeng Bai, Miao Sun, Ziyu Zhao, Xiaobo Lu, Xiangxiang Chu", "tldr": "", "abstract": "Due to highly constrained computing power and memory, deploying 3D lidar-based detectors on edge devices equipped in autonomous vehicles and robots poses a crucial 
challenge. Being a convenient and straightforward model compression approach, Post-Training Quantization (PTQ) has been widely adopted in 2D vision tasks. However, applying it directly to 3D lidar-based tasks inevitably leads to performance degradation. As a remedy, we propose an effective PTQ method called LiDAR-PTQ, which is particularly curated for 3D lidar detection (both SPConv-based and SPConv-free). Our LiDAR-PTQ features three main components, (1) a sparsity-based calibration method to determine the initialization of quantization parameters, (2) an adaptive rounding-to-nearest operation to minimize the layerwise reconstruction error, (3) a Task-guided Global Positive Loss (TGPL) to reduce the disparity between the final predictions before and after quantization. Extensive experiments demonstrate that our LiDAR-PTQ can achieve state-of-the-art quantization performance when applied to CenterPoint (both Pillar-based and Voxel-based). To our knowledge, for the very first time in lidar-based 3D detection tasks, the PTQ INT8 model's accuracy is almost the same as the FP32 model while enjoying 3X inference speedup. Moreover, our LiDAR-PTQ is cost-effective being 6X faster than the quantization-aware training method. The code will be released.", "keywords": "Quantization;3D Object Detection;Autonomous Driving", "primary_area": "applications to robotics, autonomy, planning", "supplementary_material": "", "author": "Sifan Zhou;Liang Li;Xinyu Zhang;Bo Zhang;Shipeng Bai;Miao Sun;Ziyu Zhao;Xiaobo Lu;Xiangxiang Chu", "authorids": "~Sifan_Zhou2;~Liang_Li10;~Xinyu_Zhang2;~Bo_Zhang7;~Shipeng_Bai1;~Miao_Sun2;~Ziyu_Zhao5;~Xiaobo_Lu1;~Xiangxiang_Chu1", "gender": "M;M;M;M;M;F;M;M;M", "homepage": "https://github.com/StiphyJay;https://myaccount.google.com/?hl=zh-CN;;;https://april.zju.edu.cn/team/shipeng-bai/;https://watercube001.github.io;;;https://cxxgtxy.github.io/", "dblp": "256/3342;;;36/2259-46;;;;93/8545;207/8002", "google_scholar": "kSdqoi0AAAAJ;z_fYeJoAAAAJ;zGLVABAAAAAJ;uUNQnu0AAAAJ;;4nYbZ0YAAAAJ;tfnKbVUAAAAJ;;jn21pUsAAAAJ", "orcid": "0000-0003-3602-7566;;;0000-0003-0564-617X;;;;;0000-0003-2548-0605", "linkedin": ";;;bo-zhang-20a86588/;;;;;", "or_profile": "~Sifan_Zhou2;~Liang_Li10;~Xinyu_Zhang2;~Bo_Zhang7;~Shipeng_Bai1;~Miao_Sun2;~Ziyu_Zhao5;~Xiaobo_Lu1;~Xiangxiang_Chu1", "aff": "Southeast University;Meituan;Meituan;Meituan Inc.;;Nanyang Technological University;Southeast University;Southeast University;MeiTuan", "aff_domain": "seu.edu.cn;meituan.com;meituan.com;meituan.com;;ntu.edu.sg;seu.edu.cn;seu.edu.cn;meituan.com", "position": "PhD student;Researcher;Researcher;Senior Software Engineer;;Postdoc;PhD student;Full Professor;Senior Engineer", "bibtex": "@inproceedings{\nzhou2024lidarptq,\ntitle={Li{DAR}-{PTQ}: Post-Training Quantization for Point Cloud 3D Object Detection},\nauthor={Sifan Zhou and Liang Li and Xinyu Zhang and Bo Zhang and Shipeng Bai and Miao Sun and Ziyu Zhao and Xiaobo Lu and Xiangxiang Chu},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=0d1gQI114C}\n}", "github": "", "project": "", "reviewers": "fLuR;3wrT;Dy1B;szHU", "pdf_size": 1397032, "rating": "6;6;6;6", "confidence": "4;3;3;4", "soundness": "2;3;3;3", "contribution": "2;3;3;2", "presentation": "3;3;3;3", "wc_summary": "41;191;60;117", "wc_strengths": "33;62;42;106", "wc_weaknesses": "213;196;43;51", "wc_questions": "17;49;80;29", "wc_review": "304;498;225;303", "wc_reply_reviewers": "79;0;0;53", "wc_reply_authors": "691;550;1024;834", 
"reply_reviewers": "1;0;0;1", "reply_authors": "2;2;3;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 102.25, 58.37540149754861 ], "wc_strengths_avg": [ 60.75, 28.154706533721853 ], "wc_weaknesses_avg": [ 125.75, 79.02966215289042 ], "wc_questions_avg": [ 43.75, 23.84716964337697 ], "wc_review_avg": [ 332.5, 100.78318312099495 ], "wc_reply_reviewers_avg": [ 33.0, 34.25638626592128 ], "wc_reply_authors_avg": [ 774.75, 175.47275429536063 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3365312712747642645&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=0d1gQI114C", "pdf": "https://openreview.net/pdf?id=0d1gQI114C", "email": "seu.edu.cn;meituan.com;meituan.com;meituan.com;;ntu.edu.sg;seu.edu.cn;seu.edu.cn;meituan.com", "author_num": 9, "aff_unique_index": "0;1;1;2;3;0;0;1", "aff_unique_norm": "Southeast University;Meituan;Meituan Inc.;Nanyang Technological University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.seu.edu.cn/;https://www.meituan.com;https://www.meituan.com;https://www.ntu.edu.sg", "aff_unique_abbr": "SEU;Meituan;Meituan;NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;0;0;0", "aff_country_unique": "China;Singapore" }, { "id": "0e98EdIksA", "title": "A Theoretical and Empirical Analysis on Reconstruction Attacks and Defenses", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Reconstruction attacks and defenses are essential in understanding the data leakage problem in machine learning. However, prior work has centered around empirical observations of gradient inversion attacks, lacks theoretical groundings, and was unable to disentangle the usefulness of defending methods versus the computational limitation of attacking methods. In this work, we propose a strong reconstruction attack in the setting of federated learning. The attack reconstructs intermediate features and nicely integrates with and outperforms most of the previous methods. On this stronger attack, we thoroughly investigate both theoretically and empirically the effect of the most common defense methods. 
Our findings suggest that among various defense mechanisms, such as gradient clipping, dropout, additive noise, local aggregation, etc., gradient pruning emerges as the most effective strategy to defend against state-of-the-art attacks.", "keywords": "federated learning;learning theory;reconstruction attack;deep leakage from gradients", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/55503793094dd9a696750b77b430c4bcfbf8b2c6.pdf", "author": "Sheng Liu;Zihan Wang;Qi Lei", "authorids": "~Sheng_Liu2;~Zihan_Wang20;~Qi_Lei1", "gender": ";M;F", "homepage": "https://shengliu66.github.io/;;https://cecilialeiqi.github.io/", "dblp": ";;", "google_scholar": "rzhzR-cAAAAJ;ZBF2zKMAAAAJ;kGOgaowAAAAJ", "orcid": ";;", "linkedin": ";zihan-wang-3b0050249/;", "or_profile": "~Sheng_Liu2;~Zihan_Wang20;~Qi_Lei1", "aff": "Stanford University;New York University;New York University", "aff_domain": "stanford.edu;nyu.edu;nyu.edu", "position": "Postdoc;MS student;Assistant Professor", "bibtex": "@misc{\nliu2024a,\ntitle={A Theoretical and Empirical Analysis on Reconstruction Attacks and Defenses},\nauthor={Sheng Liu and Zihan Wang and Qi Lei},\nyear={2024},\nurl={https://openreview.net/forum?id=0e98EdIksA}\n}", "github": "", "project": "", "reviewers": "2Bck;puZu;1hsA", "site": "https://openreview.net/forum?id=0e98EdIksA", "pdf_size": 4911, "rating": "3;3;3", "confidence": "4;3;4", "soundness": "2;1;2", "contribution": "2;2;2", "presentation": "2;1;3", "wc_summary": "41;50;56", "wc_strengths": "16;19;64", "wc_weaknesses": "346;414;327", "wc_questions": "35;49;216", "wc_review": "438;532;663", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 3.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 1.6666666666666667, 0.4714045207910317 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.0, 0.816496580927726 ], "wc_summary_avg": [ 49.0, 6.164414002968976 ], "wc_strengths_avg": [ 33.0, 21.95449840010015 ], "wc_weaknesses_avg": [ 362.3333333333333, 37.3482113211448 ], "wc_questions_avg": [ 100.0, 82.22327326655554 ], "wc_review_avg": [ 544.3333333333334, 92.26893060806306 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 3, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:WbWmshGe0vYJ:scholar.google.com/&scioq=A+Theoretical+and+Empirical+Analysis+on+Reconstruction+Attacks+and+Defenses&hl=en&as_sdt=0,14", "gs_version_total": 0, "aff_unique_index": "0;1;1", "aff_unique_norm": "Stanford University;New York University", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;https://www.nyu.edu", "aff_unique_abbr": "Stanford;NYU", "aff_campus_unique_index": "0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "0ez68a5UqI", "title": "Reinforcement Learning for Node Selection in Branch-and-Bound", "track": "main", "status": "Reject", "tldr": "", "abstract": "A big challenge in branch and bound lies in identifying the optimal node within the search tree from which to proceed. 
\nCurrent state-of-the-art selectors utilize either hand-crafted ensembles that automatically switch between naive sub-node selectors, or learned node selectors that rely on individual node data.\nWe propose a novel bi-simulation technique that uses reinforcement learning (RL) while considering the entire tree state, rather than just isolated nodes.\nTo achieve this, we train a graph neural network that produces a probability distribution based on the path from the model's root to its ``to-be-selected'' leaves. Modelling node-selection as a probability distribution allows us to train the model using state-of-the-art RL techniques that capture both intrinsic node-quality and node-evaluation costs.\nOur method induces a high quality node selection policy on a set of varied and complex problem sets, despite only being trained on specially designed, synthetic TSP instances.\nExperiments on several benchmarks show significant improvements in optimality gap reductions and per-node efficiency under strict time constraints.", "keywords": "Reinforcement Learning;Discrete Optimization;Learning Heuristics;Bi-simulation", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/3b741c9b3061a690fea6b72d2b62e0467e1d41de.zip", "author": "Alexander Julian Mattick;Christopher Mutschler", "authorids": "~Alexander_Julian_Mattick1;~Christopher_Mutschler1", "gender": "M;M", "homepage": ";https://www.cmutschler.de", "dblp": "293/7466;118/7748", "google_scholar": ";https://scholar.google.de/citations?user=gKDSp8YAAAAJ", "orcid": "0000-0001-7805-199X;0000-0001-8108-0230", "linkedin": ";christopher-mutschler-28431576/", "or_profile": "~Alexander_Julian_Mattick1;~Christopher_Mutschler1", "aff": "Friedrich-Alexander-Universit\u00e4t, Pattern Recognition Lab;Fraunhofer IIS", "aff_domain": "cs5.fau.de;fraunhofer.de", "position": "Researcher;Principal Researcher", "bibtex": "@misc{\nmattick2024reinforcement,\ntitle={Reinforcement Learning for Node Selection in Branch-and-Bound},\nauthor={Alexander Julian Mattick and Christopher Mutschler},\nyear={2024},\nurl={https://openreview.net/forum?id=0ez68a5UqI}\n}", "github": "", "project": "", "reviewers": "Lp54;KqYv;rRw3;hSJj", "site": "https://openreview.net/forum?id=0ez68a5UqI", "pdf_size": 426340, "rating": "3;3;5;5", "confidence": "4;3;3;3", "soundness": "2;2;3;2", "contribution": "3;1;2;2", "presentation": "2;2;2;3", "wc_summary": "90;46;77;96", "wc_strengths": "121;37;87;58", "wc_weaknesses": "281;284;372;182", "wc_questions": "18;5;74;21", "wc_review": "510;372;610;357", "wc_reply_reviewers": "0;149;79;0", "wc_reply_authors": "1482;912;898;546", "reply_reviewers": "0;1;1;0", "reply_authors": "3;2;2;1", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 77.25, 19.30511590226798 ], "wc_strengths_avg": [ 75.75, 31.586191603293994 ], "wc_weaknesses_avg": [ 279.75, 67.23977617452337 ], "wc_questions_avg": [ 29.5, 26.386549603917523 ], "wc_review_avg": [ 462.25, 104.08259941027607 ], "wc_reply_reviewers_avg": [ 57.0, 62.14096877262214 ], "wc_reply_authors_avg": [ 959.5, 335.42025877993717 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=6942358784740442277&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Friedrich-Alexander-Universit\u00e4t;Fraunhofer Institute for Integrated Circuits", "aff_unique_dep": "Pattern Recognition Lab;", "aff_unique_url": "https://www.uni-erlangen.de/;https://www.iis.fraunhofer.de/", "aff_unique_abbr": "FAU;Fraunhofer IIS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "id": "0fSNU64FV7", "title": "Sorting Out Quantum Monte Carlo", "track": "main", "status": "Reject", "tldr": "", "abstract": "Molecular modeling at the quantum level requires choosing a parameterization of the wavefunction that both respects the required symmetries, and is scalable to systems of many particles. For the simulation of fermions, valid parameterizations must be antisymmetric with the transposition of particles. Typically, antisymmetry is enforced by leveraging the anti-symmetry of determinants with respect to exchange of matrix rows, but this involves computing a full determinant each time the wavefunction is evaluated. Instead, we introduce a new antisymmetrization layer derived from sorting, the $\\text{\\emph{sortlet}}$, which scales as $O(N \\log N )$ in the number of particles, in contrast to the $O(N^3)$ of the determinant. We show experimentally that applying this anti-symmeterization layer on top of an attention based neural-network backbone yields a flexible wavefunction parameterization capable of reaching chemical accuracy when approximating the ground state of first-row atoms and molecules.", "keywords": "quantum chemistry;scientific machine learning;quantum monte carlo;quantum statisical mechanics;inductive bias", "primary_area": "applications to physical sciences (physics, chemistry, biology, etc.)", "supplementary_material": "/attachment/424577c531fc7cfa7fce1d26b33b8ea472ecca91.pdf", "author": "Jack Richter-Powell;Luca Thiede;Alan Aspuru-Guzik;David Duvenaud", "authorids": "~Jack_Richter-Powell1;~Luca_Thiede1;~Alan_Aspuru-Guzik2;~David_Duvenaud2", "gender": "Non-Binary;M;M;M", "homepage": "https://www.jrichterpowell.ca;;http://matter.toronto.edu;https://www.cs.toronto.edu/~duvenaud/", "dblp": ";241/6690;;86/9380", "google_scholar": ";https://scholar.google.com/citations?view_op=list_works;Ag_6KEgAAAAJ;https://scholar.google.ca/citations?user=ZLpO3XQAAAAJ", "orcid": ";;0000-0002-8277-4434;", "linkedin": ";;;", "or_profile": "~Jack_Richter-Powell1;~Luca_Thiede1;~Alan_Aspuru-Guzik2;~David_Duvenaud2", "aff": "Massachusetts Institute of Technology;Vector Institute;University of Toronto;Anthropic", "aff_domain": "mit.edu;vectorinstitute.ai;utoronto.ca;anthropic.com", "position": "PhD student;PhD student;Full Professor;Researcher", "bibtex": "@misc{\nrichter-powell2024sorting,\ntitle={Sorting Out Quantum Monte Carlo},\nauthor={Jack Richter-Powell and Luca Thiede and Alan Aspuru-Guzik and David Duvenaud},\nyear={2024},\nurl={https://openreview.net/forum?id=0fSNU64FV7}\n}", "github": "", "project": "", "reviewers": "G9wz;o8Z6;1VSk;TwGB", "site": "https://openreview.net/forum?id=0fSNU64FV7", "pdf_size": 1158565, "rating": "3;3;5;6", "confidence": "4;5;3;4", "soundness": "3;2;2;3", "contribution": "1;2;3;3", "presentation": "4;2;2;3", "wc_summary": "163;52;141;27", "wc_strengths": "35;11;176;32", "wc_weaknesses": "193;186;622;55", "wc_questions": "303;7;100;159", "wc_review": "694;256;1039;273", "wc_reply_reviewers": "166;352;58;0", 
"wc_reply_authors": "257;539;394;0", "reply_reviewers": "1;3;1;0", "reply_authors": "1;4;1;0", "rating_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 95.75, 57.469013389825996 ], "wc_strengths_avg": [ 63.5, 65.60678318588711 ], "wc_weaknesses_avg": [ 264.0, 213.8749634716509 ], "wc_questions_avg": [ 142.25, 107.46947240961035 ], "wc_review_avg": [ 565.5, 324.8311099633162 ], "wc_reply_reviewers_avg": [ 144.0, 134.05222862750176 ], "wc_reply_authors_avg": [ 297.5, 198.60828280814474 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 1.5, 1.5 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5443310539518174, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3538753825599944540&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Massachusetts Institute of Technology;Vector Institute;University of Toronto;Anthropic", "aff_unique_dep": ";;;", "aff_unique_url": "https://web.mit.edu;https://vectorinstitute.ai/;https://www.utoronto.ca;https://www.anthropic.com", "aff_unique_abbr": "MIT;Vector Institute;U of T;Anthropic", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "United States;Canada" }, { "id": "0fpLLsAynh", "title": "Sporadicity in Decentralized Federated Learning: Theory and Algorithm", "track": "main", "status": "Reject", "tldr": "", "abstract": "Decentralized Federated Learning methods are a family of techniques employed by devices in a distributed setup to (i) reach consensus over a common model which (ii) is optimal with respect to the global objective function. As this is carried out without the presence of any centralized server, prominent challenges of conventional Federated Learning become even more significant, namely heterogeneous data distributions among devices and their varying resource capabilities. In this work, we propose $\\textit{Decentralized Sporadic Federated Learning}$ ($\\texttt{DSpodFL}$), which introduces sporadicity to decentralized federated learning. $\\texttt{DSpodFL}$ includes sporadic stochastic gradient calculations and model exchanges for aggregations. Our motivation is to achieve joint computation and communication savings without losing statistical performance. We prove that by using a constant step size, our method achieves a geometric convergence rate to a finite optimality gap. 
Through numerical evaluation, we demonstrate the resource savings achieved by $\\texttt{DSpodFL}$ compared to the existing baselines.", "keywords": "Decentralized Federated Learning;Distributed Optimization;Sporadicity;Resource Efficiency;Sporadic SGDs;Anarchic Federated Learning", "primary_area": "optimization", "supplementary_material": "/attachment/63b24031205cd54823f86ccbf5d1a7ebceb5631a.pdf", "author": "Shahryar Zehtabi;Rohit Parasnis;Seyyedali Hosseinalipour;Christopher Brinton", "authorids": "~Shahryar_Zehtabi1;~Rohit_Parasnis1;~Seyyedali_Hosseinalipour1;~Christopher_Brinton1", "gender": ";M;;", "homepage": ";;;https://www.cbrinton.net/", "dblp": ";;;", "google_scholar": "kmeHQn4AAAAJ;DcAdzxQAAAAJ;;vWmHA5MAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Shahryar_Zehtabi1;~Rohit_Parasnis1;~Seyyedali_Hosseinalipour1;~Christopher_Brinton1", "aff": "Purdue University;Massachusetts Institute of Technology;;Purdue University", "aff_domain": "purdue.edu;mit.edu;;purdue.edu", "position": "PhD student;Postdoc;;Assistant Professor", "bibtex": "@misc{\nzehtabi2024sporadicity,\ntitle={Sporadicity in Decentralized Federated Learning: Theory and Algorithm},\nauthor={Shahryar Zehtabi and Rohit Parasnis and Seyyedali Hosseinalipour and Christopher Brinton},\nyear={2024},\nurl={https://openreview.net/forum?id=0fpLLsAynh}\n}", "github": "", "project": "", "reviewers": "Mn6u;Sqyj;RbX5", "site": "https://openreview.net/forum?id=0fpLLsAynh", "pdf_size": 401422, "rating": "3;3;5", "confidence": "4;4;4", "soundness": "2;3;3", "contribution": "1;1;2", "presentation": "2;2;3", "wc_summary": "41;79;111", "wc_strengths": "11;51;50", "wc_weaknesses": "239;132;211", "wc_questions": "72;181;3", "wc_review": "363;443;375", "wc_reply_reviewers": "12;253;24", "wc_reply_authors": "671;1730;612", "reply_reviewers": "1;1;1", "reply_authors": "2;3;1", "rating_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 1.3333333333333333, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 77.0, 28.61235164516658 ], "wc_strengths_avg": [ 37.333333333333336, 18.624953392931992 ], "wc_weaknesses_avg": [ 194.0, 45.306364527146364 ], "wc_questions_avg": [ 85.33333333333333, 73.27725128275183 ], "wc_review_avg": [ 393.6666666666667, 35.22625283632775 ], "wc_reply_reviewers_avg": [ 96.33333333333333, 110.8883322185983 ], "wc_reply_authors_avg": [ 1004.3333333333334, 513.688837938126 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:OahLiBq3M_IJ:scholar.google.com/&scioq=Sporadicity+in+Decentralized+Federated+Learning:+Theory+and+Algorithm&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "Purdue University;Massachusetts Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.purdue.edu;https://web.mit.edu", "aff_unique_abbr": "Purdue;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "0gDQgwjoX0", "title": "Stochastic Gradient Discrete Langevin Dynamics", "track": "main", "status": "Reject", "tldr": "", "abstract": "Sampling via Markov chain Monte Carlo can be inefficient when each evaluation of the 
gradient of energy function depends on a large dataset. In continuous spaces, this challenge has been addressed by extending Langevin samplers with stochastic gradient estimators. However, such an approach cannot be directly applied to discrete spaces, as a naive migration leads to biased estimation with large variance. To fill this gap, we propose a new sampling strategy, \\emph{Stochastic Gradient Discrete Langevin Dynamics}, to provide the first practical method for stochastic distribution sampling in discrete spaces. Our approach mitigates the bias of naive ``gradient'' estimators via a novel caching scheme, and reduces the estimation variance by introducing a modified Polyak step size control for simulation time adaptation. We demonstrate significant efficiency improvements across various sampling problems in discrete spaces, including Bayesian learning, stochastic integer programming, and prompt tuning for text-image models.", "keywords": "Stochastic Gradient;Langevin Dynamics;Discrete Langevin Dynamics;MCMC;Discrete Sampling", "primary_area": "probabilistic methods (Bayesian methods, variational inference, sampling, UQ, etc.)", "supplementary_material": "", "author": "Haoran Sun;Bethany Yixin Wang;Katayoon Goshvadi;Yuan Xue;Dale Schuurmans;Hanjun Dai", "authorids": "~Haoran_Sun2;~Bethany_Yixin_Wang1;~Katayoon_Goshvadi1;~Yuan_Xue5;~Dale_Schuurmans1;~Hanjun_Dai1", "gender": "M;F;F;F;;M", "homepage": ";;;;;https://hanjun-dai.github.io", "dblp": ";334/1546;;;;144/7311", "google_scholar": "p7of_yoAAAAJ;;;jcatRRIAAAAJ;;obpl7GQAAAAJ", "orcid": ";;;;;", "linkedin": ";yixin-wang/;katayoon-goshvadi/;yuan-emily-xue-3483012;;hanjun-dai", "or_profile": "~Haoran_Sun2;~Bethany_Yixin_Wang1;~Katayoon_Goshvadi1;~Yuan_Xue5;~Dale_Schuurmans1;~Hanjun_Dai1", "aff": ";Google;;Google;;Google Research", "aff_domain": ";google.com;;google.com;;google.com", "position": ";Researcher;;Researcher;;Researcher", "bibtex": "@misc{\nsun2024stochastic,\ntitle={Stochastic Gradient Discrete Langevin Dynamics},\nauthor={Haoran Sun and Bethany Yixin Wang and Katayoon Goshvadi and Yuan Xue and Dale Schuurmans and Hanjun Dai},\nyear={2024},\nurl={https://openreview.net/forum?id=0gDQgwjoX0}\n}", "github": "", "project": "", "reviewers": "m4Ve;Ap8o;i63H", "site": "https://openreview.net/forum?id=0gDQgwjoX0", "pdf_size": 6662571, "rating": "3;5;6", "confidence": "2;4;2", "soundness": "2;2;3", "contribution": "2;2;2", "presentation": "1;3;3", "wc_summary": "151;54;34", "wc_strengths": "39;30;55", "wc_weaknesses": "98;35;40", "wc_questions": "154;305;2", "wc_review": "442;424;131", "wc_reply_reviewers": "0;0;19", "wc_reply_authors": "716;356;333", "reply_reviewers": "0;0;1", "reply_authors": "1;1;1", "rating_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 2.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 79.66666666666667, 51.09685791600975 ], "wc_strengths_avg": [ 41.333333333333336, 10.338708279513881 ], "wc_weaknesses_avg": [ 57.666666666666664, 28.592928418676454 ], "wc_questions_avg": [ 153.66666666666666, 123.69945656936233 ], "wc_review_avg": [ 332.3333333333333, 142.55369358790938 ], "wc_reply_reviewers_avg": [ 6.333333333333333, 8.956685895029603 ], "wc_reply_authors_avg": [ 468.3333333333333, 175.37832122459022 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 
9, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.1889822365046137, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:3md5aSvelN8J:scholar.google.com/&scioq=Stochastic+Gradient+Discrete+Langevin+Dynamics&hl=en&as_sdt=0,31", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "TopoMLP: A Simple yet Strong Pipeline for Driving Topology Reasoning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19610", "id": "0gTW5JUFTW", "author_site": "Dongming Wu, Jiahao Chang, Fan Jia, Yingfei Liu, Tiancai Wang, Jianbing Shen", "tldr": "", "abstract": "Topology reasoning aims to comprehensively understand road scenes and present drivable routes in autonomous driving. It requires detecting road centerlines (lane) and traffic elements, further reasoning their topology relationship, \\textit{i.e.}, lane-lane topology, and lane-traffic topology. In this work, we first present that the topology score relies heavily on detection performance on lane and traffic elements. Therefore, we introduce a powerful 3D lane detector and an improved 2D traffic element detector to extend the upper limit of topology performance. Further, we propose TopoMLP, a simple yet high-performance pipeline for driving topology reasoning. Based on the impressive detection performance, we develop two simple MLP-based heads for topology generation. TopoMLP achieves state-of-the-art performance on OpenLane-V2 dataset, \\textit{i.e.}, 41.2\\% OLS with ResNet-50 backbone. It is also the 1st solution for 1st OpenLane Topology in Autonomous Driving Challenge. We hope such simple and strong pipeline can provide some new insights to the community. 
Code is at https://github.com/wudongming97/TopoMLP.", "keywords": "Autonomous Driving;Driving Topology Understanding", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Dongming Wu;Jiahao Chang;Fan Jia;Yingfei Liu;Tiancai Wang;Jianbing Shen", "authorids": "~Dongming_Wu1;~Jiahao_Chang3;~Fan_Jia7;~Yingfei_Liu1;~Tiancai_Wang1;~Jianbing_Shen1", "gender": "M;M;M;M;M;M", "homepage": "https://wudongming97.github.io/;;;;https://www.wangeniusky.com/;https://scholar.google.com/citations?user=_Q3NTToAAAAJ&hl=en", "dblp": ";;;13/5577;179/0530;38/5435", "google_scholar": "ejFCAq0AAAAJ;HA5zLp4AAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=YI0sRroAAAAJ;_Q3NTToAAAAJ", "orcid": "0000-0003-4938-5813;0009-0009-6877-1649;0000-0002-0252-7207;0000-0002-2412-5225;;0000-0003-2656-3082", "linkedin": ";;;;;", "or_profile": "~Dongming_Wu1;~Jiahao_Chang3;~Fan_Jia7;~Yingfei_Liu1;~Tiancai_Wang1;~Jianbing_Shen1", "aff": "Beijing Institute of Technology;The Chinese University of Hong Kong, Shenzhen;Megvii Technology Inc.;Megvii Technology Inc.;Megvii Technology Inc.;University of Macau", "aff_domain": "bit.edu.cn;cuhk.edu.cn;megvii.com;megvii.com;megvii.com;um.edu.mo", "position": "PhD student;Intern;Researcher;Researcher;Researcher;Full Professor", "bibtex": "@inproceedings{\nwu2024topomlp,\ntitle={Topo{MLP}: A Simple yet Strong Pipeline for Driving Topology Reasoning},\nauthor={Dongming Wu and Jiahao Chang and Fan Jia and Yingfei Liu and Tiancai Wang and Jianbing Shen},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=0gTW5JUFTW}\n}", "github": "", "project": "", "reviewers": "bKsZ;tnze;MJLZ;Cep6", "pdf_size": 2432095, "rating": "6;6;6;8", "confidence": "5;4;3;4", "soundness": "3;3;3;3", "contribution": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "132;54;230;87", "wc_strengths": "162;97;27;56", "wc_weaknesses": "75;126;66;73", "wc_questions": "178;235;35;48", "wc_review": "547;512;358;264", "wc_reply_reviewers": "23;28;24;15", "wc_reply_authors": "403;685;316;317", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 125.75, 66.25094338950956 ], "wc_strengths_avg": [ 85.5, 50.6877697280123 ], "wc_weaknesses_avg": [ 85.0, 23.90606617576384 ], "wc_questions_avg": [ 124.0, 85.04998530276181 ], "wc_review_avg": [ 420.25, 114.86160150372272 ], "wc_reply_reviewers_avg": [ 22.5, 4.716990566028302 ], "wc_reply_authors_avg": [ 430.25, 151.2603302257403 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15140104278097721464&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=0gTW5JUFTW", "pdf": "https://openreview.net/pdf?id=0gTW5JUFTW", "email": "bit.edu.cn;cuhk.edu.cn;megvii.com;megvii.com;megvii.com;um.edu.mo", "author_num": 6, "aff_unique_index": "0;1;2;2;2;3", "aff_unique_norm": "Beijing Institute of Technology;Chinese University of Hong Kong;Megvii Technology;University of Macau", 
"aff_unique_dep": ";;;", "aff_unique_url": "http://www.bit.edu.cn/;https://www.cuhk.edu.cn;https://www.megvii.com;https://www.um.edu.mo", "aff_unique_abbr": "BIT;CUHK;Megvii;UM", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Shenzhen;Macau SAR", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Variance Reduced Halpern Iteration for Finite-Sum Monotone Inclusions", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19609", "id": "0i6Z9N5MLY", "author_site": "Xufeng Cai, Ahmet Alacaoglu, Jelena Diakonikolas", "tldr": "", "abstract": "Machine learning approaches relying on such criteria as adversarial robustness or multi-agent settings have raised the need for solving game-theoretic equilibrium problems. Of particular relevance to these applications are methods targeting finite-sum structure, which generically arises in empirical variants of learning problems in these contexts. Further, methods with computable approximation errors are highly desirable, as they provide verifiable exit criteria. Motivated by these applications, we study finite-sum monotone inclusion problems, which model broad classes of equilibrium problems. Our main contributions are variants of the classical Halpern iteration that employ variance reduction to obtain improved complexity guarantees in which $n$ component operators in the finite sum are ``on average'' either cocoercive or Lipschitz continuous and monotone, with parameter $L$. The resulting oracle complexity of our methods, which provide guarantees for the last iterate and for a (computable) operator norm residual, is $\\widetilde{\\mathcal{O}}( n + \\sqrt{n}L\\varepsilon^{-1})$, which improves upon existing methods by a factor up to $\\sqrt{n}$. This constitutes the first variance reduction-type result for general finite-sum monotone inclusions and for more specific problems such as convex-concave optimization when operator norm residual is the optimality measure. 
We further argue that, up to poly-logarithmic factors, this complexity is unimprovable in the monotone Lipschitz setting; i.e., the provided result is near-optimal.", "keywords": "finite-sum problems;monotone inclusion;operator norm residual;stochastic Halpern iteration;last iterate convergence;variance reduction;min-max optimization", "primary_area": "optimization", "supplementary_material": "/attachment/279e1f4321206702dce0ad254d5fbaebb5cc217f.zip", "author": "Xufeng Cai;Ahmet Alacaoglu;Jelena Diakonikolas", "authorids": "~Xufeng_Cai1;~Ahmet_Alacaoglu2;~Jelena_Diakonikolas2", "gender": ";;F", "homepage": ";https://ahmetalacaoglu.github.io;http://www.jelena-diakonikolas.com/", "dblp": ";209/4889;147/5178", "google_scholar": ";-yRi8D4AAAAJ;J8ixfu8AAAAJ", "orcid": ";;0000-0003-3439-0310", "linkedin": ";;", "or_profile": "~Xufeng_Cai1;~Ahmet_Alacaoglu2;~Jelena_Diakonikolas2", "aff": ";University of Wisconsin-Madison;University of Wisconsin, Madison", "aff_domain": ";wisc.edu;wisc.edu", "position": ";Postdoc;Assistant Professor", "bibtex": "@inproceedings{\ncai2024variance,\ntitle={Variance Reduced Halpern Iteration for Finite-Sum Monotone Inclusions},\nauthor={Xufeng Cai and Ahmet Alacaoglu and Jelena Diakonikolas},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=0i6Z9N5MLY}\n}", "github": "", "project": "", "reviewers": "9YKM;wjm9;P51Q", "pdf_size": 689876, "rating": "6;6;6", "confidence": "3;3;3", "soundness": "3;3;3", "contribution": "3;3;2", "presentation": "3;2;3", "wc_summary": "69;48;69", "wc_strengths": "56;28;59", "wc_weaknesses": "412;635;149", "wc_questions": "2;308;42", "wc_review": "539;1019;319", "wc_reply_reviewers": "21;495;0", "wc_reply_authors": "2298;2809;1528", "reply_reviewers": "1;1;0", "reply_authors": "5;6;3", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 62.0, 9.899494936611665 ], "wc_strengths_avg": [ 47.666666666666664, 13.960261060914616 ], "wc_weaknesses_avg": [ 398.6666666666667, 198.63254740572825 ], "wc_questions_avg": [ 117.33333333333333, 135.80705267236883 ], "wc_review_avg": [ 625.6666666666666, 292.27080289043965 ], "wc_reply_reviewers_avg": [ 172.0, 228.55633878761708 ], "wc_reply_authors_avg": [ 2211.6666666666665, 526.5170673608048 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 4.666666666666667, 1.247219128924647 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6364381838039303577&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 6, "openreview": "https://openreview.net/forum?id=0i6Z9N5MLY", "pdf": "https://openreview.net/pdf?id=0i6Z9N5MLY", "email": ";wisc.edu;wisc.edu", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "University of Wisconsin-Madison;University of Wisconsin", "aff_unique_dep": ";", "aff_unique_url": "https://www.wisc.edu;https://www.wisc.edu", "aff_unique_abbr": "UW-Madison;UW", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Madison", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "UNR-Explainer: Counterfactual Explanations for Unsupervised Node Representation Learning Models", "status": "Poster", "track": "main", "site": 
"https://iclr.cc/virtual/2024/poster/19608", "id": "0j9ZDzMPqr", "author_site": "Hyunju Kang, Geonhee Han, Hogun Park", "tldr": "", "abstract": "Node representation learning, such as Graph Neural Networks (GNNs), has become one of the important learning methods in machine learning, and the demand for reliable explanation generation is growing. Despite extensive research on explanation generation for supervised node representation learning, explaining unsupervised models has been less explored. To address this gap, we propose a method for generating counterfactual (CF) explanations in unsupervised node representation learning, aiming to identify the most important subgraphs that cause a significant change in the $k$-nearest neighbors of a node of interest in the learned embedding space upon perturbation. The $k$-nearest neighbor-based CF explanation method provides simple, yet pivotal, information for understanding unsupervised downstream tasks, such as top-$k$ link prediction and clustering. Furthermore, we introduce a Monte Carlo Tree Search (MCTS)-based explainability method for generating expressive CF explanations for **U**nsupervised **N**ode **R**epresentation learning methods, which we call **UNR-Explainer**. The proposed method demonstrates improved performance on six datasets for both unsupervised GraphSAGE and DGI.", "keywords": "XAI;Unsupervised node representation learning;Counterfactual Explanations", "primary_area": "visualization or interpretation of learned representations", "supplementary_material": "", "author": "Hyunju Kang;Geonhee Han;Hogun Park", "authorids": "~Hyunju_Kang1;~Geonhee_Han1;~Hogun_Park2", "gender": ";M;", "homepage": ";https://learndatalab.github.io/;https://hogunpark.com", "dblp": "382/4096;382/3962;05/3540", "google_scholar": "yI623McAAAAJ;1IbJqfkAAAAJ;0YEYuGIAAAAJ", "orcid": ";0009-0000-8967-5255;0000-0003-0576-5806", "linkedin": ";;hogunpark/en", "or_profile": "~Hyunju_Kang1;~Geonhee_Han1;~Hogun_Park2", "aff": "Sungkyunkwan University;Sung Kyun Kwan University;Sungkyunkwan University", "aff_domain": "skku.edu;skku.edu;skku.edu", "position": "PhD student;MS student;Assistant Professor", "bibtex": "@inproceedings{\nkang2024unrexplainer,\ntitle={{UNR}-Explainer: Counterfactual Explanations for Unsupervised Node Representation Learning Models},\nauthor={Hyunju Kang and Geonhee Han and Hogun Park},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=0j9ZDzMPqr}\n}", "github": "", "project": "", "reviewers": "TnSw;8JCL;eAe7;WfpS", "pdf_size": 1935301, "rating": "6;6;6;8", "confidence": "3;3;4;3", "soundness": "2;3;3;3", "contribution": "2;3;2;3", "presentation": "2;2;3;3", "wc_summary": "80;87;70;392", "wc_strengths": "52;109;51;169", "wc_weaknesses": "77;109;174;120", "wc_questions": "27;105;70;94", "wc_review": "236;410;365;775", "wc_reply_reviewers": "0;13;11;0", "wc_reply_authors": "474;995;1523;284", "reply_reviewers": "0;1;1;0", "reply_authors": "1;2;3;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 157.25, 135.66756244585514 ], "wc_strengths_avg": [ 95.25, 48.6229112661922 ], "wc_weaknesses_avg": [ 120.0, 34.94996423460259 ], "wc_questions_avg": [ 74.0, 29.941609843159736 ], "wc_review_avg": [ 446.5, 200.12308712389984 ], "wc_reply_reviewers_avg": [ 6.0, 6.041522986797286 ], 
"wc_reply_authors_avg": [ 819.0, 482.6598180913758 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14211516623000206023&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "openreview": "https://openreview.net/forum?id=0j9ZDzMPqr", "pdf": "https://openreview.net/pdf?id=0j9ZDzMPqr", "email": "skku.edu;skku.edu;skku.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Sungkyunkwan University", "aff_unique_dep": "", "aff_unique_url": "https://www.skku.edu", "aff_unique_abbr": "SKKU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "Magic123: One Image to High-Quality 3D Object Generation Using Both 2D and 3D Diffusion Priors", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19607", "id": "0jHkUDyEO9", "author_site": "Guocheng Qian, Jinjie Mai, Abdullah Hamdi, Jian Ren, Aliaksandr Siarohin, Bing Li, Hsin-Ying Lee, Ivan Skorokhodov, Peter Wonka, Sergey Tulyakov, Bernard Ghanem", "tldr": "", "abstract": "We present ``Magic123'', a two-stage coarse-to-fine approach for high-quality, textured 3D mesh generation from a single image in the wild using *both 2D and 3D priors*. In the first stage, we optimize a neural radiance field to produce a coarse geometry. In the second stage, we adopt a memory-efficient differentiable mesh representation to yield a high-resolution mesh with a visually appealing texture. In both stages, the 3D content is learned through reference-view supervision and novel-view guidance by a joint 2D and 3D diffusion prior. We introduce a trade-off parameter between the 2D and 3D priors to control the details and 3D consistencies of the generation. 
Magic123 demonstrates a significant improvement over previous image-to-3D techniques, as validated through extensive experiments on diverse synthetic and real-world images.", "keywords": "Neural Radiance Fields;Shape from Image;Generative 3D models", "primary_area": "generative models", "supplementary_material": "/attachment/4ad77057b440a0face0239bab4dbc19a1d7954d8.zip", "author": "Guocheng Qian;Jinjie Mai;Abdullah Hamdi;Jian Ren;Aliaksandr Siarohin;Bing Li;Hsin-Ying Lee;Ivan Skorokhodov;Peter Wonka;Sergey Tulyakov;Bernard Ghanem", "authorids": "~Guocheng_Qian1;~Jinjie_Mai1;~Abdullah_Hamdi1;~Jian_Ren2;~Aliaksandr_Siarohin1;~Bing_Li7;~Hsin-Ying_Lee2;~Ivan_Skorokhodov1;~Peter_Wonka1;~Sergey_Tulyakov1;~Bernard_Ghanem1", "gender": "M;M;M;M;F;M;M;M;M;M;M", "homepage": "https://guochengqian.github.io/;;https://alanspike.github.io/;;https://cemse.kaust.edu.sa/vcc/people/person/bing-li;http://hsinyinglee.com/;https://universome.github.io/;http://peterwonka.net;http://www.stulyakov.com/;https://ivul.kaust.edu.sa;https://abdullahamdi.com/", "dblp": "241/7000;272/0975;59/2180-5;199/1971;13/2692-24;149/7976-1.html;223/0010;98/5522;40/6115;37/2516;205/2625", "google_scholar": "DUDaxg4AAAAJ;;https://scholar.google.co.jp/citations?user=vDALiU4AAAAJ;https://scholar.google.it/citations?user=uMl5-k4AAAAJ;;;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=0EKXSXgAAAAJ;mgzXR0sAAAAJ;rVsGTeEAAAAJ;tQkWPKAAAAAJ", "orcid": "0000-0002-2935-8570;0000-0002-3396-1970;;;;;0000-0002-7611-9310;0000-0003-0627-9746;;0000-0002-5534-587X;0000-0003-3989-7540", "linkedin": "guochengqian/;;;;;;ivan-skorokhodov;;sergeytulyakov/;bernardghanem/;ajhamdi/", "or_profile": "~Guocheng_Qian1;~Jinjie_Mai1;~Jian_Ren2;~Aliaksandr_Siarohin1;~Bing_Li7;~Hsin-Ying_Lee2;~Ivan_Skorokhodov1;~Peter_Wonka1;~Sergey_Tulyakov1;~Bernard_Ghanem1;~Abdullah_Jamal_Hamdi1", "aff": "Snap Inc.;King Abdullah University of Science and Technology;Snap Inc.;Snap Inc.;KAUST;Snap Inc.;Snap Inc.;KAUST;Snap Inc.;King Abdullah University of Science and Technology;University of Oxford", "aff_domain": "snapchat.com;kaust.edu.sa;snapchat.com;snapchat.com;kaust.edu.sa;snap.com;snap.com;kaust.edu.sa;snapchat.com;kaust.edu.sa;eng.ox.ac.uk", "position": "Researcher;MS student;Research Scientist;Intern;Postdoc;Researcher;Researcher;Full Professor;Director of Research;Full Professor;Postdoc", "bibtex": "@inproceedings{\nqian2024magic,\ntitle={Magic123: One Image to High-Quality 3D Object Generation Using Both 2D and 3D Diffusion Priors},\nauthor={Guocheng Qian and Jinjie Mai and Abdullah Hamdi and Jian Ren and Aliaksandr Siarohin and Bing Li and Hsin-Ying Lee and Ivan Skorokhodov and Peter Wonka and Sergey Tulyakov and Bernard Ghanem},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=0jHkUDyEO9}\n}", "github": "", "project": "", "reviewers": "jWBN;UQdQ;25qf;Ea4Z", "pdf_size": 26253959, "rating": "5;5;8;8", "confidence": "4;5;5;4", "soundness": "2;3;3;4", "contribution": "2;2;3;4", "presentation": "2;3;3;4", "wc_summary": "66;54;84;85", "wc_strengths": "48;40;259;116", "wc_weaknesses": "224;140;321;225", "wc_questions": "120;19;25;35", "wc_review": "458;253;689;461", "wc_reply_reviewers": "0;133;82;10", "wc_reply_authors": "951;626;945;387", "reply_reviewers": "0;2;1;1", "reply_authors": "2;2;2;1", "rating_avg": [ 6.5, 1.5 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.75, 0.82915619758885 ], 
"presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 72.25, 12.968712349342937 ], "wc_strengths_avg": [ 115.75, 87.81906114278381 ], "wc_weaknesses_avg": [ 227.5, 64.06442070291435 ], "wc_questions_avg": [ 49.75, 40.959583738119214 ], "wc_review_avg": [ 465.25, 154.26012932705586 ], "wc_reply_reviewers_avg": [ 56.25, 54.444352324185104 ], "wc_reply_authors_avg": [ 727.25, 236.37932968007163 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 356, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9455191109482969642&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "openreview": "https://openreview.net/forum?id=0jHkUDyEO9", "pdf": "https://openreview.net/pdf?id=0jHkUDyEO9", "email": "snapchat.com;kaust.edu.sa;snapchat.com;snapchat.com;kaust.edu.sa;snap.com;snap.com;kaust.edu.sa;snapchat.com;kaust.edu.sa;eng.ox.ac.uk", "author_num": 11, "aff_unique_index": "0;1;0;0;1;0;0;1;0;1;2", "aff_unique_norm": "Snap Inc.;King Abdullah University of Science and Technology;University of Oxford", "aff_unique_dep": ";;", "aff_unique_url": "https://www.snapinc.com;https://www.kast.kau.edu.sa;https://www.ox.ac.uk", "aff_unique_abbr": "Snap;KAUST;Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;1;0;0;1;0;1;2", "aff_country_unique": "United States;Saudi Arabia;United Kingdom" }, { "title": "Sparse Spiking Neural Network: Exploiting Heterogeneity in Timescales for Pruning Recurrent SNN", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19606", "id": "0jsfesDZDq", "author_site": "Biswadeep Chakraborty, Beomseok Kang, Harshit Kumar, Saibal Mukhopadhyay", "tldr": "", "abstract": "Recurrent Spiking Neural Networks (RSNNs) have emerged as a computationally efficient and brain-inspired machine learning model. The design of sparse RSNNs with fewer neurons and synapses helps reduce the computational complexity of RSNNs. Traditionally, sparse SNNs are obtained by first training a dense and complex SNN for a target task and, next, eliminating neurons with low activity (activity-based pruning) while maintaining task performance. In contrast, this paper presents a task-agnostic methodology for designing sparse RSNNs by pruning an untrained (arbitrarily initialized) large model. \nWe introduce a novel Lyapunov Noise Pruning (LNP) algorithm that uses graph sparsification methods and utilizes Lyapunov exponents to design a stable sparse RSNN from an untrained RSNN. We show that the LNP can leverage diversity in neuronal timescales to design a sparse Heterogeneous RSNN (HRSNN). Further, we show that the same sparse HRSNN model can be trained for different tasks, such as image classification and time-series prediction. 
The experimental results show that, in spite of being task-agnostic, LNP increases computational efficiency (fewer neurons and synapses) and prediction performance of RSNNs compared to traditional activity-based pruning of trained dense models.", "keywords": "spiking neural network;SNN;network pruning;stability;neuromorphic;leaky integrate and fire;STDP;sparsification;task-agnostic pruning;timescale optimization", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/d86e0dbdc96d62a023bc91a41b638f899c25509b.zip", "author": "Biswadeep Chakraborty;Beomseok Kang;Harshit Kumar;Saibal Mukhopadhyay", "authorids": "~Biswadeep_Chakraborty1;~Beomseok_Kang1;hkumar64@gatech.edu;~Saibal_Mukhopadhyay2", "gender": "M;M;;M", "homepage": ";https://sites.google.com/view/beomseok-kang;;https://greenlab.ece.gatech.edu", "dblp": "238/0554;309/1053;;66/1210", "google_scholar": "8soIjY8AAAAJ;kbqaf1EAAAAJ;;5KRtMEkAAAAJ", "orcid": ";;;0000-0002-8894-3390", "linkedin": ";;;", "or_profile": "~Biswadeep_Chakraborty1;~Beomseok_Kang1;hkumar64@gatech.edu;~Saibal_Mukhopadhyay2", "aff": "Georgia Institute of Technology;Georgia Institute of Technology;;Georgia Institute of Technology", "aff_domain": "gatech.edu;gatech.edu;;gatech.edu", "position": "PhD student;PhD student;;Full Professor", "bibtex": "@inproceedings{\nchakraborty2024sparse,\ntitle={Sparse Spiking Neural Network: Exploiting Heterogeneity in Timescales for Pruning Recurrent {SNN}},\nauthor={Biswadeep Chakraborty and Beomseok Kang and Harshit Kumar and Saibal Mukhopadhyay},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=0jsfesDZDq}\n}", "github": "", "project": "", "reviewers": "mdv5;MeNm;YTsp;udA5", "pdf_size": 12466223, "rating": "5;6;8;8", "confidence": "4;3;4;4", "soundness": "3;3;3;3", "contribution": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "145;40;113;83", "wc_strengths": "18;64;63;69", "wc_weaknesses": "157;79;121;105", "wc_questions": "4;101;25;164", "wc_review": "324;284;322;421", "wc_reply_reviewers": "0;0;80;50", "wc_reply_authors": "1684;1534;1557;2817", "reply_reviewers": "0;0;1;1", "reply_authors": "3;3;3;6", "rating_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 95.25, 38.70642711488623 ], "wc_strengths_avg": [ 53.5, 20.62159062730128 ], "wc_weaknesses_avg": [ 115.5, 28.26216552212516 ], "wc_questions_avg": [ 73.5, 63.5 ], "wc_review_avg": [ 337.75, 50.637807022026536 ], "wc_reply_reviewers_avg": [ 32.5, 34.18698582794336 ], "wc_reply_authors_avg": [ 1898.0, 533.6511032500542 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.75, 1.299038105676658 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14817744695773258013&as_sdt=805&sciodt=0,3&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=0jsfesDZDq", "pdf": "https://openreview.net/pdf?id=0jsfesDZDq", "email": "gatech.edu;gatech.edu;;gatech.edu", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "0k85noSawb", "title": "Variance-Covariance Regularization Improves Representation Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Transfer learning plays a key role in advancing machine learning models, yet conventional supervised pretraining often undermines feature transferability by prioritizing features that minimize the pretraining loss. Recent progress in self-supervised learning (SSL) has introduced regularization techniques that bolster feature transferability. In this work, we adapt an SSL regularization technique from the VICReg method to supervised learning contexts, introducing Variance-Covariance Regularization (VCReg). This adaptation encourages the network to learn a high-variance, low-covariance representation, promoting the learning of more diverse features. We outline best practices for implementing this regularization framework into various neural network architectures and present an optimized strategy for regularizing intermediate representations. Through extensive empirical evaluation, we demonstrate that our method significantly enhances transfer learning, achieving excellent performance across numerous tasks and datasets. VCReg also improves performance in scenarios like long-tail learning, and hierarchical classification. Additionally, we conduct analyses to suggest that its effectiveness may stem from its success in addressing challenges like gradient starvation and neural collapse. In summary, VCReg offers a universally applicable regularization framework that significantly advances the state of transfer learning, highlights the connection between gradient starvation, neural collapse, and feature transferability, and potentially opens a new avenue for regularization in this domain.", "keywords": "Representation Learning;Transfer Learning;Regularization", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "", "author": "Jiachen Zhu;Ravid Shwartz-Ziv;Yubei Chen;Yann LeCun", "authorids": "~Jiachen_Zhu1;~Ravid_Shwartz-Ziv2;~Yubei_Chen1;~Yann_LeCun1", "gender": "M;M;M;M", "homepage": "https://cs.nyu.edu/~jz3224/;https://redwood.berkeley.edu/people/yubei-chen/;http://yann.lecun.com;https://www.ravid-shwartz-ziv.com/", "dblp": "250/0741-2;30/10064;l/YannLeCun;", "google_scholar": "https://scholar.google.com/citations?hl=en;WeyLqFUAAAAJ;WLN3QrAAAAAJ;https://scholar.google.co.il/citations?user=SqsLFwMAAAAJ", "orcid": ";;;", "linkedin": ";yubei-chen-05998a39/;;", "or_profile": "~Jiachen_Zhu1;~Yubei_Chen1;~Yann_LeCun1;~ravid_ziv1", "aff": "New York University;University of California, Davis;New York University;New York University", "aff_domain": "nyu.edu;ucdavis.edu;nyu.edu;nyu.edu", "position": "PhD student;Assistant Professor;Full Professor;Postdoc", "bibtex": "@misc{\nzhu2024variancecovariance,\ntitle={Variance-Covariance Regularization Improves Representation Learning},\nauthor={Jiachen Zhu and Ravid Shwartz-Ziv and Yubei Chen and Yann LeCun},\nyear={2024},\nurl={https://openreview.net/forum?id=0k85noSawb}\n}", "github": "", "project": "", "reviewers": "NHSr;qZNE;mHKo;DtqU", "site": "https://openreview.net/forum?id=0k85noSawb", "pdf_size": 378590, "rating": "3;3;5;5", "confidence": "4;4;4;3", "soundness": "1;2;2;2", "contribution": "1;2;2;2", "presentation": "1;4;3;3", "wc_summary": "49;85;27;102", "wc_strengths": "9;70;33;115", "wc_weaknesses": 
"224;127;189;233", "wc_questions": "760;168;2;106", "wc_review": "1042;450;251;556", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 1.75, 0.4330127018922193 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 65.75, 29.439556722206262 ], "wc_strengths_avg": [ 56.75, 40.03982392568679 ], "wc_weaknesses_avg": [ 193.25, 41.63156855080049 ], "wc_questions_avg": [ 259.0, 295.2710619075293 ], "wc_review_avg": [ 574.75, 291.1403226968054 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3137145687617437765&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "New York University;University of California, Davis", "aff_unique_dep": ";", "aff_unique_url": "https://www.nyu.edu;https://www.ucdavis.edu", "aff_unique_abbr": "NYU;UC Davis", "aff_campus_unique_index": "1", "aff_campus_unique": ";Davis", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "MINDE: Mutual Information Neural Diffusion Estimation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19605", "id": "0kWd8SJq8d", "author_site": "Giulio Franzese, Mustapha BOUNOUA, Pietro Michiardi", "tldr": "", "abstract": "In this work we present a new method for the estimation of Mutual Information (MI) between random variables. Our approach is based on an original interpretation of the Girsanov theorem, which allows us to use score-based diffusion models to estimate the KL divergence between two densities as a difference between their score functions. As a by-product, our method also enables the estimation of the entropy of random variables. \nArmed with such building blocks, we present a general recipe to measure MI, which unfolds in two directions: one uses conditional diffusion process, whereas the other uses joint diffusion processes that allow simultaneous modelling of two random variables. \nOur results, which derive from a thorough experimental protocol over all the variants of our approach, indicate that our method is more accurate than the main alternatives from the literature, especially for challenging distributions. 
Furthermore, our methods pass MI self-consistency tests, including data processing and additivity under independence, which instead are a pain-point of existing methods", "keywords": "mutual information;score matching;diffusion models", "primary_area": "probabilistic methods (Bayesian methods, variational inference, sampling, UQ, etc.)", "supplementary_material": "/attachment/21f8712040fa29b7638eb83c95cfc3888fe59c7b.zip", "author": "Giulio Franzese;Mustapha BOUNOUA;Pietro Michiardi", "authorids": "~Giulio_Franzese1;~Mustapha_BOUNOUA1;~Pietro_Michiardi1", "gender": "M;M;M", "homepage": ";https://mustaphabounoua.github.io/;http://www.eurecom.fr/~michiard/", "dblp": "217/1859.html;348/9789;54/3028", "google_scholar": "kEtx_WwAAAAJ;1ooHDEMAAAAJ;https://scholar.google.com.tw/citations?user=mlx1eCgAAAAJ", "orcid": "0000-0003-4244-2053;0009-0003-5244-8528;", "linkedin": ";mustb/;", "or_profile": "~Giulio_Franzese1;~Mustapha_BOUNOUA1;~Pietro_Michiardi1", "aff": "Eurecom;Eurecom;EURECOM", "aff_domain": "eurecom.fr;eurecom.fr;eurecom.fr", "position": "Postdoc;PhD student;Full Professor", "bibtex": "@inproceedings{\nfranzese2024minde,\ntitle={{MINDE}: Mutual Information Neural Diffusion Estimation},\nauthor={Giulio Franzese and Mustapha BOUNOUA and Pietro Michiardi},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=0kWd8SJq8d}\n}", "github": "", "project": "", "reviewers": "QLQ9;hHbr;N2Vu;a1nb", "pdf_size": 19358556, "rating": "6;6;6;8", "confidence": "2;2;3;3", "soundness": "3;2;4;4", "contribution": "2;2;3;3", "presentation": "2;3;2;3", "wc_summary": "83;121;11;76", "wc_strengths": "88;57;32;79", "wc_weaknesses": "106;93;734;249", "wc_questions": "460;86;57;74", "wc_review": "737;357;834;478", "wc_reply_reviewers": "352;27;500;151", "wc_reply_authors": "1672;616;1665;989", "reply_reviewers": "1;1;3;1", "reply_authors": "4;2;5;3", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 72.75, 39.5498103661699 ], "wc_strengths_avg": [ 64.0, 21.644860821913362 ], "wc_weaknesses_avg": [ 295.5, 260.4616094552132 ], "wc_questions_avg": [ 169.25, 168.18052057238972 ], "wc_review_avg": [ 601.5, 191.99544265424635 ], "wc_reply_reviewers_avg": [ 257.5, 181.80277775655685 ], "wc_reply_authors_avg": [ 1235.5, 452.6436236157536 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 3.5, 1.118033988749895 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12753365406615297679&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=0kWd8SJq8d", "pdf": "https://openreview.net/pdf?id=0kWd8SJq8d", "email": "eurecom.fr;eurecom.fr;eurecom.fr", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "EURECOM", "aff_unique_dep": "", "aff_unique_url": "https://www.eurecom.fr", "aff_unique_abbr": "", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "id": "0kvrymILfy", "title": "Making Predictors More Reliable with Selective Recalibration", "track": "main", "status": "Reject", "tldr": "", "abstract": "A reliable deep learning system should be able to accurately express its confidence with respect to its 
predictions, a quality known as calibration. One of the most effective ways to produce reliable confidence estimates with a pre-trained model is by applying a post-hoc recalibration method. Popular recalibration methods like temperature scaling are typically fit on a small amount of data and work in the model's output space, as opposed to the more expressive feature embedding space, and thus usually have only one or a handful of parameters. However, the target distribution to which they are applied is often complex and difficult to fit well with such a function. To this end we propose selective recalibration, where a selection model learns to reject some user-chosen proportion of the data in order to allow the recalibrator to focus on regions of the input space that can be well-captured by such a model. We provide theoretical analysis to motivate our algorithm, and test our method through comprehensive experiments on difficult medical imaging and zero-shot classification tasks. Our results show that selective recalibration consistently leads to significantly lower calibration error than a wide range of selection and recalibration baselines.", "keywords": "calibration;statistical learning", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/87fbedc398ec82602df4467c794e39d1ddcc41fa.zip", "author": "Thomas P Zollo;Zhun Deng;Jake Snell;Toniann Pitassi;Richard Zemel", "authorids": "~Thomas_P_Zollo1;~Zhun_Deng1;~Jake_Snell1;~Toniann_Pitassi3;~Richard_Zemel1", "gender": "M;M;M;F;M", "homepage": "https://www.thomaszollo.com/;https://www.zhundeng.org/;https://www.jakesnell.com;http://www.cs.columbia.edu/~toni;http://www.cs.columbia.edu/~zemel", "dblp": "336/8946;204/4353;172/1406;p/TPitassi;16/6366", "google_scholar": "Xp7LgAwAAAAJ;nkmi-moAAAAJ;MbXKAK8AAAAJ;;https://scholar.google.ca/citations?user=iBeDoRAAAAAJ", "orcid": ";;;;", "linkedin": "thomas-zollo/;;;;", "or_profile": "~Thomas_P_Zollo1;~Zhun_Deng1;~Jake_Snell1;~Toniann_Pitassi3;~Richard_Zemel1", "aff": "Columbia University;Columbia University;Princeton University;Columbia University;Department of Computer Science, University of Toronto", "aff_domain": "columbia.edu;columbia.edu;princeton.edu;columbia.edu;cs.toronto.edu", "position": "PhD student;Postdoc;Postdoc;Full Professor;Full Professor", "bibtex": "@misc{\nzollo2024making,\ntitle={Making Predictors More Reliable with Selective Recalibration},\nauthor={Thomas P Zollo and Zhun Deng and Jake Snell and Toniann Pitassi and Richard Zemel},\nyear={2024},\nurl={https://openreview.net/forum?id=0kvrymILfy}\n}", "github": "", "project": "", "reviewers": "NrpW;RXXm;6mVa;DcsN", "site": "https://openreview.net/forum?id=0kvrymILfy", "pdf_size": 868824, "rating": "3;5;5;8", "confidence": "3;4;2;5", "soundness": "3;3;2;3", "contribution": "2;2;2;3", "presentation": "3;3;2;4", "wc_summary": "119;82;86;203", "wc_strengths": "18;165;73;67", "wc_weaknesses": "260;193;161;279", "wc_questions": "1;101;6;281", "wc_review": "398;541;326;830", "wc_reply_reviewers": "81;106;291;22", "wc_reply_authors": "555;252;322;180", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.25, 1.7853571071357126 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 122.5, 48.64411577981452 ], "wc_strengths_avg": [ 80.75, 53.11485197192966 ], "wc_weaknesses_avg": [ 223.25, 48.0852108241193 ], 
"wc_questions_avg": [ 97.25, 113.32337578805178 ], "wc_review_avg": [ 523.75, 193.00566701524596 ], "wc_reply_reviewers_avg": [ 125.0, 100.57584202978367 ], "wc_reply_authors_avg": [ 327.25, 140.75044404903312 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6888467201936643, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:FDLPOlQ8bYMJ:scholar.google.com/&scioq=Making+Predictors+More+Reliable+with+Selective+Recalibration&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "Columbia University;Princeton University;University of Toronto", "aff_unique_dep": ";;Department of Computer Science", "aff_unique_url": "https://www.columbia.edu;https://www.princeton.edu;https://www.utoronto.ca", "aff_unique_abbr": "Columbia;Princeton;U of T", "aff_campus_unique_index": "1", "aff_campus_unique": ";Toronto", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "United States;Canada" }, { "id": "0lW9cDUtf8", "title": "FairReweighing: density estimation-based reweighing framework for improving separation in fair regression", "track": "main", "status": "Reject", "tldr": "", "abstract": "There has been a prevalence of implementing machine learning technologies in both high-stakes public-sector and industrial contexts. However, the lack of transparency in these algorithmic solutions has raised concerns over whether these data-informed decisions secure fairness against people from all racial, gender, or age groups. Despite the extensive research and work that emerged on fairness-aware machine learning, up till now, most efforts on solving this issue have been dedicated to binary classification tasks. In this work, we propose a density estimation-based pre-processing algorithm to train regression models satisfying the separation criterion $\\hat{Y} \\perp A \\mid Y$. 
Evaluated by the ratio estimation of separation via probabilistic classification on both synthetic and real world data, we show that the proposed algorithm outperforms existing state-of-the-art regression fairness solutions in terms of maintaining high predicting accuracy while improving separation in fair regression.", "keywords": "fairness;separation;reweighing;machine learning", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "", "author": "XIAOYIN XI;Zhe Yu", "authorids": "~XIAOYIN_XI1;~Zhe_Yu3", "gender": "M;M", "homepage": ";https://zhe-yu.github.io/", "dblp": ";", "google_scholar": "h9693fMAAAAJ;MWWd15EAAAAJ", "orcid": ";0000-0002-6841-1725", "linkedin": ";", "or_profile": "~XIAOYIN_XI1;~Zhe_Yu3", "aff": "Rochester Institute of Technology;Rochester Institute of Technology", "aff_domain": "rit.edu;rit.edu", "position": "PhD student;Assistant Professor", "bibtex": "@misc{\nxi2024fairreweighing,\ntitle={FairReweighing: density estimation-based reweighing framework for improving separation in fair regression},\nauthor={XIAOYIN XI and Zhe Yu},\nyear={2024},\nurl={https://openreview.net/forum?id=0lW9cDUtf8}\n}", "github": "", "project": "", "reviewers": "gSA5;xkbJ;7rTx;NWYi", "site": "https://openreview.net/forum?id=0lW9cDUtf8", "pdf_size": 330958, "rating": "3;3;3;6", "confidence": "4;5;5;4", "soundness": "3;1;1;3", "contribution": "1;1;2;3", "presentation": "1;3;2;4", "wc_summary": "55;72;89;89", "wc_strengths": "18;29;59;71", "wc_weaknesses": "273;279;730;56", "wc_questions": "6;2;193;150", "wc_review": "352;382;1071;366", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "549;460;584;298", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.0, 1.0 ], "contribution_avg": [ 1.75, 0.82915619758885 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 76.25, 14.095655359010449 ], "wc_strengths_avg": [ 44.25, 21.533404282648853 ], "wc_weaknesses_avg": [ 334.5, 245.37980764520947 ], "wc_questions_avg": [ 87.75, 85.13041465892199 ], "wc_review_avg": [ 542.75, 305.16993216894747 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 472.75, 110.5562639564127 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:1WsgfzHK320J:scholar.google.com/&scioq=FairReweighing:+density+estimation-based+reweighing+framework+for+improving+separation+in+fair+regression&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Rochester Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.rit.edu", "aff_unique_abbr": "RIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "0oIkKERYhH", "title": "DOG: Discriminator-only Generation Beats GANs on Graphs", "track": "main", "status": "Reject", "tldr": "", "abstract": "We propose discriminator-only generation (DOG) as a generative modeling approach that bridges the gap between energy-based models (EBMs) and generative adversarial networks (GANs). DOG generates samples through iterative gradient descent on a discriminator's input, eliminating the need for a separate generator model. 
This simplification obviates the extensive tuning of generator architectures required by GANs. In the graph domain, where GANs have lagged behind diffusion approaches in generation quality, DOG demonstrates significant improvements over GANs using the same discriminator architectures. Surprisingly, despite its computationally intensive iterative generation, DOG produces higher-quality samples than GANs on the QM9 molecule dataset in less training time.", "keywords": "generative modeling;graph generation", "primary_area": "generative models", "supplementary_material": "/attachment/3dc2692c0fc643ad08ba8cb3e86ddd1ea0667108.zip", "author": "Franz Rieger;Joergen Kornfeld", "authorids": "~Franz_Rieger1;~Joergen_Kornfeld1", "gender": "M;M", "homepage": ";https://www.bi.mpg.de/kornfeld", "dblp": ";164/5639.html", "google_scholar": ";aT2MvAEAAAAJ", "orcid": ";0000-0002-2547-8700", "linkedin": "riegerfr/;", "or_profile": "~Franz_Rieger1;~J\u00f6rgen_Kornfeld1", "aff": "Max-Planck Institute for Biological Intelligence;MPI for Biological Intelligence", "aff_domain": "bi.mpg.de;bi.mpg.de", "position": "PhD student;Principal Researcher", "bibtex": "@misc{\nrieger2024dog,\ntitle={{DOG}: Discriminator-only Generation Beats {GAN}s on Graphs},\nauthor={Franz Rieger and Joergen Kornfeld},\nyear={2024},\nurl={https://openreview.net/forum?id=0oIkKERYhH}\n}", "github": "", "project": "", "reviewers": "8BtC;SsB3;2QS2;5Xb1", "site": "https://openreview.net/forum?id=0oIkKERYhH", "pdf_size": 6692319, "rating": "3;3;5;6", "confidence": "4;4;4;3", "soundness": "3;2;2;2", "contribution": "2;2;2;3", "presentation": "2;3;3;2", "wc_summary": "52;67;70;129", "wc_strengths": "74;37;39;75", "wc_weaknesses": "152;112;404;79", "wc_questions": "198;33;74;6", "wc_review": "476;249;587;289", "wc_reply_reviewers": "97;0;269;47", "wc_reply_authors": "861;211;848;124", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;1", "rating_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 79.5, 29.381116384507923 ], "wc_strengths_avg": [ 56.25, 18.267115262131565 ], "wc_weaknesses_avg": [ 186.75, 128.06516895705875 ], "wc_questions_avg": [ 77.75, 73.52678083528477 ], "wc_review_avg": [ 400.25, 137.7195973708898 ], "wc_reply_reviewers_avg": [ 103.25, 101.65720584395383 ], "wc_reply_authors_avg": [ 511.0, 344.90505940040947 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.7777777777777777, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:YnXbYPZHPeoJ:scholar.google.com/&scioq=DOG:+Discriminator-only+Generation+Beats+GANs+on+Graphs&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Max-Planck Institute for Biological Intelligence;Max Planck Institute for Biological Cybernetics", "aff_unique_dep": ";Biological Cybernetics", "aff_unique_url": "https://www mpi-bi.de;https://www.biological-cybernetics.de", "aff_unique_abbr": ";MPIBC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "id": "0pBX9FLGRR", "title": "Decoupled Diffusion Models: Image to Zero and Zero to Noise", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "This paper proposes decoupled diffusion models (DDMs), 
featuring a new diffusion paradigm that allows for high-quality (un)conditioned image generation in less than 10 function evaluations. In a nutshell, DDMs decouple the forward image-to-noise mapping into image-to-zero mapping and zero-to-noise mapping. Under this framework, we mathematically derive 1) the training objectives and 2) the reverse-time sampling formula based on an analytic transition probability which models the image-to-zero transition. The former enables DDMs to learn noise and image components separately, which simplifies learning. Importantly, because of the latter's analyticity in the zero-to-image sampling function, DDMs can avoid the ordinary differential equation based accelerators and instead naturally perform sampling with an arbitrary step size. Under the few function evaluation setup, DDMs experimentally yield very competitive performance compared with the state of the art in 1) unconditioned image generation, e.g., CIFAR-10 and CelebA-HQ-256 and 2) image-conditioned downstream tasks such as super-resolution, saliency detection, and image inpainting.", "keywords": "Diffusion Probabilistic Models; Decoupled Diffusion Models", "primary_area": "generative models", "supplementary_material": "", "author": "Yuhang Huang;Liang Zheng;Zheng Qin;Xinwang Liu;Kai Xu", "authorids": "~Yuhang_Huang1;~Liang_Zheng4;~Zheng_Qin2;~Xinwang_Liu1;~Kai_Xu5", "gender": "M;M;M;M;M", "homepage": ";http://zheng-lab.cecs.anu.edu.au/;;https://xinwangliu.github.io/;http://kevinkaixu.net/", "dblp": ";61/7360-1;95/6861-2;45/6569-2.html;Xu_0004:Kai", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.au/citations?user=vNHqr3oAAAAJ;DnHBAN0AAAAJ;A56vWC4AAAAJ;https://scholar.google.com.hk/citations?user=GuVkg-8AAAAJ", "orcid": ";;0000-0003-4373-4111;;", "linkedin": ";liang-zheng-76341311a/;;;", "or_profile": "~Yuhang_Huang1;~Liang_Zheng4;~Zheng_Qin2;~Xinwang_Liu1;~Kevin_Xu1", "aff": "National University of Defense Technology;Australian National University;National University of Defense Technology;National University of Defense Technology;National University of Defense Technology", "aff_domain": "nudt.edu.cn;anu.edu.au;nudt.edu.cn;nudt.edu.cn;nudt.edu.cn", "position": "PhD student;Associate Professor;Assistant Professor;Full Professor;Professor", "bibtex": "@misc{\nhuang2024decoupled,\ntitle={Decoupled Diffusion Models: Image to Zero and Zero to Noise},\nauthor={Yuhang Huang and Liang Zheng and Zheng Qin and Xinwang Liu and Kai Xu},\nyear={2024},\nurl={https://openreview.net/forum?id=0pBX9FLGRR}\n}", "github": "", "project": "", "reviewers": "7ZTA;UWnj;D5Rq;Jv4X", "site": "https://openreview.net/forum?id=0pBX9FLGRR", "pdf_size": 14956234, "rating": "3;3;3;5", "confidence": "5;3;4;4", "soundness": "2;3;2;3", "contribution": "1;2;2;2", "presentation": "2;3;2;2", "wc_summary": "82;51;106;136", "wc_strengths": "42;45;47;60", "wc_weaknesses": "944;315;552;121", "wc_questions": "12;3;6;139", "wc_review": "1080;414;711;456", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 93.75, 31.227992250543423 ], "wc_strengths_avg": [ 48.5, 6.87386354243376 ], "wc_weaknesses_avg": [ 483.0,
306.818350168304 ], "wc_questions_avg": [ 40.0, 57.24945414586937 ], "wc_review_avg": [ 665.25, 265.0578946192699 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Ll7rPf4M4qMJ:scholar.google.com/&scioq=Decoupled+Diffusion+Models:+Image+to+Zero+and+Zero+to+Noise&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "National University of Defense Technology;Australian National University", "aff_unique_dep": ";", "aff_unique_url": "http://www.nudt.edu.cn/;https://www.anu.edu.au", "aff_unique_abbr": "NUDT;ANU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "China;Australia" }, { "id": "0rXGGYNVAw", "title": "Unlocking the Potential of Federated Learning for Deeper Models", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Federated learning (FL) is a new paradigm for distributed machine learning that allows a global model to be trained across multiple clients without compromising their privacy. Although FL has demonstrated remarkable success in various scenarios, recent studies mainly utilize shallow and small neural networks. In our research, we discover a significant performance decline when applying the existing FL framework to deeper neural networks, even when client data are independently and identically distributed. Our further investigation shows that the decline is due to the continuous accumulation of dissimilarities among client models during the layer-by-layer back-propagation process, which we refer to as \"divergence accumulation.\" As deeper models involve a longer chain of divergence accumulation, they tend to exhibit more significant divergence, subsequently leading to performance decline. Both theoretical derivations and empirical evidence are proposed to support the existence of divergence accumulation and its amplified effects in deeper models. To tackle this challenge, we propose a set of technical guidelines centered on minimizing divergence. These guidelines, consisting of strategies such as employing wider models and reducing the receptive field, greatly improve the performance of FL on deeper models. Their effectiveness is validated via extensive evaluation with various metrics. 
For example, applying the guidelines can boost the performance of ResNet101 on the Tiny-ImageNet dataset by as much as 43\\%.", "keywords": "Federated Learning;Distributed Model Optimization", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Haolin Wang;Xuefeng Liu;Jianwei Niu;Shaojie Tang;Jiaxing Shen", "authorids": "~Haolin_Wang6;~Xuefeng_Liu5;~Jianwei_Niu3;~Shaojie_Tang2;~Jiaxing_Shen2", "gender": "M;M;M;;M", "homepage": ";;https://shi.buaa.edu.cn/jwniu/zh_CN/index.htm;;https://shenjiaxing.github.io", "dblp": ";96/600-1;25/4653-2;;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en;KOciOtEAAAAJ;;amJMlVoAAAAJ", "orcid": "0000-0002-0659-2006;;0000-0003-3946-5107;;0000-0002-0833-0288", "linkedin": ";;;;", "or_profile": "~Haolin_Wang6;~Xuefeng_Liu5;~Jianwei_Niu3;~Shaojie_Tang2;~Jiaxing_Shen2", "aff": "Beihang University;Behang University;Beihang University;;Lingnan University", "aff_domain": "buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;;ln.edu.hk", "position": "MS student;Associate Professor;Full Professor;;Assistant Professor", "bibtex": "@misc{\nwang2024unlocking,\ntitle={Unlocking the Potential of Federated Learning for Deeper Models},\nauthor={Haolin Wang and Xuefeng Liu and Jianwei Niu and Shaojie Tang and Jiaxing Shen},\nyear={2024},\nurl={https://openreview.net/forum?id=0rXGGYNVAw}\n}", "github": "", "project": "", "reviewers": "wvyK;w79q;pCdo", "site": "https://openreview.net/forum?id=0rXGGYNVAw", "pdf_size": 444445, "rating": "1;3;6", "confidence": "4;4;2", "soundness": "1;2;2", "contribution": "1;3;2", "presentation": "2;3;3", "wc_summary": "63;38;62", "wc_strengths": "26;38;28", "wc_weaknesses": "405;220;56", "wc_questions": "77;89;166", "wc_review": "571;385;312", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 3.3333333333333335, 2.0548046676563256 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 1.6666666666666667, 0.4714045207910317 ], "contribution_avg": [ 2.0, 0.816496580927726 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 54.333333333333336, 11.55662388223981 ], "wc_strengths_avg": [ 30.666666666666668, 5.2493385826745405 ], "wc_weaknesses_avg": [ 227.0, 142.56460523799961 ], "wc_questions_avg": [ 110.66666666666667, 39.43207943906698 ], "wc_review_avg": [ 422.6666666666667, 109.03923860498823 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 3, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.9176629354822472, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2558871393691791377&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Beihang University;Behang University;Lingnan University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.buaa.edu.cn/;;http://www.lingnan.edu.cn", "aff_unique_abbr": "BUAA;;LNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China;" }, { "id": "0sO2euxhUQ", "title": "Learning Latent Structural Causal Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "Causal learning has long concerned itself with the recovery of underlying causal mechanisms. 
Such causal modelling enables better explanations of out-of-distribution data. Prior works on causal learning assume that the causal variables are given. However, in machine learning tasks, one often operates on low-level data like image pixels or high-dimensional vectors. In such settings, the entire Structural Causal Model (SCM) -- structure, parameters, \\textit{and} high-level causal variables -- is latent and needs to be learnt from low-level data. We treat this problem as Bayesian inference of the latent SCM, given low-level data. We present BIOLS, a tractable approximate inference method which performs joint inference over the causal variables, structure and parameters of the latent SCM from known interventions. Experiments are performed on synthetic datasets and a causal benchmark image dataset to demonstrate the efficacy of our approach. We also demonstrate the ability of BIOLS to generate images from unseen interventional distributions.", "keywords": "Bayesian Causal Discovery;Latent variable models", "primary_area": "probabilistic methods (Bayesian methods, variational inference, sampling, UQ, etc.)", "supplementary_material": "", "author": "Jithendaraa Subramanian;Yashas Annadani;Tristan Deleu;Ivaxi Sheth;Nan Rosemary Ke;Stefan Bauer;Derek Nowrouzezahrai;Samira Ebrahimi Kahou", "authorids": "~Jithendaraa_Subramanian1;~Yashas_Annadani1;~Tristan_Deleu1;~Ivaxi_Sheth1;~Nan_Rosemary_Ke1;~Stefan_Bauer1;~Derek_Nowrouzezahrai1;~Samira_Ebrahimi_Kahou1", "gender": "M;;;F;F;;Not Specified;F", "homepage": "https://jithendaraa.github.io/;https://yashasannadani.com;https://tristandeleu.github.io/;;https://nke001.github.io/;https://cifar.ca/bios/stefan-bauer/;https://www.cim.mcgill.ca/~derek/;https://saebrahimi.github.io", "dblp": "281/6755;190/7411;192/1896;291/2912.html;120/5291;;30/4225;20/11069", "google_scholar": "s0BzYvYAAAAJ;ExgzcVMAAAAJ;nLNwh-wAAAAJ;Isz5M1UAAAAJ;https://scholar.google.ca/citations?user=dxwPYhQAAAAJ;O-oICE8AAAAJ;https://scholar.google.ca/citations?user=nCZ2PMcAAAAJ;https://scholar.google.ca/citations?user=F99FuaAAAAAJ", "orcid": ";;;;;;;", "linkedin": "jithendaraa-subramanian-85a22b176/;;;;;;;", "or_profile": "~Jithendaraa_Subramanian1;~Yashas_Annadani1;~Tristan_Deleu1;~Ivaxi_Sheth1;~Nan_Rosemary_Ke1;~Stefan_Bauer1;~Derek_Nowrouzezahrai1;~Samira_Ebrahimi_Kahou1", "aff": "McGill University, McGill University;Max Planck Institute for Intelligent Systems, Max-Planck Institute;University of Montreal;CISPA, saarland university, saarland informatics campus;Google DeepMind;Technische Universit\u00e4t M\u00fcnchen;McGill University;\u00c9cole de technologie sup\u00e9rieure", "aff_domain": "mail.mcgill.ca;tuebingen.mpg.de;umontreal.ca;cispa.saarland;deepmind.com;tum.de;mcgill.ca;etsmtl.ca", "position": "MS student;PhD student;PhD student;PhD student;Researcher;Associate Professor;Full Professor;Associate Professor", "bibtex": "@misc{\nsubramanian2024learning,\ntitle={Learning Latent Structural Causal Models},\nauthor={Jithendaraa Subramanian and Yashas Annadani and Tristan Deleu and Ivaxi Sheth and Nan Rosemary Ke and Stefan Bauer and Derek Nowrouzezahrai and Samira Ebrahimi Kahou},\nyear={2024},\nurl={https://openreview.net/forum?id=0sO2euxhUQ}\n}", "github": "", "project": "", "reviewers": "DxH4;YGzo;WQKo;h7nM", "site": "https://openreview.net/forum?id=0sO2euxhUQ", "pdf_size": 833989, "rating": "3;3;5;5", "confidence": "4;4;3;3", "soundness": "2;2;1;2", "contribution": "1;2;2;2", "presentation": "3;3;3;2", "wc_summary": "39;55;91;65", "wc_strengths": "19;27;46;331", 
"wc_weaknesses": "68;92;490;2", "wc_questions": "56;52;231;2", "wc_review": "182;226;858;400", "wc_reply_reviewers": "0;25;212;290", "wc_reply_authors": "971;1318;4433;1439", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;8;3", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 1.75, 0.4330127018922193 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 62.5, 18.887826767524103 ], "wc_strengths_avg": [ 105.75, 130.41735889060166 ], "wc_weaknesses_avg": [ 163.0, 191.64811504421326 ], "wc_questions_avg": [ 85.25, 86.79681733796464 ], "wc_review_avg": [ 416.5, 267.6167969317322 ], "wc_reply_reviewers_avg": [ 131.75, 122.71588120532729 ], "wc_reply_authors_avg": [ 2040.25, 1392.0932754309247 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.75, 2.48746859276655 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12399942743285689830&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;2;3;4;5;0;6", "aff_unique_norm": "McGill University;Max Planck Institute for Intelligent Systems;University of Montreal;Saarland University;Google;Technische Universit\u00e4t M\u00fcnchen;\u00c9cole de technologie sup\u00e9rieure", "aff_unique_dep": ";Intelligent Systems;;CISPA;Google DeepMind;;", "aff_unique_url": "https://www.mcgill.ca;https://www.mpi-is.mpg.de;https://wwwumontreal.ca;https://www.uni-saarland.de;https://deepmind.com;https://www.tum.de;https://www.etsmtl.ca", "aff_unique_abbr": "McGill;MPI-IS;UM;Saarland U;DeepMind;TUM;ETS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Saarland Informatics Campus", "aff_country_unique_index": "0;1;0;1;2;1;0;0", "aff_country_unique": "Canada;Germany;United Kingdom" }, { "id": "0sbIEkIutN", "title": "From Interpolation to Extrapolation: Complete Length Generalization for Arithmetic Transformers", "track": "main", "status": "Reject", "tldr": "", "abstract": "Since its introduction, the transformer model has demonstrated outstanding performance across various tasks. However, there are still unresolved issues regarding length generalization, particularly in algorithmic tasks. In this paper, we focus on investigating the inherent capabilities of transformer models in learning arithmetic algorithms, such as addition and multiplication. Through experiments and attention analysis, we identify a number of crucial factors for achieving optimal length generalization. We show that transformer models are able to generalize to arbitrarily long lengths with the help of targeted attention biasing. Building on this, we introduce Attention Bias Calibration (ABC), a calibration stage that enables the model to automatically learn the proper attention biases, which we link to mechanisms in relative position encoding. 
We demonstrate that using ABC, the transformer model can achieve unprecedented perfect length generalization on certain arithmetic tasks.", "keywords": "Transformer;Length Generalization;Attention;Arithmetic", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Shaoxiong Duan;Yining Shi", "authorids": "~Shaoxiong_Duan1;~Yining_Shi2", "gender": "M;", "homepage": "https://github.com/shaoxiongduan;https://icc.rdfz.cn/CHN_RDFZ_SZLL/CHN_RDFZ_SLKJXK/index_2.html", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Shaoxiong_Duan1;~Yining_Shi2", "aff": "International Curriculum Centre, RDFZ;", "aff_domain": "icc.rdfz.cn;", "position": "Undergrad student;", "bibtex": "@misc{\nduan2024from,\ntitle={From Interpolation to Extrapolation: Complete Length Generalization for Arithmetic Transformers},\nauthor={Shaoxiong Duan and Yining Shi},\nyear={2024},\nurl={https://openreview.net/forum?id=0sbIEkIutN}\n}", "github": "", "project": "", "reviewers": "usKR;69hA;hH7Y;25Z2", "site": "https://openreview.net/forum?id=0sbIEkIutN", "pdf_size": 954799, "rating": "5;5;6;6", "confidence": "4;4;4;4", "soundness": "3;2;3;3", "contribution": "2;2;3;3", "presentation": "2;2;3;2", "wc_summary": "63;57;88;168", "wc_strengths": "26;20;45;70", "wc_weaknesses": "241;174;208;248", "wc_questions": "27;4;267;168", "wc_review": "357;255;608;654", "wc_reply_reviewers": "157;0;201;24", "wc_reply_authors": "565;272;662;692", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 94.0, 44.27753380666091 ], "wc_strengths_avg": [ 40.25, 19.49839737004044 ], "wc_weaknesses_avg": [ 217.75, 29.431063521388417 ], "wc_questions_avg": [ 116.5, 107.2019122963765 ], "wc_review_avg": [ 468.5, 167.24607618715604 ], "wc_reply_reviewers_avg": [ 95.5, 85.35953373818299 ], "wc_reply_authors_avg": [ 547.75, 165.97947915329775 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11404935692635392920&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0", "aff_unique_norm": "RDFZ International Curriculum Centre", "aff_unique_dep": "International Curriculum Centre", "aff_unique_url": "", "aff_unique_abbr": "RDFZ ICC", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "title": "Retrieval-Guided Reinforcement Learning for Boolean Circuit Minimization", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19604", "id": "0t1O8ziRZp", "author_site": "Animesh Basak Chowdhury, Marco Romanelli, Benjamin Tan, Ramesh Karri, Siddharth Garg", "tldr": "", "abstract": "Logic synthesis, a pivotal stage in chip design, entails optimizing chip specifications encoded in hardware description languages like Verilog into highly efficient implementations using Boolean logic gates. The process involves a sequential application of logic minimization heuristics (``synthesis recipe\"), with their arrangement significantly impacting crucial metrics such as area and delay. 
Addressing the challenge posed by the broad spectrum of hardware design complexities \u2014 from variations of past designs (e.g., adders and multipliers) to entirely novel configurations (e.g., innovative processor instructions) \u2014 requires a nuanced 'synthesis recipe' guided by human expertise and intuition. This study conducts a thorough examination of learning and search techniques for logic synthesis, unearthing a surprising revelation: pre-trained agents, when confronted with entirely novel designs, may veer off course, detrimentally affecting the search trajectory. We present ABC-RL, a meticulously tuned $\\alpha$ parameter that adeptly adjusts recommendations from pre-trained agents during the search process. Computed based on similarity scores through nearest neighbor retrieval from the training dataset, ABC-RL yields superior synthesis recipes tailored for a wide array of hardware designs. Our findings showcase substantial enhancements in the Quality of Result (QoR) of synthesized circuits, boasting improvements of up to 24.8\\% compared to state-of-the-art techniques. Furthermore, ABC-RL achieves an impressive up to 9x reduction in runtime (iso-QoR) when compared to current state-of-the-art methodologies.", "keywords": "Electronics Design Automation (EDA);Logic Synthesis;Reinforcement Learning;Hardware design;Circuits", "primary_area": "infrastructure, software libraries, hardware, etc.", "supplementary_material": "/attachment/96d64b30ce8657cadea469fc4c3cdb5c78c4313c.zip", "author": "Animesh Basak Chowdhury;Marco Romanelli;Benjamin Tan;Ramesh Karri;Siddharth Garg", "authorids": "~Animesh_Basak_Chowdhury1;~Marco_Romanelli1;~Benjamin_Tan1;~Ramesh_Karri1;~Siddharth_Garg1", "gender": "M;;M;M;M", "homepage": "https://gitlab.com/animeshbchowdhury;;;http://engineering.nyu.edu/people/ramesh-karri/;http://engineering.nyu.edu/people/siddharth-garg/", "dblp": "217/4860;;https://dblp.uni-trier.de/pid/195/3070;;94/3807", "google_scholar": "_7dNuMwAAAAJ;;GOjr_RAAAAAJ;https://scholar.google.com.tw/citations?user=o60TaTEAAAAJ;https://scholar.google.com.tw/citations?user=Yf8OqQQAAAAJ", "orcid": ";;0000-0002-7642-3638;;", "linkedin": ";;;;", "or_profile": "~Animesh_Basak_Chowdhury1;~Marco_Romanelli1;~Benjamin_Tan1;~Ramesh_Karri1;~Siddharth_Garg1", "aff": "Qualcomm Inc, QualComm;;University of Calgary;New York University;New York University", "aff_domain": "qti.qualcomm.com;;ucalgary.ca;nyu.edu;nyu.edu", "position": "Researcher;;Assistant Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nchowdhury2024retrievalguided,\ntitle={Retrieval-Guided Reinforcement Learning for Boolean Circuit Minimization},\nauthor={Animesh Basak Chowdhury and Marco Romanelli and Benjamin Tan and Ramesh Karri and Siddharth Garg},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=0t1O8ziRZp}\n}", "github": "", "project": "", "reviewers": "3ENJ;yxFf;qBwB;Pwwd;pgL7", "pdf_size": 1216286, "rating": "5;6;6;6;8", "confidence": "4;5;4;3;4", "soundness": "2;2;2;3;4", "contribution": "2;2;3;2;3", "presentation": "3;3;3;3;4", "wc_summary": "65;51;58;107;144", "wc_strengths": "69;38;70;62;121", "wc_weaknesses": "149;109;231;64;24", "wc_questions": "6;23;7;34;48", "wc_review": "289;221;366;267;337", "wc_reply_reviewers": "419;0;0;0;0", "wc_reply_authors": "3119;402;1408;473;174", "reply_reviewers": "4;0;0;0;0", "reply_authors": "7;2;3;2;1", "rating_avg": [ 6.2, 0.9797958971132712 ], "confidence_avg": [ 4.0, 0.6324555320336759 
], "soundness_avg": [ 2.6, 0.8 ], "contribution_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 85.0, 35.35533905932738 ], "wc_strengths_avg": [ 72.0, 27.09243436828813 ], "wc_weaknesses_avg": [ 115.4, 71.4551607653359 ], "wc_questions_avg": [ 23.6, 16.057397049335236 ], "wc_review_avg": [ 296.0, 51.17812032499826 ], "wc_reply_reviewers_avg": [ 83.8, 167.6 ], "wc_reply_authors_avg": [ 1115.2, 1087.0031094711735 ], "reply_reviewers_avg": [ 0.8, 1.6000000000000003 ], "reply_authors_avg": [ 3.0, 2.0976176963403033 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4042388069948974798&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=0t1O8ziRZp", "pdf": "https://openreview.net/pdf?id=0t1O8ziRZp", "email": "qti.qualcomm.com;;ucalgary.ca;nyu.edu;nyu.edu", "author_num": 5, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Qualcomm Incorporated;University of Calgary;New York University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.qualcomm.com;https://www.ucalgary.ca;https://www.nyu.edu", "aff_unique_abbr": "Qualcomm;U of C;NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;Canada" }, { "title": "Distributional Preference Learning: Understanding and Accounting for Hidden Context in RLHF", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19603", "id": "0tWTxYYPnW", "author_site": "Anand Siththaranjan, Cassidy Laidlaw, Dylan Hadfield-Menell", "tldr": "", "abstract": "In practice, preference learning from human feedback depends on incomplete data with hidden context. Hidden context refers to data that affects the feedback received, but which is not represented in the data used to train a preference model. This captures common issues of data collection, such as having human annotators with varied preferences, cognitive processes that result in seemingly irrational behavior, and combining data labeled according to different criteria. We prove that standard applications of preference learning, including reinforcement learning from human feedback (RLHF), implicitly aggregate over hidden contexts according to a well-known voting rule called *Borda count*. We show this can produce counter-intuitive results that are very different from other methods which implicitly aggregate via expected utility. Furthermore, our analysis formalizes the way that preference learning from users with diverse values tacitly implements a social choice function. A key implication of this result is that annotators have an incentive to misreport their preferences in order to influence the learned model, leading to vulnerabilities in the deployment of RLHF. As a step towards mitigating these problems, we introduce a class of methods called *distributional preference learning* (DPL). DPL methods estimate a distribution of possible score values for each alternative in order to better account for hidden context. 
Experimental results indicate that applying DPL to RLHF for LLM chatbots identifies hidden context in the data and significantly reduces subsequent jailbreak vulnerability.", "keywords": "Preference Learning;Reinforcement Learning from Human Feedback;Social Choice Theory", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Anand Siththaranjan;Cassidy Laidlaw;Dylan Hadfield-Menell", "authorids": "~Anand_Siththaranjan1;~Cassidy_Laidlaw1;~Dylan_Hadfield-Menell2", "gender": "M;M;M", "homepage": ";https://cassidylaidlaw.com;http://people.csail.mit.edu/dhm/", "dblp": ";241/5375;135/8332", "google_scholar": "qYXPDjQAAAAJ;DzeJ67UAAAAJ;4mVPFQ8AAAAJ", "orcid": ";;0000-0002-6168-4763", "linkedin": ";;", "or_profile": "~Anand_Siththaranjan1;~Cassidy_Laidlaw1;~Dylan_Hadfield-Menell2", "aff": "University of California, Berkeley;University of California, Berkeley;Massachusetts Institute of Technology", "aff_domain": "berkeley.edu;berkeley.edu;mit.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nsiththaranjan2024distributional,\ntitle={Distributional Preference Learning: Understanding and Accounting for Hidden Context in {RLHF}},\nauthor={Anand Siththaranjan and Cassidy Laidlaw and Dylan Hadfield-Menell},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=0tWTxYYPnW}\n}", "github": "", "project": "", "reviewers": "i1j8;9LkN;fmx5;PKnu", "pdf_size": 443558, "rating": "5;6;6;8", "confidence": "3;2;3;2", "soundness": "2;3;3;3", "contribution": "2;3;3;3", "presentation": "3;2;3;3", "wc_summary": "83;121;123;55", "wc_strengths": "22;67;127;117", "wc_weaknesses": "161;34;100;62", "wc_questions": "7;713;125;3", "wc_review": "273;935;475;237", "wc_reply_reviewers": "128;63;0;4", "wc_reply_authors": "551;1126;977;236", "reply_reviewers": "1;1;0;1", "reply_authors": "1;2;2;1", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 95.5, 28.297526393662043 ], "wc_strengths_avg": [ 83.25, 42.03792930200059 ], "wc_weaknesses_avg": [ 89.25, 47.58873291021731 ], "wc_questions_avg": [ 212.0, 293.3751864081214 ], "wc_review_avg": [ 480.0, 277.91545476997135 ], "wc_reply_reviewers_avg": [ 48.75, 52.112258634605354 ], "wc_reply_authors_avg": [ 722.5, 351.31075986937833 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.6882472016116854, "gs_citation": 58, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4301635191466793158&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=0tWTxYYPnW", "pdf": "https://openreview.net/pdf?id=0tWTxYYPnW", "email": "berkeley.edu;berkeley.edu;mit.edu", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of California, Berkeley;Massachusetts Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.berkeley.edu;https://web.mit.edu", "aff_unique_abbr": "UC Berkeley;MIT", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "0tsJ7Nv5hk", "title": "Harnessing Orthogonality 
to Train Low-Rank Neural Networks", "track": "main", "status": "Reject", "tldr": "", "abstract": "In the realm of neural network training, the question of what is truly being learned beyond mathematical optimization has intrigued researchers for decades. \nThis study delves into the essence of neural network weights. \nBy leveraging the principles of singular value decomposition, we explore the hypothesis that the orthogonal bases of the low-rank decomposition of neural network weights stabilize during training, and provide experimental evidence to support this notion. \nBuilding upon this insight, we introduce Orthogonality-Informed Adaptive Low-Rank neural network training. \nOur novel approach seamlessly integrates into existing training workflows with minimal accuracy loss, as demonstrated by benchmarking on various datasets and well-established network architectures. \nWe find that, through standard tuning procedures, our method surpasses the performance of conventional training setups. \nFinally, we showcase the effectiveness of our tuned low-rank training procedure by applying it to a state-of-the-art transformer model for time series prediction.", "keywords": "orthogonal;low rank;low-rank;svd;compression;optimization", "primary_area": "learning theory", "supplementary_material": "/attachment/acee7a6f965fc83b773aa14440a746ece0302445.pdf", "author": "Daniel Coquelin;Katharina Fl\u00fcgel;Marie Weiel;Nicholas Kiefer;Charlotte Debus;Achim Streit;Markus G\u00f6tz", "authorids": "~Daniel_Coquelin1;~Katharina_Fl\u00fcgel1;~Marie_Weiel1;~Nicholas_Kiefer1;~Charlotte_Debus1;~Achim_Streit1;~Markus_G\u00f6tz1", "gender": ";;F;M;F;M;M", "homepage": ";;;https://scholar.google.com/citations?view_op=list_works&hl=de&user=nfgHgAQAAAAJ;;http://www.scc.kit.edu/personen/achim.streit.php;https://www.scc.kit.edu/personen/11452.php", "dblp": "271/0898;;269/0868;;;s/AchimStreit;70/8283", "google_scholar": "6dHRVREAAAAJ;;xCAzgAIAAAAJ;https://scholar.google.com/citations?view_op=list_works;https://scholar.google.com/citations?hl=de;https://scholar.google.de/citations?user=i72A44MAAAAJ;https://scholar.google.de/citations?user=eD2svJQAAAAJ", "orcid": "0000-0001-8552-5153;;0000-0001-9648-4385;;0000-0002-7156-2022;0000-0002-5065-469X;0000-0002-2233-1041", "linkedin": ";;;;;achimstreit/;", "or_profile": "~Daniel_Coquelin1;~Katharina_Fl\u00fcgel1;~Marie_Weiel1;~Nicholas_Kiefer1;~Charlotte_Debus1;~Achim_Streit1;~Markus_G\u00f6tz1", "aff": "Karlsruher Institut f\u00fcr Technologie;;Karlsruher Institut f\u00fcr Technologie;Karlsruher Institut f\u00fcr Technologie;Karlsruher Institut f\u00fcr Technologie;Karlsruhe Institute of Technology;Karlsruhe Institute of Technology", "aff_domain": "kit.edu;;kit.edu;kit.edu;kit.edu;kit.edu;kit.edu", "position": "Researcher;;Postdoc;PhD student;Principal Researcher;Full Professor;Principal Researcher", "bibtex": "@misc{\ncoquelin2024harnessing,\ntitle={Harnessing Orthogonality to Train Low-Rank Neural Networks},\nauthor={Daniel Coquelin and Katharina Fl{\\\"u}gel and Marie Weiel and Nicholas Kiefer and Charlotte Debus and Achim Streit and Markus G{\\\"o}tz},\nyear={2024},\nurl={https://openreview.net/forum?id=0tsJ7Nv5hk}\n}", "github": "", "project": "", "reviewers": "LcGV;uXNh;Mh5x;ViAF", "site": "https://openreview.net/forum?id=0tsJ7Nv5hk", "pdf_size": 615340, "rating": "3;3;5;6", "confidence": "4;4;4;1", "soundness": "2;2;3;3", "contribution": "2;2;2;3", "presentation": "3;3;3;3", "wc_summary": "67;37;95;80", "wc_strengths": "19;20;34;85", "wc_weaknesses": "113;147;187;53", 
"wc_questions": "8;159;133;24", "wc_review": "207;363;449;242", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "121;222;233;133", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.25, 1.299038105676658 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 69.75, 21.34683817336891 ], "wc_strengths_avg": [ 39.5, 26.93046601899046 ], "wc_weaknesses_avg": [ 125.0, 49.13247398615299 ], "wc_questions_avg": [ 81.0, 65.89005994837157 ], "wc_review_avg": [ 315.25, 96.50485739070339 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 177.25, 50.57852805291985 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.7777777777777778, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2377516874846279502&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0;0;1;1", "aff_unique_norm": "Karlsruher Institut f\u00fcr Technologie;Karlsruhe Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.kit.edu;https://www.kit.edu", "aff_unique_abbr": "KIT;KIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Germany" }, { "id": "0u9uvPdRgV", "title": "Semi-supervised Diffusion Solver for Travelling Salesman Problem", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "We propose a semi-supervised diffusion solver for solving the Travelling Sales- man Problem (TSP). Data-driven combinatorial optimization models recently at- tract an amount of attention, since they have shown promising results in solving various NP-hard problems without too much expert knowledge. However, most of them rely on reinforcement learning (RL) and supervised learning (SL) which face some intractable challenges: RL methods often encounter sparse reward problems and SL methods pose a strict assumption that the optimal solution (label) is always available. To address these challenges in arbitrarily large-scale TSP, this article proposes a novel semi-supervised learning-based diffusion framework towards a more general situation, i.e., we can freely produce instances as many as possible but the acquisition of optimal solution is costly. This semi-supervised paradigm is made viable by modeling the generative process upon a special transition matrix, which facilitates the effective learning of the generative diffusion, compared with learning the heatmap directly like other solvers do. 
Comprehensive experiments validate our method across various scales TSP, showing that our method remarkably outperforms state-of-the-art data-driven solvers on large benchmark datasets for Traveling Salesman Problems, and has an outstanding generalization ability.", "keywords": "Travelling Salesman Problem;Semi-supervised Learning;Diffusion Model", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Ning Ma;Yishun Dou", "authorids": "~Ning_Ma2;~Yishun_Dou1", "gender": "F;M", "homepage": ";", "dblp": ";273/9779", "google_scholar": "https://scholar.google.com.tw/citations?view_op=list_works;https://scholar.google.com/citations?view_op=list_works", "orcid": ";0009-0008-8345-8258", "linkedin": ";", "or_profile": "~Ning_Ma2;~Yishun_Dou1", "aff": ";Huawei Technologies Ltd.", "aff_domain": ";huawei.com", "position": ";Researcher", "bibtex": "@misc{\nma2024semisupervised,\ntitle={Semi-supervised Diffusion Solver for Travelling Salesman Problem},\nauthor={Ning Ma and Yishun Dou},\nyear={2024},\nurl={https://openreview.net/forum?id=0u9uvPdRgV}\n}", "github": "", "project": "", "reviewers": "FvPj;9n2o;JPUZ;r8xE", "site": "https://openreview.net/forum?id=0u9uvPdRgV", "pdf_size": 317157, "rating": "3;3;5;6", "confidence": "3;5;5;3", "soundness": "2;2;2;4", "contribution": "2;2;2;2", "presentation": "3;2;3;2", "wc_summary": "64;45;103;111", "wc_strengths": "35;61;180;152", "wc_weaknesses": "329;312;41;161", "wc_questions": "128;62;90;68", "wc_review": "556;480;414;492", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 1.0 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 80.75, 27.24311839712921 ], "wc_strengths_avg": [ 107.0, 60.52685354452187 ], "wc_weaknesses_avg": [ 210.75, 117.81845144118981 ], "wc_questions_avg": [ 87.0, 25.865034312755125 ], "wc_review_avg": [ 485.5, 50.386009963084 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.19245008972987526, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7016110411812515437&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Huawei", "aff_unique_dep": "Huawei Technologies", "aff_unique_url": "https://www.huawei.com", "aff_unique_abbr": "Huawei", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "title": "Linear attention is (maybe) all you need (to understand Transformer optimization)", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19602", "id": "0uI5415ry7", "author_site": "Kwangjun Ahn, Xiang Cheng, Minhak Song, Chulhee Yun, Ali Jadbabaie, Suvrit Sra", "tldr": "", "abstract": "Transformer training is notoriously difficult, requiring a careful design of optimizers and use of various heuristics. We make progress towards understanding the subtleties of training Transformers by carefully studying a simple yet canonical linearized *shallow* Transformer model. Specifically, we train linear Transformers to solve regression tasks, inspired by J. von Oswald et al. (ICML 2023), and K. Ahn et al. (NeurIPS 2023). 
Most importantly, we observe that our proposed linearized models can reproduce several prominent aspects of Transformer training dynamics. Consequently, the results obtained in this paper suggest that a simple linearized Transformer model could actually be a valuable, realistic abstraction for understanding Transformer optimization.", "keywords": "Transformer;optimization;adam;clipping;heavy-tailed noise;directional smoothness", "primary_area": "optimization", "supplementary_material": "", "author": "Kwangjun Ahn;Xiang Cheng;Minhak Song;Chulhee Yun;Ali Jadbabaie;Suvrit Sra", "authorids": "~Kwangjun_Ahn2;~Xiang_Cheng1;~Minhak_Song1;~Chulhee_Yun1;~Ali_Jadbabaie1;~Suvrit_Sra1", "gender": ";M;M;M;M;", "homepage": "http://kjahn.mit.edu/;https://sites.google.com/berkeley.edu/xiangcheng/home;https://songminhak.github.io;https://chulheeyun.github.io/;http://www.mit.edu/~jadbabai/www;https://optml.mit.edu", "dblp": ";29/1059-6;;138/0148.html;83/3158;90/930", "google_scholar": "z94iNtgAAAAJ;-WJinlEAAAAJ;https://scholar.google.com/citations?hl=en;Ukl64ggAAAAJ;ZBc_WwYAAAAJ;eyCw9goAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Kwangjun_Ahn2;~Xiang_Cheng1;~Minhak_Song1;~Chulhee_Yun1;~Ali_Jadbabaie1;~Suvrit_Sra1", "aff": "Massachusetts Institute of Technology;;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;;kaist.ac.kr;kaist.ac.kr;mit.edu;mit.edu", "position": "PhD student;;Undergrad student;Assistant Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nahn2024linear,\ntitle={Linear attention is (maybe) all you need (to understand Transformer optimization)},\nauthor={Kwangjun Ahn and Xiang Cheng and Minhak Song and Chulhee Yun and Ali Jadbabaie and Suvrit Sra},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=0uI5415ry7}\n}", "github": "", "project": "", "reviewers": "gJVN;bjjH;GNbE;dN4V", "pdf_size": 3520901, "rating": "6;6;6;8", "confidence": "4;3;4;3", "soundness": "3;3;3;3", "contribution": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "80;83;134;77", "wc_strengths": "57;78;45;92", "wc_weaknesses": "187;99;49;134", "wc_questions": "24;100;117;48", "wc_review": "348;360;345;351", "wc_reply_reviewers": "21;123;0;0", "wc_reply_authors": "135;697;286;183", "reply_reviewers": "1;1;0;0", "reply_authors": "1;2;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 93.5, 23.47871376374779 ], "wc_strengths_avg": [ 68.0, 18.207141456033124 ], "wc_weaknesses_avg": [ 117.25, 50.34071413875652 ], "wc_questions_avg": [ 72.25, 37.71190130449538 ], "wc_review_avg": [ 351.0, 5.612486080160912 ], "wc_reply_reviewers_avg": [ 36.0, 50.95586325438909 ], "wc_reply_authors_avg": [ 325.25, 221.4547075589047 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 54, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13865784961911745848&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=0uI5415ry7", "pdf": "https://openreview.net/pdf?id=0uI5415ry7", "email": 
"mit.edu;;kaist.ac.kr;kaist.ac.kr;mit.edu;mit.edu", "author_num": 6, "aff_unique_index": "0;1;1;0;0", "aff_unique_norm": "Massachusetts Institute of Technology;Korea Advanced Institute of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.kaist.ac.kr", "aff_unique_abbr": "MIT;KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;0", "aff_country_unique": "United States;South Korea" }, { "id": "0uUASYeXav", "title": "Graphical Object-Centric Actor-Critic", "track": "main", "status": "Reject", "tldr": "", "abstract": "There have recently been significant advances in the problem of unsupervised object-centric representation learning and its application to downstream tasks. The latest works support the argument that employing disentangled object representations in image-based object-centric reinforcement learning tasks facilitates policy learning. We propose a novel object-centric reinforcement learning algorithm combining actor-critic and model-based approaches to utilize these representations effectively.\nIn our approach, we use a transformer encoder to extract object representations and graph neural networks to approximate the dynamics of an environment. The proposed method fills a research gap in developing efficient object-centric world models for reinforcement learning settings that can be used for environments with discrete or continuous action spaces. Our algorithm performs better in a visually complex 3D robotic environment and a 2D environment with compositional structure than the state-of-the-art model-free actor-critic algorithm built upon transformer architecture and the state-of-the-art monolithic model-based algorithm.", "keywords": "Reinforcement Learning;World Model;Actor-critic;Object-centric Representation;Graph Neural Network", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/f2a25ac4327a0679800f55abd7420aaccb29bc8c.zip", "author": "Leonid Anatolievich Ugadiarov;Vitaliy Vorobyov;Aleksandr Panov", "authorids": "~Leonid_Anatolievich_Ugadiarov1;~Vitaliy_Vorobyov1;~Aleksandr_Panov1", "gender": "M;M;M", "homepage": "https://github.com/ugadiarov-la-phystech-edu;https://v3code.github.io/personal_page;http://grafft.github.io", "dblp": ";;177/9975", "google_scholar": ";;https://scholar.google.ru/citations?hl=ru", "orcid": ";;0000-0002-9747-3837", "linkedin": ";;", "or_profile": "~Leonid_Anatolievich_Ugadiarov1;~Vitaliy_Vorobyov1;~Aleksandr_Panov1", "aff": "Federal Research Center \u00abComputer Science and Control\u00bb of Russian Academy of Sciences;Moscow Institute of Physics and Technology;Federal Research Center \u00abComputer Science and Control\u00bb of Russian Academy of Sciences", "aff_domain": "frccsc.ru;phystech.edu;frccsc.ru", "position": "Researcher;MS student;Principal Researcher", "bibtex": "@misc{\nugadiarov2024graphical,\ntitle={Graphical Object-Centric Actor-Critic},\nauthor={Leonid Anatolievich Ugadiarov and Vitaliy Vorobyov and Aleksandr Panov},\nyear={2024},\nurl={https://openreview.net/forum?id=0uUASYeXav}\n}", "github": "", "project": "", "reviewers": "aEan;rSLu;6gyQ;HvnL", "site": "https://openreview.net/forum?id=0uUASYeXav", "pdf_size": 10261470, "rating": "3;5;5;6", "confidence": "4;4;4;3", "soundness": "2;2;2;2", "contribution": "1;2;2;2", "presentation": "2;2;2;2", "wc_summary": "59;50;75;115", "wc_strengths": "36;18;43;152", "wc_weaknesses": "200;120;111;512", "wc_questions": "128;105;44;11", "wc_review": "423;293;273;790", 
"wc_reply_reviewers": "184;98;26;47", "wc_reply_authors": "1064;641;937;611", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;3;3", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.0, 0.0 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 74.75, 24.903564001965663 ], "wc_strengths_avg": [ 62.25, 52.613567641816495 ], "wc_weaknesses_avg": [ 235.75, 163.21209360828627 ], "wc_questions_avg": [ 72.0, 46.71723450719231 ], "wc_review_avg": [ 444.75, 207.48298122978665 ], "wc_reply_reviewers_avg": [ 88.75, 60.90720400740786 ], "wc_reply_authors_avg": [ 813.25, 192.85016852468655 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.6622661785325219, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14845202733643043297&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Russian Academy of Sciences;Moscow Institute of Physics and Technology", "aff_unique_dep": "Computer Science and Control;", "aff_unique_url": "https://www.ras.ru;https://www.mipt.ru/en", "aff_unique_abbr": "RAS;MIPT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Russian Federation" }, { "id": "0unbjYPmbC", "title": "ChatSearch: a Dataset and a Generative Retrieval Model for General Conversational Image Retrieval", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "In this paper, we investigate the task of general conversational image retrieval on open-domain images.\nThe objective is to search for images based on interactive conversations between humans and computers. To advance this task, we curate a dataset called ChatSearch. This dataset includes a multimodal conversational context query for each target image, thereby requiring the retrieval system to infer the underlying retrieval intention from the multimodal dialogue conducted over multiple rounds. \nSimultaneously, we propose a generative retrieval model named ChatSearcher, which is trained end-to-end to accept and produce interleaved image-text inputs/outputs. ChatSearcher exhibits strong capability in reasoning with multimodal context and can leverage world knowledge to yield more sophisticated retrieval results. It demonstrates superior performance on the ChatSearch dataset and also achieves competitive results on other image retrieval tasks, such as zero-shot text-to-image retrieval and zero-shot composed image retrieval. 
With the availability of the ChatSearch dataset and the effectiveness of the ChatSearcher model, we anticipate that this work will inspire further research on interactive multimodal retrieval systems.", "keywords": "Image Retrieval; Multimodal Learning; Conversational Image Retrieval; Human-computer Interaction", "primary_area": "datasets and benchmarks", "supplementary_material": "", "author": "Zijia Zhao;Longteng Guo;Tongtian Yue;Erdong Hu;Shuai Shao;Zehuan Yuan;Jing Liu", "authorids": "~Zijia_Zhao1;~Longteng_Guo1;~Tongtian_Yue1;~Erdong_Hu2;~Shuai_Shao3;~Zehuan_Yuan1;~Jing_Liu1", "gender": "M;M;M;M;M;F;", "homepage": "https://ltguo19.github.io/;;https://github.com/Hedone00;http://www.sshao.com;https://shallowyuan.github.io/;http://www.nlpr.ia.ac.cn/iva/liujing/;https://blog.csdn.net/JoeCucu?type=blog", "dblp": "207/1905;348/6568;;71/8201-5;227/3298;72/2590-1.html;296/3659", "google_scholar": "OaGRHWYAAAAJ;OrICiVQAAAAJ;https://scholar.google.com/citations?hl=en;uL9iyKgAAAAJ;;sOI-S7oAAAAJ;", "orcid": ";0000-0001-5774-4084;;0000-0001-8560-4572;;;", "linkedin": ";;;shuai-shao-b7a047a2/;;;", "or_profile": "~Longteng_Guo1;~Tongtian_Yue1;~Erdong_Hu2;~Shuai_Shao3;~Zehuan_Yuan1;~Jing_Liu1;~Joe_Z1", "aff": "Institute of Automation, Chinese Academy of Sciences;, Institute of automation, Chinese academy of science;Institute of Automation,Chinese Academy of Sciences;ByteDance Inc.;ByteDance Inc.;Institute of automation, Chinese academy of science;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "ia.ac.cn;nlpr.ia.ac.cn;ia.ac.cn;bytedance.com;bytedance.com;nlpr.ia.ac.cn;ia.ac.cn", "position": "Associate Professor;PhD student;MS student;Researcher;Researcher;Full Professor;PhD student", "bibtex": "@misc{\nzhao2024chatsearch,\ntitle={ChatSearch: a Dataset and a Generative Retrieval Model for General Conversational Image Retrieval},\nauthor={Zijia Zhao and Longteng Guo and Tongtian Yue and Erdong Hu and Shuai Shao and Zehuan Yuan and Jing Liu},\nyear={2024},\nurl={https://openreview.net/forum?id=0unbjYPmbC}\n}", "github": "", "project": "", "reviewers": "nix8;Lx3N;fGPE;DUdh", "site": "https://openreview.net/forum?id=0unbjYPmbC", "pdf_size": 18734609, "rating": "5;5;5;6", "confidence": "3;3;4;3", "soundness": "3;3;3;3", "contribution": "2;2;3;3", "presentation": "1;3;4;2", "wc_summary": "64;52;63;63", "wc_strengths": "30;36;58;102", "wc_weaknesses": "176;168;186;87", "wc_questions": "41;23;135;22", "wc_review": "311;279;442;274", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "546;751;1030;374", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;2;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 60.5, 4.924428900898052 ], "wc_strengths_avg": [ 56.5, 28.26216552212516 ], "wc_weaknesses_avg": [ 154.25, 39.347013863824536 ], "wc_questions_avg": [ 55.25, 46.66034183329565 ], "wc_review_avg": [ 326.5, 68.17807565486136 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 675.25, 244.45999161416987 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15039888198124627695&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;0;1;1;0;0", "aff_unique_norm": "Chinese 
Academy of Sciences;ByteDance", "aff_unique_dep": "Institute of Automation;", "aff_unique_url": "http://www.ia.cas.cn;https://www.bytedance.com", "aff_unique_abbr": "CAS;ByteDance", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "0upMDCx8AA", "title": "Post-Training Recovery from Injected Bias with Self-Influence", "track": "main", "status": "Reject", "tldr": "", "abstract": "Learning generalized models from biased data with strong spurious correlations to the class label is an important undertaking toward fairness in deep learning. In the absence of any prior knowledge or supervision of bias, recent studies tackle the problem by presuming the bias severity to be sufficiently high and employing a bias-amplified model trained by empirical risk minimization (ERM) to identify and utilize bias-conflicting samples that are free of spurious correlations. However, insufficient preciseness in detecting bias-conflicting samples results in injecting erroneous signals during training; conversely, it leads to learning malignant biases instead of excluding them. In practice, as the presumption about the magnitude of bias often does not hold, it is important for the model to demonstrate robust performance across a wide spectrum of biases. In this paper, we propose SePT (Self-influence-based Post-Training), a fine-tuning framework leveraging the self-influence score to filter bias-conflicting samples, which yields a pivotal subset with significantly diminished spurious correlations. Our method enables the quick recovery of a biased model from learned bias through fine-tuning with minimal friction. In addition, SePT also utilizes the remaining training dataset to adjust the model, thereby maintaining robust performance in situations with weak spurious correlation or even in the absence of it. 
Experiments on diverse benchmark datasets with a wide range of bias strengths show that SePT is capable of boosting the performance of both bias-injected and state-of-the-art debiased models.", "keywords": "Deep learning;dataset bias;debiasing", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/b22796957ae69f570f80da38d113b2742baef045.zip", "author": "Yeonsung Jung;Jaeyun Song;June Yong Yang;Jin-Hwa Kim;Sung-Yub Kim;Eunho Yang", "authorids": "~Yeonsung_Jung1;~Jaeyun_Song2;~June_Yong_Yang1;~Jin-Hwa_Kim1;~Sung-Yub_Kim1;~Eunho_Yang1", "gender": ";M;;Unspecified;M;M", "homepage": "https://yeonsungjung.github.io/;;http://mli.kaist.ac.kr/people/;http://wityworks.com;https://sites.google.com/site/hleehome2/;https://sungyubkim.github.io", "dblp": "264/2809;289/2048;277/5624;48/258;96/2621;236/4532", "google_scholar": "https://scholar.google.com/citations?hl=ko;;nkLNWg0AAAAJ;https://scholar.google.co.kr/citations?user=3f2wPekAAAAJ;;m2rhgrkAAAAJ", "orcid": ";;;0000-0002-0423-0415;;", "linkedin": "yeonsung-jung-a50015213/;jaeyun-song-9a4111213/;;;;", "or_profile": "~Yeonsung_Jung1;~Jaeyun_Song2;~June_Yong_Yang1;~Jin-Hwa_Kim1;~Eunho_Yang1;~SungYub_Kim1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;NAVER;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;navercorp.com;kaist.ac.kr;kaist.ac.kr", "position": "PhD student;PhD student;PhD student;Research Scientist;Associate Professor;PhD student", "bibtex": "@misc{\njung2024posttraining,\ntitle={Post-Training Recovery from Injected Bias with Self-Influence},\nauthor={Yeonsung Jung and Jaeyun Song and June Yong Yang and Jin-Hwa Kim and Sung-Yub Kim and Eunho Yang},\nyear={2024},\nurl={https://openreview.net/forum?id=0upMDCx8AA}\n}", "github": "", "project": "", "reviewers": "coZv;pbA4;dWNy", "site": "https://openreview.net/forum?id=0upMDCx8AA", "pdf_size": 10904349, "rating": "3;3;5", "confidence": "2;3;4", "soundness": "2;1;2", "contribution": "2;2;3", "presentation": "3;2;2", "wc_summary": "93;96;148", "wc_strengths": "49;69;40", "wc_weaknesses": "85;207;56", "wc_questions": "8;21;363", "wc_review": "235;393;607", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "386;599;618", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 1.6666666666666667, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 112.33333333333333, 25.249862485874168 ], "wc_strengths_avg": [ 52.666666666666664, 12.119772641798562 ], "wc_weaknesses_avg": [ 116.0, 65.42680388546171 ], "wc_questions_avg": [ 130.66666666666666, 164.37017842527138 ], "wc_review_avg": [ 411.6666666666667, 152.44088107270377 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 534.3333333333334, 105.17393001859136 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.8660254037844387, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:8X31mcmToDEJ:scholar.google.com/&scioq=Post-Training+Recovery+from+Injected+Bias+with+Self-Influence&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;NAVER Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.kaist.ac.kr;https://www.naver.com", "aff_unique_abbr": "KAIST;NAVER", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "LipSim: A Provably Robust Perceptual Similarity Metric", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19601", "id": "0w42S2Gp70", "author_site": "Sara Ghazanfari, Alexandre Araujo, Prashanth Krishnamurthy, Farshad Khorrami, Siddharth Garg", "tldr": "", "abstract": "Recent years have seen growing interest in developing and applying perceptual similarity metrics. Research has shown the superiority of perceptual metrics over pixel-wise metrics in aligning with human perception and serving as a proxy for the human visual system.\nOn the other hand, as perceptual metrics rely on neural networks, there is a growing concern regarding their resilience, given the established vulnerability of neural networks to adversarial attacks. It is indeed logical to infer that perceptual metrics may inherit both the strengths and shortcomings of neural networks.\nIn this work, we demonstrate the vulnerability of state-of-the-art perceptual similarity metrics based on an ensemble of ViT-based feature extractors to adversarial attacks. We then propose a framework to train a robust perceptual similarity metric called LipSim (Lipschitz Similarity Metric) with provable guarantees. \nBy leveraging 1-Lipschitz neural networks as the backbone, LipSim provides guarded areas around each data point and certificates for all perturbations within an $\\ell_2$ ball. 
Finally, a comprehensive set of experiments shows the performance of LipSim in terms of natural and certified scores and on the image retrieval application.", "keywords": "Perceptual similarity metric;certified defense;deep learning", "primary_area": "metric learning, kernel learning, and sparse coding", "supplementary_material": "/attachment/ac1c68fa813678f31bc42d9bafc10c7a84a43cd6.zip", "author": "Sara Ghazanfari;Alexandre Araujo;Prashanth Krishnamurthy;Farshad Khorrami;Siddharth Garg", "authorids": "~Sara_Ghazanfari1;~Alexandre_Araujo3;~Prashanth_Krishnamurthy1;~Farshad_Khorrami1;~Siddharth_Garg1", "gender": "F;;M;M;M", "homepage": "https://saraghazanfari.github.io/;;https://engineering.nyu.edu/faculty/farshad-khorrami;http://engineering.nyu.edu/people/siddharth-garg/;https://alexandrearaujo.com/", "dblp": "236/6982;24/3420;94/5644;94/3807;228/6599", "google_scholar": "0dMW47QAAAAJ;W-_zgGgAAAAJ;NdOqlPQAAAAJ;https://scholar.google.com.tw/citations?user=Yf8OqQQAAAAJ;https://scholar.google.fr/citations?user=wsu61VYAAAAJ", "orcid": ";;;;", "linkedin": "sara-ghazanfari-1a8b37163/;;;;", "or_profile": "~Sara_Ghazanfari1;~Prashanth_Krishnamurthy1;~Farshad_Khorrami1;~Siddharth_Garg1;~Alexandre_ARAUJO1", "aff": "New York University;New York University;New York University;New York University;New York University", "aff_domain": "nyu.edu;nyu.edu;nyu.edu;nyu.edu;nyu.edu", "position": "PhD student;Research Scientist and Adjunct Faculty;Full Professor;Associate Professor;Postdoc", "bibtex": "@inproceedings{\nghazanfari2024lipsim,\ntitle={LipSim: A Provably Robust Perceptual Similarity Metric},\nauthor={Sara Ghazanfari and Alexandre Araujo and Prashanth Krishnamurthy and Farshad Khorrami and Siddharth Garg},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=0w42S2Gp70}\n}", "github": "", "project": "", "reviewers": "YdcH;KFDY;sTNV", "pdf_size": 36179352, "rating": "5;5;6", "confidence": "2;2;3", "soundness": "3;2;3", "contribution": "3;2;3", "presentation": "3;2;3", "wc_summary": "101;38;67", "wc_strengths": "76;24;60", "wc_weaknesses": "57;65;71", "wc_questions": "1;2;211", "wc_review": "235;129;409", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "376;1105;1711", "reply_reviewers": "0;0;0", "reply_authors": "1;2;3", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 2.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 68.66666666666667, 25.746628689770024 ], "wc_strengths_avg": [ 53.333333333333336, 21.74600857373345 ], "wc_weaknesses_avg": [ 64.33333333333333, 5.734883511361751 ], "wc_questions_avg": [ 71.33333333333333, 98.76009090495795 ], "wc_review_avg": [ 257.6666666666667, 115.42770705318354 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1064.0, 545.782007765005 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.9999999999999997, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13175108423726794276&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=0w42S2Gp70", "pdf": "https://openreview.net/pdf?id=0w42S2Gp70", "email": "nyu.edu;nyu.edu;nyu.edu;nyu.edu;nyu.edu", "author_num": 5, 
"aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "New York University", "aff_unique_dep": "", "aff_unique_url": "https://www.nyu.edu", "aff_unique_abbr": "NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "0xLWPdObG1", "title": "Subject-specific Deep Neural Networks for Count Data with High-cardinality Categorical Features", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "There is a growing interest in subject-specific predictions using deep neural networks (DNNs) because real-world data often exhibit correlations, which has been typically overlooked in traditional DNN frameworks. In this paper, we propose a novel hierarchical likelihood learning framework for introducing gamma random effects into the Poisson DNN, so as to improve the prediction performance by capturing both nonlinear effects of input variables and subject-specific cluster effects. The proposed method simultaneously yields maximum likelihood estimators for fixed parameters and best unbiased predictors for random effects by optimizing a single objective function. This approach enables a fast end-to-end algorithm for handling clustered count data, which often involve high-cardinality categorical features. Furthermore, state-of-the-art network architectures can be easily implemented into the proposed h-likelihood framework. As an example, we introduce multi-head attention layer and a sparsemax function, which allows feature selection in high-dimensional settings. To enhance practical performance and learning efficiency, we present an adjustment procedure for prediction of random parameters and a method-of-moments estimator for pretraining of variance component. Various experiential studies and real data analyses confirm the advantages of our proposed methods.", "keywords": "subject-specific prediction;random effect;high-cardinality categorical feature;count data;clustered data;hierarchical likelihood;deep learning", "primary_area": "learning theory", "supplementary_material": "/attachment/0cd785bb937afe682c53cc975c89f7e8433e7e6b.zip", "author": "Hangbin Lee;IL DO HA;Changha Hwang;Youngjo Lee", "authorids": "~Hangbin_Lee1;~IL_DO_HA1;~Changha_Hwang1;~Youngjo_Lee2", "gender": "M;M;M;M", "homepage": ";https://stat-eng.pknu.ac.kr/stat-eng/2348;https://www.dankook.ac.kr/web/kor/-167?p_p_id=DeptInfo_WAR_empInfoportlet&p_p_lifecycle=0&p_p_state=normal&p_p_mode=view&p_p_col_id=column-2&p_p_col_count=1&_DeptInfo_WAR_empInfoportlet_empId=JIeao4QTeONm1350kqqKBw%3D%3D&_DeptInfo_WAR_empInfoportlet_action=view_message;", "dblp": "321/6052;;;", "google_scholar": "https://scholar.google.co.kr/citations?user=Bow6RRIAAAAJ;;;yEoD89QAAAAJ", "orcid": "0000-0002-3447-4306;;;0000-0001-9820-6434", "linkedin": "hangbin-lee-b415b1172/;;;", "or_profile": "~Hangbin_Lee1;~IL_DO_HA1;~Changha_Hwang1;~Youngjo_Lee2", "aff": "Seoul National University;;;Seoul National University", "aff_domain": "snu.ac.kr;;;snu.ac.kr", "position": "Postdoc;;;Emeritus", "bibtex": "@misc{\nlee2024subjectspecific,\ntitle={Subject-specific Deep Neural Networks for Count Data with High-cardinality Categorical Features},\nauthor={Hangbin Lee and IL DO HA and Changha Hwang and Youngjo Lee},\nyear={2024},\nurl={https://openreview.net/forum?id=0xLWPdObG1}\n}", "github": "", "project": "", "reviewers": "HXxH;ZRFo;z2gb", "site": "https://openreview.net/forum?id=0xLWPdObG1", "pdf_size": 690080, "rating": "3;6;6", "confidence": "3;2;3", "soundness": "2;4;3", 
"contribution": "2;3;3", "presentation": "2;3;2", "wc_summary": "66;53;72", "wc_strengths": "38;51;140", "wc_weaknesses": "574;53;258", "wc_questions": "128;106;119", "wc_review": "806;263;589", "wc_reply_reviewers": "376;47;35", "wc_reply_authors": "2441;903;1101", "reply_reviewers": "2;1;1", "reply_authors": "5;2;2", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 63.666666666666664, 7.93025150224688 ], "wc_strengths_avg": [ 76.33333333333333, 45.330882286680556 ], "wc_weaknesses_avg": [ 295.0, 214.30041219434614 ], "wc_questions_avg": [ 117.66666666666667, 9.030811456096044 ], "wc_review_avg": [ 552.6666666666666, 223.16262132255832 ], "wc_reply_reviewers_avg": [ 152.66666666666666, 157.99648378647194 ], "wc_reply_authors_avg": [ 1481.6666666666667, 683.1502193677627 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.0, 1.4142135623730951 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=929589920201687781&as_sdt=805&sciodt=0,3&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "id": "0xT87opqKV", "title": "ProteinAdapter: Adapting Pre-trained Large Protein Models for Efficient Protein Representation Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "The study of proteins is crucial in various scientific disciplines, but understanding their intricate multi-level relationships remains challenging. Recent advancements in Large Protein Models (LPMs) have demonstrated their ability in sequence and structure understanding, suggesting the potential of directly using them for efficient protein representation learning. In this work, we introduce ProteinAdapter, to efficiently transfer the general reference from the multiple Large Protein Models (LPMs), e.g., ESM-1b, to the task-specific knowledge. ProteinAdapter could largely save labor-intensive analysis on the 3D position and the amino acid order. We observe that such a simple yet effective approach works well on multiple downstream tasks. Specifically, (1) with limited extra parameters, ProteinAdapter enables multi-level protein representation learning by integrating both sequence and geometric structure embeddings from LPMs. (2) Based on the learned embedding, we further scale the proposed ProteinAdapter to multiple conventional protein tasks. Considering different task priors, we propose a unified multi-scale predictor to fully take advantage of the learned embeddings via task-specific focus. Extensive experiments on over 20 tasks show that ProteinAdapter outperforms state-of-the-art methods under both single-task and multi-task settings. 
We hope that the proposed method could accelerate the study of protein analysis in the future.", "keywords": "Pretrained Large Models;Parameter-Efficient Fine-tuning;Protein Representation Learning", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Chao Wang;Zhedong Zheng;Yifan Sun;Hehe Fan;Yi Yang", "authorids": "~Chao_Wang31;~Zhedong_Zheng1;~Yifan_Sun2;~Hehe_Fan1;~Yi_Yang22", "gender": "M;M;M;M;M", "homepage": ";http://zdzheng.xyz;https://yifansun-reid.github.io;https://hehefan.github.io;https://person.zju.edu.cn/yiyang", "dblp": ";190/7710;99/10261-3.html;184/5722.html;33/4854-1.html", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;XT17oUEAAAAJ;uUZEL7UAAAAJ;hVuflMQAAAAJ;RMSuNFwAAAAJ", "orcid": "0000-0003-1297-768X;0000-0002-2434-9050;0000-0003-3532-6521;0000-0001-9572-2345;", "linkedin": ";zhedongzheng;;;", "or_profile": "~Chao_Wang31;~Zhedong_Zheng1;~Yifan_Sun2;~Hehe_Fan1;~Yi_Yang22", "aff": "University of Technology Sydney;University of Macau;Baidu;Zhejiang University;Zhejiang University", "aff_domain": "uts.edu.au;um.edu.mo;baidu.com;zju.edu.cn;zju.edu.cn", "position": "PhD student;Assistant Professor;Senior Expert;Assistant Professor;Full Professor", "bibtex": "@misc{\nwang2024proteinadapter,\ntitle={ProteinAdapter: Adapting Pre-trained Large Protein Models for Efficient Protein Representation Learning},\nauthor={Chao Wang and Zhedong Zheng and Yifan Sun and Hehe Fan and Yi Yang},\nyear={2024},\nurl={https://openreview.net/forum?id=0xT87opqKV}\n}", "github": "", "project": "", "reviewers": "ZPKb;PNTU;XhEm;vd8N", "site": "https://openreview.net/forum?id=0xT87opqKV", "pdf_size": 970157, "rating": "3;3;5;5", "confidence": "3;3;4;5", "soundness": "1;3;3;2", "contribution": "2;2;2;3", "presentation": "1;3;3;3", "wc_summary": "87;47;56;114", "wc_strengths": "72;24;65;26", "wc_weaknesses": "525;60;62;242", "wc_questions": "110;56;28;22", "wc_review": "794;187;211;404", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 76.0, 26.485845276298054 ], "wc_strengths_avg": [ 46.75, 21.901769334919038 ], "wc_weaknesses_avg": [ 222.25, 189.77140854196134 ], "wc_questions_avg": [ 54.0, 34.785054261852174 ], "wc_review_avg": [ 399.0, 243.07303429216495 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.9045340337332909, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:NAGG0TncKEAJ:scholar.google.com/&scioq=ProteinAdapter:+Adapting+Pre-trained+Large+Protein+Models+for+Efficient+Protein+Representation+Learning&hl=en&as_sdt=0,5", "gs_version_total": 2, "aff_unique_index": "0;1;2;3;3", "aff_unique_norm": "University of Technology Sydney;University of Macau;Baidu;Zhejiang University", "aff_unique_dep": ";;Baidu, Inc.;", "aff_unique_url": "https://www.uts.edu.au;https://www.um.edu.mo;https://www.baidu.com;https://www.zju.edu.cn", "aff_unique_abbr": "UTS;UM;Baidu;ZJU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Macau SAR", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": 
"Australia;China" }, { "id": "0y0yOpI4wx", "title": "General-Purpose In-Context Learning by Meta-Learning Transformers", "track": "main", "status": "Reject", "tldr": "", "abstract": "Modern machine learning requires system designers to specify aspects of the learning pipeline, such as losses, architectures, and optimizers. Meta-learning, or learning-to-learn, instead aims to learn those aspects, and promises to unlock greater capabilities with less manual effort. One particularly ambitious goal of meta-learning is to train general-purpose in-context learning algorithms from scratch, using only black-box models with minimal inductive bias. Such a model takes in training data, and produces test-set predictions across a wide range of problems, without any explicit definition of an inference model, training loss, or optimization algorithm. In this paper we show that Transformers and other black-box models can be meta-trained to act as general-purpose in-context learners. We characterize transitions between algorithms that generalize, algorithms that memorize, and algorithms that fail to meta-train at all, induced by changes in model size, number of tasks, and meta-optimization. We further show that the capabilities of meta-trained algorithms are bottlenecked by the accessible state size (memory) determining the next prediction, unlike standard models which are thought to be bottlenecked by parameter count. Finally, we propose practical interventions such as biasing the training distribution that improve the meta-training and meta-generalization of general-purpose in-context learning algorithms.", "keywords": "general-purpose;in-context;in-context learning;transformers;black-box;generalization", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "", "author": "Louis Kirsch;James Harrison;Jascha Sohl-Dickstein;Luke Metz", "authorids": "~Louis_Kirsch1;~James_Harrison1;~Jascha_Sohl-Dickstein2;~Luke_Metz1", "gender": ";;M;M", "homepage": "http://louiskirsch.com;;http://lukemetz.com;http://sohldickstein.com", "dblp": "202/2379;;;51/7117", "google_scholar": "w8AkOEAAAAAJ;-tEiRFcAAAAJ;jCOmCb4AAAAJ;-3zYIjQAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Louis_Kirsch1;~James_Harrison1;~Luke_Metz1;~Jascha_Sohl-Dickstein1", "aff": "Scuola universitaria professionale della Svizzera italiana (SUPSI);Google;Google;Google", "aff_domain": "supsi.ch;google.com;google.com;google.com", "position": "PhD student;Researcher;Research Scientist;Research Scientist", "bibtex": "@misc{\nkirsch2024generalpurpose,\ntitle={General-Purpose In-Context Learning by Meta-Learning Transformers},\nauthor={Louis Kirsch and James Harrison and Jascha Sohl-Dickstein and Luke Metz},\nyear={2024},\nurl={https://openreview.net/forum?id=0y0yOpI4wx}\n}", "github": "", "project": "", "reviewers": "9s7H;j4eV;zaZF;ohZB", "site": "https://openreview.net/forum?id=0y0yOpI4wx", "pdf_size": 3489880, "rating": "1;3;5;6", "confidence": "1;4;2;3", "soundness": "1;2;3;3", "contribution": "1;1;2;2", "presentation": "1;1;3;2", "wc_summary": "88;58;223;60", "wc_strengths": "1;32;99;49", "wc_weaknesses": "1;193;130;119", "wc_questions": "1;9;52;9", "wc_review": "91;292;504;237", "wc_reply_reviewers": "10;0;42;0", "wc_reply_authors": "188;1022;792;405", "reply_reviewers": "1;0;1;0", "reply_authors": "1;2;2;1", "rating_avg": [ 3.75, 1.920286436967152 ], "confidence_avg": [ 2.5, 1.118033988749895 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "contribution_avg": [ 1.5, 0.5 ], "presentation_avg": [ 
1.75, 0.82915619758885 ], "wc_summary_avg": [ 107.25, 67.87258282988795 ], "wc_strengths_avg": [ 45.25, 35.48503205578375 ], "wc_weaknesses_avg": [ 110.75, 69.3699322473361 ], "wc_questions_avg": [ 17.75, 20.04214309898021 ], "wc_review_avg": [ 281.0, 148.22786512663535 ], "wc_reply_reviewers_avg": [ 13.0, 17.233687939614086 ], "wc_reply_authors_avg": [ 601.75, 325.07874046144576 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.40755575681770734, "gs_citation": 83, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5000755914310791476&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Scuola universitaria professionale della Svizzera italiana;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.supsi.ch;https://www.google.com", "aff_unique_abbr": "SUPSI;Google", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "Switzerland;United States" }, { "id": "0ypXhS83Lh", "title": "Robust Reinforcement Learning with Structured Adversarial Ensemble", "track": "main", "status": "Reject", "tldr": "", "abstract": "Although reinforcement learning (RL) is considered the gold standard for policy design, it may not always provide a robust solution in various scenarios. This can result in severe performance degradation when the environment is exposed to potential disturbances. Adversarial training using a two-player max-min game has been proven effective in enhancing the robustness of RL agents. However, we observe two severe problems pertaining to this approach: ($\\textit{i}$) the potential $\\textit{over-optimism}$ caused by the difficulty of the inner optimization problem, and ($\\textit{ii}$) the potential $\\textit{over-pessimism}$ caused by the selection of a candidate adversary set that may include unlikely scenarios. To this end, we extend the two-player game by introducing an adversarial ensemble, which involves a group of adversaries. We theoretically establish that an adversarial ensemble can efficiently and effectively obtain improved solutions to the inner optimization problem, alleviating the over-optimism. Then we address the over-pessimism by replacing the worst-case performance in the inner optimization with the average performance over the worst-$k$ adversaries. Our proposed algorithm significantly outperforms other robust RL algorithms that fail to address these two problems, corroborating the importance of the identified problems. 
Extensive experimental results demonstrate that the proposed algorithm consistently generates policies with enhanced robustness.", "keywords": "Reinforcement Learning;Robustness;Ensemble Methods", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/8a6e2b5cd3b459e26fbb53edb0cfd2a7d8d36d77.zip", "author": "Juncheng Dong;Hao-Lun Hsu;Qitong Gao;Vahid Tarokh;Miroslav Pajic", "authorids": "~Juncheng_Dong1;~Hao-Lun_Hsu1;~Qitong_Gao1;~Vahid_Tarokh1;~Miroslav_Pajic2", "gender": ";M;M;;M", "homepage": ";https://hlhsu.github.io/;http://qitonggao.com;;http://people.duke.edu/~mp275/", "dblp": ";303/0321;238/5422;;74/7446.html", "google_scholar": ";h9qf9vUAAAAJ;Flv4SrsAAAAJ;;Fbn21-8AAAAJ", "orcid": ";;;;", "linkedin": ";hlhsu/;qitong-gao;;", "or_profile": "~Juncheng_Dong1;~Hao-Lun_Hsu1;~Qitong_Gao1;~Vahid_Tarokh1;~Miroslav_Pajic2", "aff": ";Duke University;Duke University;;Duke University", "aff_domain": ";duke.edu;duke.edu;;duke.edu", "position": ";PhD student;PhD student;;Associate Professor", "bibtex": "@misc{\ndong2024robust,\ntitle={Robust Reinforcement Learning with Structured Adversarial Ensemble},\nauthor={Juncheng Dong and Hao-Lun Hsu and Qitong Gao and Vahid Tarokh and Miroslav Pajic},\nyear={2024},\nurl={https://openreview.net/forum?id=0ypXhS83Lh}\n}", "github": "", "project": "", "reviewers": "c2QC;APuc;1Lc9", "site": "https://openreview.net/forum?id=0ypXhS83Lh", "pdf_size": 1058130, "rating": "3;6;6", "confidence": "5;4;4", "soundness": "3;3;3", "contribution": "2;4;3", "presentation": "3;3;3", "wc_summary": "175;49;79", "wc_strengths": "6;64;77", "wc_weaknesses": "185;629;124", "wc_questions": "2;1;4", "wc_review": "368;743;284", "wc_reply_reviewers": "612;40;0", "wc_reply_authors": "2417;1437;453", "reply_reviewers": "2;1;0", "reply_authors": "5;4;2", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 101.0, 53.74011537017761 ], "wc_strengths_avg": [ 49.0, 30.865298745786774 ], "wc_weaknesses_avg": [ 312.6666666666667, 225.06344784428137 ], "wc_questions_avg": [ 2.3333333333333335, 1.247219128924647 ], "wc_review_avg": [ 465.0, 199.54448125668623 ], "wc_reply_reviewers_avg": [ 217.33333333333334, 279.5488428800154 ], "wc_reply_authors_avg": [ 1435.6666666666667, 801.8001967794443 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 3.6666666666666665, 1.247219128924647 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:wpNqysSE94QJ:scholar.google.com/&scioq=Robust+Reinforcement+Learning+with+Structured+Adversarial+Ensemble&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Duke University", "aff_unique_dep": "", "aff_unique_url": "https://www.duke.edu", "aff_unique_abbr": "Duke", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "0zIKlb0prF", "title": "MPPN: Multi-Resolution Periodic Pattern Network For Long-Term Time Series Forecasting", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Long-term time series forecasting plays an important role in various real-world scenarios. 
Recent deep learning methods for long-term series forecasting tend to capture the intricate patterns of time series by Transformer-based or sampling-based methods. However, most of the extracted patterns are relatively simplistic and may include unpredictable noise. Moreover, the multivariate series forecasting methods usually ignore the individual characteristics of each variate, which may affect the prediction accuracy. To capture the intrinsic patterns of time series, we propose a novel deep learning network architecture, named Multi-resolution Periodic Pattern Network (MPPN), for long-term series forecasting. We first construct context-aware multi-resolution semantic units of time series and employ multi-periodic pattern mining to capture the key patterns of time series. Then, we propose a channel adaptive module to capture the multivariate perceptions towards different patterns. In addition, we adopt an entropy-based method for evaluating the predictability of time series and providing an upper bound on the prediction accuracy before forecasting. Our experimental evaluation on nine real-world benchmarks demonstrated that MPPN significantly outperforms the state-of-the-art Transformer-based, sampling-based and pre-trained methods for long-term series forecasting.", "keywords": "Long-term time series forecasting;Multi-resolution periodic pattern;Channel adaption;Multivariate time series prediction.", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/03b59cd489129bf92c72389d57e0357247186d74.zip", "author": "Xing Wang;Zhendong Wang;Kexin Yang;zhiyan song;Lin Zhu;Chao Deng;Junlan Feng", "authorids": "~Xing_Wang5;~Zhendong_Wang5;~Kexin_Yang4;~zhiyan_song1;~Lin_Zhu6;~Chao_Deng4;~Junlan_Feng3", "gender": "F;M;F;F;F;M;F", "homepage": "https://orcid.org/0000-0003-3148-8291;;;https://scholar.google.com/citations?authuser=1&user=teprBfAAAAAJ;;;", "dblp": "02/3674;153/2385;54/774;302/7504;;;36/3948", "google_scholar": ";;;https://scholar.google.com/citations?authuser=1;;https://scholar.google.com/citations?hl=en;https://scholar.google.es/citations?user=rBjPtmQAAAAJ", "orcid": ";0000-0001-9530-2906;0009-0001-0031-7644;0000-0003-2138-3154;0000-0003-1167-1953;0000-0003-4449-5247;0000-0001-5292-2945", "linkedin": ";;;;;https://www.linkedin.cn/incareer/in/ACoAAB5sppAB_Da2tlvgSyM7NFTWl6d1DhZZe1o;junlan-feng-8968ba11/", "or_profile": "~Xing_Wang5;~Zhendong_Wang5;~Kexin_Yang4;~zhiyan_song1;~Lin_Zhu6;~Chao_Deng4;~Junlan_Feng3", "aff": "China Mobile Research Institute;China Mobile Research Institute;China Mobile Research Institute;China Mobile Research Institute;China Mobile research institute;China Mobile Research Institute;China Mobile", "aff_domain": "chinamobile.com;chinamobile.com;chinamobile.com;chinamobile.com;chinamobile.com;jiutian.10086.cn;ioa.ac.cn", "position": "Researcher;Researcher;Researcher;Employee;Full Professor;Researcher;Principal Researcher", "bibtex": "@misc{\nwang2024mppn,\ntitle={{MPPN}: Multi-Resolution Periodic Pattern Network For Long-Term Time Series Forecasting},\nauthor={Xing Wang and Zhendong Wang and Kexin Yang and zhiyan song and Lin Zhu and Chao Deng and Junlan Feng},\nyear={2024},\nurl={https://openreview.net/forum?id=0zIKlb0prF}\n}", "github": "", "project": "", "reviewers": "saoQ;pRbf;fdFp;FFSt;qZtm", "site": "https://openreview.net/forum?id=0zIKlb0prF", "pdf_size": 693698, "rating": "3;5;5;5;5", "confidence": "4;4;4;3;4", "soundness": "2;2;2;3;3", "contribution": "2;2;2;2;3", 
"presentation": "3;1;2;3;3", "wc_summary": "74;32;69;131;119", "wc_strengths": "118;28;34;249;36", "wc_weaknesses": "349;228;283;288;290", "wc_questions": "5;5;5;265;26", "wc_review": "546;293;391;933;471", "wc_reply_reviewers": "45;71;37;0;163", "wc_reply_authors": "705;697;708;1169;742", "reply_reviewers": "1;1;1;0;1", "reply_authors": "1;1;1;2;1", "rating_avg": [ 4.6, 0.7999999999999999 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "contribution_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 2.4, 0.8 ], "wc_summary_avg": [ 85.0, 35.93883693165375 ], "wc_strengths_avg": [ 93.0, 84.75376097849582 ], "wc_weaknesses_avg": [ 287.6, 38.338492406457505 ], "wc_questions_avg": [ 61.2, 102.22406761619301 ], "wc_review_avg": [ 526.8, 219.81119170779272 ], "wc_reply_reviewers_avg": [ 63.2, 54.83210738244519 ], "wc_reply_authors_avg": [ 804.2, 183.04906446087074 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.2500000000000001, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2119790206527010367&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "China Mobile", "aff_unique_dep": "Research Institute", "aff_unique_url": "https://www.chinamobile.com/", "aff_unique_abbr": "CMRI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "10BTKkFfhl", "title": "Efficient Backdoor Mitigation in Federated Learning with Contrastive Loss", "track": "main", "status": "Reject", "tldr": "", "abstract": "Due to the data-driven nature of deep neural networks and privacy concerns around user data, a backdoor could be easily injected into deep neural networks in federated learning without attracting the attention of users. An affected global model operates normally as a clean model in regular tasks and behaves differently when the trigger is presented. In this paper, we propose a novel reverse engineering approach to detect and mitigate the backdoor attack in federated learning by adopting a self-supervised Contrastive learning loss. In contrast to existing reverse engineering techniques, such as Neural Cleanse, which involve iterating through each class in the dataset, we employ the contrastive loss as a whole to identify triggers in the backdoored model. Our method compares the last-layer feature outputs of a potentially affected model with these from a clean one preserved beforehand to reconstruct the trigger under the guidance of the contrastive loss. The reverse-engineered trigger is then applied to patch the affected global model to remove the backdoor. If the global model is free from backdoors, the Contrastive loss will lead to either a blank trigger or one with random pattern. We evaluated the proposed method on three datasets under two backdoor attacks and compared it against three existing defense methods. Our results showed that while many popular reverse engineering algorithms were successful in centralized learning settings, they had difficulties detecting backdoors in federated learning, including Neural Cleanse, TABOR, and DeepInspect. 
Our method successfully detected backdoors in federated learning and was more time-efficient.", "keywords": "Backdoor Defense; Federated Learning; Contrastive Loss", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Hal Ferguson;Rui Ning;Jiang Li;Hongyi Wu;Chunsheng Xin", "authorids": "~Hal_Ferguson1;~Rui_Ning2;~Jiang_Li3;~Hongyi_Wu1;~Chunsheng_Xin1", "gender": "M;;M;M;M", "homepage": ";https://www.lions.odu.edu/~rning/;https://fs.wp.odu.edu/jli/;http://www.u.arizona.edu/~mhwu/;https://ww1.odu.edu/eng/programs/ccni/people", "dblp": ";211/2892;41/3068-1;78/1033;06/6463", "google_scholar": ";oN4NttEAAAAJ;https://scholar.google.com/citations?hl=en;rFLksrwAAAAJ;TaP2oq8AAAAJ", "orcid": ";;;;", "linkedin": "hal-ferguson;;jiang-li-416b0a6/;hongyi-\u201cmichael\u201d-wu-4b51a715/;", "or_profile": "~Hal_Ferguson1;~Rui_Ning2;~Jiang_Li3;~Hongyi_Wu1;~Chunsheng_Xin1", "aff": "Old Dominion University;Old Dominion University;Old Dominion University;University of Arizona;Old Dominion University", "aff_domain": "odu.edu;odu.edu;odu.edu;arizona.edu;odu.edu", "position": "PhD student;Assistant Professor;Full Professor;Full Professor;Full Professor", "bibtex": "@misc{\nferguson2024efficient,\ntitle={Efficient Backdoor Mitigation in Federated Learning with Contrastive Loss},\nauthor={Hal Ferguson and Rui Ning and Jiang Li and Hongyi Wu and Chunsheng Xin},\nyear={2024},\nurl={https://openreview.net/forum?id=10BTKkFfhl}\n}", "github": "", "project": "", "reviewers": "7bgg;58k3;pCAH;LVhf", "site": "https://openreview.net/forum?id=10BTKkFfhl", "pdf_size": 527904, "rating": "1;3;3;3", "confidence": "4;3;3;5", "soundness": "3;2;2;2", "contribution": "1;2;2;2", "presentation": "1;2;2;2", "wc_summary": "107;103;93;89", "wc_strengths": "98;38;70;6", "wc_weaknesses": "268;148;339;144", "wc_questions": "92;4;21;12", "wc_review": "565;293;523;251", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 2.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_avg": [ 98.0, 7.280109889280518 ], "wc_strengths_avg": [ 53.0, 34.45286635390443 ], "wc_weaknesses_avg": [ 224.75, 82.66612062991706 ], "wc_questions_avg": [ 32.25, 35.01696017646306 ], "wc_review_avg": [ 408.0, 137.6117727521886 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:0LzHeHmJ7u0J:scholar.google.com/&scioq=Efficient+Backdoor+Mitigation+in+Federated+Learning+with+Contrastive+Loss&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Old Dominion University;University of Arizona", "aff_unique_dep": ";", "aff_unique_url": "https://www.odu.edu;https://www.arizona.edu", "aff_unique_abbr": "ODU;UA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "10eQ4Cfh8p", "title": "SIMULTANEOUS GENERATION AND IMPROVEMENT: A UNIFIED RL PARADIGM FOR FJSP OPTIMIZATION", "track": "main", "status": "Reject", "tldr": "", "abstract": "We present an end-to-end 
reinforcement learning framework designed to address the Flexible Job Shop Problem (FJSP). Our approach consists of two primary components: a generative model that produces problem solutions stepwise, and a secondary model that continually refines these (partial) solutions. Importantly, we train both models concurrently, enabling each to be cognizant of the other's policy and make informed decisions. Extensive experimentation demonstrates that our model delivers better performance in shorter time on several public datasets comparing to baseline algorithms. Furthermore, we highlight the superior generalizability of our approach, as it maintains strong performance on large-scale instances even when trained on small-scale instances. It is worth noting that this training paradigm can be readily adapted to other combinatorial optimization problems, such as the traveling salesman problemand beyond.", "keywords": "Reinforcement Learning;Flexible Job Shop Schedule Problem;FJSP", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/6f19b6b9b383ac782203728c6d1a9961bcdca867.zip", "author": "Hongyi Zhou;Lei Song", "authorids": "~Hongyi_Zhou2;~Lei_Song3", "gender": "M;M", "homepage": ";", "dblp": ";76/893-1.html", "google_scholar": ";pXDSOocAAAAJ", "orcid": ";", "linkedin": "%E5%BC%98%E6%AF%85-%E5%91%A8-5b0112251/;", "or_profile": "~Hongyi_Zhou2;~Lei_Song3", "aff": "Microsoft Research;Microsoft", "aff_domain": "research.microsoft.com;microsoft.com", "position": "Intern;Principal Researcher", "bibtex": "@misc{\nzhou2024simultaneous,\ntitle={{SIMULTANEOUS} {GENERATION} {AND} {IMPROVEMENT}: A {UNIFIED} {RL} {PARADIGM} {FOR} {FJSP} {OPTIMIZATION}},\nauthor={Hongyi Zhou and Lei Song},\nyear={2024},\nurl={https://openreview.net/forum?id=10eQ4Cfh8p}\n}", "github": "", "project": "", "reviewers": "kC5r;ZW2D;swqB;9qF7", "site": "https://openreview.net/forum?id=10eQ4Cfh8p", "pdf_size": 440085, "rating": "3;3;3;3", "confidence": "4;3;3;3", "soundness": "1;2;2;1", "contribution": "2;2;3;2", "presentation": "1;1;2;1", "wc_summary": "38;167;92;86", "wc_strengths": "29;72;36;77", "wc_weaknesses": "340;389;38;485", "wc_questions": "70;31;82;123", "wc_review": "477;659;248;771", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 3.0, 0.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 1.5, 0.5 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 1.25, 0.4330127018922193 ], "wc_summary_avg": [ 95.75, 46.15395432679631 ], "wc_strengths_avg": [ 53.5, 21.219095173922945 ], "wc_weaknesses_avg": [ 313.0, 167.11822162768487 ], "wc_questions_avg": [ 76.5, 32.8062494046485 ], "wc_review_avg": [ 538.75, 197.95753963918625 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Mg_4JrTQARgJ:scholar.google.com/&scioq=SIMULTANEOUS+GENERATION+AND+IMPROVEMENT:+A+UNIFIED+RL+PARADIGM+FOR+FJSP+OPTIMIZATION&hl=en&as_sdt=0,47", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Microsoft", "aff_unique_dep": "Microsoft Research", "aff_unique_url": "https://www.microsoft.com/en-us/research", "aff_unique_abbr": "MSR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", 
"aff_country_unique": "United States" }, { "id": "10fsmnw6aD", "title": "How Out-of-Distribution important is", "track": "main", "status": "Reject", "tldr": "", "abstract": "Class Incremental Learning (CIL) has gained significant attention in recent years due to its potential to adaptively learn from a non-stationary data distribution. The challenge of CIL primarily revolves around the model's ability to learn new classes without forgetting previously acquired knowledge. Recent research trends has achieved significant milestones, yet the continuity of learning can be further strengthened by integrating the concepts of \"self-training\", \"out-of-distribution\", and \"data drift\". In this paper, we propose a novel approach that integrates \"Continual Learning\", \"Self-Training\", \"Out-of-Distribution recognition\", and \"Data Drift\" concepts to advance the capabilities of class incremental learning systems. Drawing inspiration from works such as \"A Theoretical Study on Solving Continual Learning\", and \"CSI: Novelty Detection via Contrastive Learning on Distributionally Shifted Instances\". We propose a model that satisfies the four concepts mentioned above. Our experimental results demonstrate the efficacy of this method in mitigating catastrophic forgetting and ensuring consistent performance across a diverse range of classes.", "keywords": "continual learning;data drift;out-of-distribution;self training", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "", "author": "Hyowon Park", "authorids": "~Hyowon_Park2", "gender": "", "homepage": "", "dblp": "", "google_scholar": "", "orcid": "", "linkedin": "hyowon-park-6a7173290/", "or_profile": "~Hyowon_Park2", "aff": "Yonsei University", "aff_domain": "yonsei.ac.kr", "position": "MS student", "bibtex": "@misc{\npark2024how,\ntitle={How Out-of-Distribution important is},\nauthor={Hyowon Park},\nyear={2024},\nurl={https://openreview.net/forum?id=10fsmnw6aD}\n}", "github": "", "project": "", "reviewers": "6xad;fB3F;DuXE;SGSA", "site": "https://openreview.net/forum?id=10fsmnw6aD", "pdf_size": 2332277, "rating": "1;3;3;3", "confidence": "3;4;4;3", "soundness": "1;3;2;3", "contribution": "1;2;2;2", "presentation": "1;3;1;1", "wc_summary": "29;32;18;55", "wc_strengths": "3;20;14;8", "wc_weaknesses": "52;25;80;114", "wc_questions": "11;56;7;3", "wc_review": "95;133;119;180", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 2.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 1.5, 0.8660254037844386 ], "wc_summary_avg": [ 33.5, 13.46291201783626 ], "wc_strengths_avg": [ 11.25, 6.378675411086537 ], "wc_weaknesses_avg": [ 67.75, 33.03312731183652 ], "wc_questions_avg": [ 19.25, 21.405314760591587 ], "wc_review_avg": [ 131.75, 30.994959267597046 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "aff_unique_index": "0", "aff_unique_norm": "Yonsei University", "aff_unique_dep": "", "aff_unique_url": "https://www.yonsei.ac.kr", "aff_unique_abbr": "Yonsei", "aff_country_unique_index": "0", "aff_country_unique": "South Korea" }, { "id": 
"11WAKGH8uv", "title": "FedAIoT: A Federated Learning Benchmark for Artificial Intelligence of Things", "track": "main", "status": "Reject", "tldr": "", "abstract": "There is a significant relevance of federated learning (FL) in the realm of Artificial Intelligence of Things (AIoT). However, most of existing FL works are not conducted on datasets collected from authentic IoT devices that capture unique modalities and inherent challenges of IoT data. In this work, we introduce FedAIoT, a FL benchmark for AIoT to fill this critical gap. FedAIoT includes eight well-chosen datatsets collected from a wide range of IoT devices. These datasets cover unique IoT modalities and target representative applications of AIoT. In addition, FedAIoT includes a unified end-to-end FL framework for AIoT that simplifies benchmarking the performance of the datasets. Our benchmark results shed light on the opportunities and challenges of FL for AIoT. We hope that FedAIoT could serve as an invaluable resource for researchers and practitioners to foster advancements in the important field of FL for AIoT.", "keywords": "AIoT;Federated Learning", "primary_area": "datasets and benchmarks", "supplementary_material": "/attachment/95a9180a3c9425b6d11c888e6b5e0f6dd01cc109.pdf", "author": "Samiul Alam;Tuo Zhang;Tiantian Feng;Hui Shen;Zhichao Cao;Dong Zhao;Jeonggil Ko;Kiran Somasundaram;Shrikanth Narayanan;Salman Avestimehr;Mi Zhang", "authorids": "~Samiul_Alam1;~Tuo_Zhang2;~Tiantian_Feng1;~Hui_Shen2;~Zhichao_Cao1;~Dong_Zhao1;~Jeonggil_Ko1;~Kiran_Somasundaram1;~Shrikanth_Narayanan1;~Salman_Avestimehr1;~Mi_Zhang1", "gender": "M;M;M;M;M;M;;M;M;;M", "homepage": "https://samiul272.github.io;;https://tiantiaf0627.github.io/;https://nastymarcus.github.io/;http://cse.msu.edu/~caozc/;https://hbsl.msu.edu/;http://eis-lab.org;https://www.linkedin.com/in/kiran-somasundaram/;http://sail.usc.edu/people/shri.html;;https://mi-zhang.github.io/", "dblp": "222/1821;;;;;;;;19/3899;;84/2519-2.html", "google_scholar": "2Un1c7QAAAAJ;Rki45F4AAAAJ;p7oF-XIAAAAJ;iblw0zoAAAAJ;;;;https://scholar.google.com/citations?hl=en;8EDHmYkAAAAJ;;https://scholar.google.com.tw/citations?user=r3A90uAAAAAJ", "orcid": "0000-0002-8458-4642;;0000-0002-2053-9068;;;;;;0000-0002-1052-6204;;", "linkedin": "samiul-alam/;tuo-zhang-ultraz/;;hui-shen-8b4a86260/;;;;kiran-somasundaram/;shrikanth-narayanan/;;mizhang/", "or_profile": "~Samiul_Alam1;~Tuo_Zhang2;~Tiantian_Feng1;~Hui_Shen2;~Zhichao_Cao1;~Dong_Zhao1;~Jeonggil_Ko1;~Kiran_Somasundaram1;~Shrikanth_Narayanan1;~Salman_Avestimehr1;~Mi_Zhang1", "aff": "Ohio State University, Columbus;University of Southern California;University of Southern California;Ohio State University, Columbus;Michigan State University;Michigan State University;Yonsei University;Meta Facebook;University of Southern California;;The Ohio State University", "aff_domain": "osu.edu;usc.edu;usc.edu;osu.edu;msu.edu;msu.edu;yonsei.ac.kr;meta.com;usc.edu;;osu.edu", "position": "PhD student;PhD student;Postdoc;Undergrad student;Assistant Professor;Associate Professor;Associate Professor;Researcher;Full Professor;;Associate Professor", "bibtex": "@misc{\nalam2024fedaiot,\ntitle={Fed{AI}oT: A Federated Learning Benchmark for Artificial Intelligence of Things},\nauthor={Samiul Alam and Tuo Zhang and Tiantian Feng and Hui Shen and Zhichao Cao and Dong Zhao and Jeonggil Ko and Kiran Somasundaram and Shrikanth Narayanan and Salman Avestimehr and Mi Zhang},\nyear={2024},\nurl={https://openreview.net/forum?id=11WAKGH8uv}\n}", "github": "", "project": "", "reviewers": 
"N2ym;Vfdi;MJiJ;hEF2", "site": "https://openreview.net/forum?id=11WAKGH8uv", "pdf_size": 835320, "rating": "3;5;5;6", "confidence": "4;4;3;3", "soundness": "2;2;2;2", "contribution": "2;3;2;3", "presentation": "2;3;2;3", "wc_summary": "302;27;54;56", "wc_strengths": "224;25;78;97", "wc_weaknesses": "706;211;111;13", "wc_questions": "248;190;92;73", "wc_review": "1480;453;335;239", "wc_reply_reviewers": "0;86;0;24", "wc_reply_authors": "30;691;1109;593", "reply_reviewers": "0;2;0;1", "reply_authors": "1;7;6;7", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.0, 0.0 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 109.75, 111.58488920996427 ], "wc_strengths_avg": [ 106.0, 73.05819598101229 ], "wc_weaknesses_avg": [ 260.25, 266.7052446053508 ], "wc_questions_avg": [ 150.75, 71.5799378317696 ], "wc_review_avg": [ 626.75, 498.42069329031676 ], "wc_reply_reviewers_avg": [ 27.5, 35.16745654721137 ], "wc_reply_authors_avg": [ 605.75, 384.76575666241405 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 5.25, 2.48746859276655 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": -0.6882472016116854, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14104236724301259853&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1;1;0;2;2;3;4;1;0", "aff_unique_norm": "Ohio State University;University of Southern California;Michigan State University;Yonsei University;Meta", "aff_unique_dep": ";;;;Meta Platforms, Inc.", "aff_unique_url": "https://www.osu.edu;https://www.usc.edu;https://www.msu.edu;https://www.yonsei.ac.kr;https://meta.com", "aff_unique_abbr": "OSU;USC;MSU;Yonsei;Meta", "aff_campus_unique_index": "0;1;1;0;1", "aff_campus_unique": "Columbus;Los Angeles;", "aff_country_unique_index": "0;0;0;0;0;0;1;0;0;0", "aff_country_unique": "United States;South Korea" }, { "id": "11nZWTg0mS", "title": "Moral High Ground: A text-based games benchmark for moral evaluation", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "This paper introduces a benchmark for the evaluation of large language models on moral values and business principles. The main focus of this framework is to evaluate moral and ethical reasoning ability of large language models using text-based games, which can be played by both human player and models. We present these games to the player as an interaction between the player and the environment. Each action in these games is associated with a reward based on the moral and ethical values, i.e., higher reward implies higher moral values and vice versa. We score the game trajectory taken by a player by combining the rewards of the individual action, with highest score corresponding with the most moral or ethical paths possible. This will enable us to compare different models and human players on the moral values. In addition, this framework can be used to teach/tune the large language models using these text-based games on desired moral values and business principles. 
Through this framework, we hope to expand upon the diverse area of alignment techniques to help ensure future models grasp the often nuanced topics of moral and ethical values.", "keywords": "Text-based Games;LLM Evaluation;LLM Tuning", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "/attachment/09e7c11310bcf66c1cd5ac5c23bec7464def7620.zip", "author": "Matthew Pisano;Keerthiram Murugesan;Lamogha Chiazor;Lan Hoang;Karthikeyan Natesan Ramamurthy;Kush R. Varshney", "authorids": "pisanm2@rpi.edu;~Keerthiram_Murugesan1;~Lamogha_Chiazor1;~Lan_Hoang2;~Karthikeyan_Natesan_Ramamurthy1;~Kush_R._Varshney1", "gender": ";M;F;;;M", "homepage": ";https://keerthi166.github.io;;https://researcher.watson.ibm.com/researcher/view.php?person=ibm-Lan.Hoang;https://nrkarthikeyan.github.io/;http://krvarshney.github.io", "dblp": ";178/2877;;;58/7800;", "google_scholar": ";-698GEMAAAAJ;;MtmdB6sAAAAJ;mG8HuhEAAAAJ;hMZMhLoAAAAJ", "orcid": ";0000-0001-6847-522X;;;0000-0002-6021-5930;", "linkedin": ";https://linkedin.com/in/keerthiram;lamogha/;lannhoang/;;kushvarshney", "or_profile": "pisanm2@rpi.edu;~Keerthiram_Murugesan1;~Lamogha_Chiazor1;~Lan_Hoang2;~Karthikeyan_Natesan_Ramamurthy1;~Kush_R._Varshney1", "aff": ";International Business Machines;International Business Machines;International Business Machines;International Business Machines;International Business Machines", "aff_domain": ";ibm.com;ibm.com;ibm.com;ibm.com;ibm.com", "position": ";Researcher;Researcher;Researcher;Research Staff Member;Research Staff Member", "bibtex": "@misc{\npisano2024moral,\ntitle={Moral High Ground: A text-based games benchmark for moral evaluation},\nauthor={Matthew Pisano and Keerthiram Murugesan and Lamogha Chiazor and Lan Hoang and Karthikeyan Natesan Ramamurthy and Kush R. 
Varshney},\nyear={2024},\nurl={https://openreview.net/forum?id=11nZWTg0mS}\n}", "github": "", "project": "", "reviewers": "6LQq;Uvtk;T1SJ;N9dw", "site": "https://openreview.net/forum?id=11nZWTg0mS", "pdf_size": 1999473, "rating": "1;1;5;5", "confidence": "4;4;4;3", "soundness": "1;1;2;3", "contribution": "1;1;2;3", "presentation": "2;1;3;3", "wc_summary": "80;110;45;89", "wc_strengths": "12;50;28;105", "wc_weaknesses": "140;279;266;404", "wc_questions": "291;14;30;68", "wc_review": "523;453;369;666", "wc_reply_reviewers": "29;31;32;0", "wc_reply_authors": "334;245;124;601", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 3.0, 2.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 1.75, 0.82915619758885 ], "contribution_avg": [ 1.75, 0.82915619758885 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 81.0, 23.46273641330013 ], "wc_strengths_avg": [ 48.75, 35.16656793035112 ], "wc_weaknesses_avg": [ 272.25, 93.45152486717379 ], "wc_questions_avg": [ 100.75, 111.57816766733535 ], "wc_review_avg": [ 502.75, 108.88612170520172 ], "wc_reply_reviewers_avg": [ 23.0, 13.322912594474229 ], "wc_reply_authors_avg": [ 326.0, 175.3952678951174 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:hZsNKwF6wicJ:scholar.google.com/&scioq=Moral+High+Ground:+A+text-based+games+benchmark+for+moral+evaluation&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "International Business Machines Corporation", "aff_unique_dep": "", "aff_unique_url": "https://www.ibm.com", "aff_unique_abbr": "IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "11oqo92x2Z", "title": "Detection and Segmentation of Solar Farms in Satellite Imagery: A Study of Deep Neural Network Architectures", "track": "main", "status": "Reject", "tldr": "", "abstract": "In line with global sustainability goals, such as the Paris Agreement, accurate mapping and monitoring of solar farms are critical for achieving net zero emissions by 2050. However, many solar installations remain undocumented, posing a challenge. This work introduces Solis-seg, a Deep Neural Network optimized for detecting solar farms in satellite imagery. Solis-seg achieves a mean Intersection over Union (IoU) of 96.26% on a European dataset, outperforming existing solutions.\n\nThe study leans heavily on advances in semantic segmentation and NAS for solar farm detection. Semantic segmentation has evolved through technologies like Fully Convolutional Network (FCN) and U-Net, which have shown strong performance on satellite imagery. In NAS, Differentiable Architecture Search (DARTS) and its variants like Auto-DeepLab (ADL) have become efficient ways to automate the creation of architectures. This study also challenges the prevailing method of using transfer learning from classification tasks for semantic segmentation, suggesting new avenues for research.\n\nThus, this work contributes to both the field of earth observation machine learning and the global transition to renewable energy by providing an efficient, scalable solution for tracking solar installations. 
We believe that our research offers valuable insights into the application of advanced machine learning techniques for solar farm detection and also encourages further exploration in earth observation and sustainability.", "keywords": "Solar Farms;Detection;Satellite Images;Image Segmentation;Machine Learning;Deep Learning", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Erling Olweus;Ole Jakob Mengshoel", "authorids": "erlingolweus@gmail.com;~Ole_Jakob_Mengshoel1", "gender": ";", "homepage": ";", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "erlingolweus@gmail.com;~Ole_Jakob_Mengshoel1", "aff": ";", "aff_domain": ";", "position": ";", "bibtex": "@misc{\nolweus2024detection,\ntitle={Detection and Segmentation of Solar Farms in Satellite Imagery: A Study of Deep Neural Network Architectures},\nauthor={Erling Olweus and Ole Jakob Mengshoel},\nyear={2024},\nurl={https://openreview.net/forum?id=11oqo92x2Z}\n}", "github": "", "project": "", "reviewers": "vhXk;2B7A;Co8H;rV62", "site": "https://openreview.net/forum?id=11oqo92x2Z", "pdf_size": 8031806, "rating": "1;1;3;5", "confidence": "4;4;3;4", "soundness": "1;1;2;3", "contribution": "1;2;1;2", "presentation": "2;1;2;3", "wc_summary": "58;29;72;88", "wc_strengths": "76;17;59;84", "wc_weaknesses": "541;174;64;80", "wc_questions": "3;9;70;402", "wc_review": "678;229;265;654", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "69;0;68;75", "reply_reviewers": "0;0;0;0", "reply_authors": "1;0;1;1", "rating_avg": [ 2.5, 1.6583123951777 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 1.75, 0.82915619758885 ], "contribution_avg": [ 1.5, 0.5 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 61.75, 21.683807322516035 ], "wc_strengths_avg": [ 59.0, 25.874698065871222 ], "wc_weaknesses_avg": [ 214.75, 192.99141820298644 ], "wc_questions_avg": [ 121.0, 164.33958744015393 ], "wc_review_avg": [ 456.5, 210.05773016006813 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 53.0, 30.71644510681534 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0.75, 0.4330127018922193 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16123498411678315838&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 8 }, { "id": "122IP1hqTY", "title": "KLIP: Keyword-Guided Language-Image Pretraining for Data-Efficient Domain-Specific Image Captioning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Image captioning aims to generate natural language descriptions for a given image. While recent vision-language models have shown promising progress on this task, it is still challenging to finetune such models for particular domains with limited image-caption training data. To enable domain-specific few-shot image captioning, we propose a Keyword-Guided Language-Image Pretraining (KLIP) scheme, which learns entity-oriented keywords for aligning visual and textual modalities in each data domain for pre-training and fine-tuning. While our pre-training objectives enables the above alignment for vision-language models, the identified keywords further serve as prompts for regularizing the model during the fine-tuning stage. As a result, potential overfitting problems can be mitigated. 
Extensive experiments on benchmark datasets show that our KLIP performs favorably against state-of-the-art VLMs with various parameter-efficient fine-tuning techniques for domain-specific yet data-efficient image captioning.", "keywords": "Image Captioning;Vision-Language Pretraining", "primary_area": "generative models", "supplementary_material": "", "author": "Chi-Pin Huang;Kai-Po Chang;Fu-En Yang;Chung-Ting Tsai;Yung-Hsuan Lai;Yu-Chiang Frank Wang", "authorids": "~Chi-Pin_Huang2;~Kai-Po_Chang1;~Fu-En_Yang1;~Chung-Ting_Tsai1;~Yung-Hsuan_Lai1;~Yu-Chiang_Frank_Wang2", "gender": "M;M;M;M;M;M", "homepage": "https://jasper0314-huang.github.io/;;https://fuenyang1127.github.io/;https://github.com/tim901231;https://franklin905.github.io/;http://vllab.ee.ntu.edu.tw/ycwang.html", "dblp": "362/2899;234/8190;218/6244;164/5784-2;348/6382;30/1690", "google_scholar": "https://scholar.google.com.tw/citations?user=s8-yTSwAAAAJ;lXC6HbkAAAAJ;https://scholar.google.com.tw/citations?user=k6Iz9VoAAAAJ;;rU7n-9YAAAAJ;HSGvdtoAAAAJ", "orcid": "0009-0003-7738-3054;;0000-0003-0102-7101;;;0000-0002-2333-157X", "linkedin": "chi-pin-huang-b69704208/;%E5%87%B1%E5%8D%9A-%E5%BC%B5-14051a191/?trk=public_profile_browsemap_profile-result-card_result-card_full-click&originalSubdomain=tw;fu-en-yang-77ba7b175/;chung-ting-tsai-76a74b207/;yung-hsuan-lai-1629a7212/;", "or_profile": "~Chi-Pin_Huang2;~Kai-Po_Chang1;~Fu-En_Yang1;~Chung-Ting_Tsai1;~Yung-Hsuan_Lai1;~Yu-Chiang_Frank_Wang2", "aff": "National Taiwan University;National Taiwan University;NVIDIA;National Technological University;National Taiwan University;National Taiwan University", "aff_domain": "ntu.edu.tw;ntu.edu.tw;nvidia.com;ntu.edu;ntu.edu.tw;ntu.edu.tw", "position": "PhD student;PhD student;Researcher;Undergrad student;Research Assistant;Full Professor", "bibtex": "@misc{\nhuang2024klip,\ntitle={{KLIP}: Keyword-Guided Language-Image Pretraining for Data-Efficient Domain-Specific Image Captioning},\nauthor={Chi-Pin Huang and Kai-Po Chang and Fu-En Yang and Chung-Ting Tsai and Yung-Hsuan Lai and Yu-Chiang Frank Wang},\nyear={2024},\nurl={https://openreview.net/forum?id=122IP1hqTY}\n}", "github": "", "project": "", "reviewers": "NtDh;eL8y;uxeH;g8oh;Qycg", "site": "https://openreview.net/forum?id=122IP1hqTY", "pdf_size": 6661439, "rating": "3;5;5;5;6", "confidence": "4;4;3;4;5", "soundness": "2;2;2;2;4", "contribution": "2;2;3;3;4", "presentation": "3;2;2;3;3", "wc_summary": "33;64;186;72;209", "wc_strengths": "25;26;182;139;71", "wc_weaknesses": "132;384;431;446;269", "wc_questions": "33;67;104;330;4", "wc_review": "223;541;903;987;553", "wc_reply_reviewers": "0;0;0;0;10", "wc_reply_authors": "0;0;0;0;41", "reply_reviewers": "0;0;0;0;1", "reply_authors": "0;0;0;0;1", "rating_avg": [ 4.8, 0.9797958971132712 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.4, 0.8 ], "contribution_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 112.8, 70.74856889011961 ], "wc_strengths_avg": [ 88.6, 62.50951927506721 ], "wc_weaknesses_avg": [ 332.4, 117.87213411150236 ], "wc_questions_avg": [ 107.6, 116.12166033949049 ], "wc_review_avg": [ 641.4, 275.9808689021759 ], "wc_reply_reviewers_avg": [ 2.0, 4.0 ], "wc_reply_authors_avg": [ 8.2, 16.4 ], "reply_reviewers_avg": [ 0.2, 0.4 ], "reply_authors_avg": [ 0.2, 0.4 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.3227486121839514, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:YZvBVAZEOPQJ:scholar.google.com/&scioq=KLIP:+Keyword-Guided+Language-Image+Pretraining+for+Data-Efficient+Domain-Specific+Image+Captioning&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;1;2;0;0", "aff_unique_norm": "National Taiwan University;NVIDIA;National Technological University", "aff_unique_dep": ";NVIDIA Corporation;", "aff_unique_url": "https://www.ntu.edu.tw;https://www.nvidia.com;https://www.ntu.edu", "aff_unique_abbr": "NTU;NVIDIA;NTU", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Taiwan;", "aff_country_unique_index": "0;0;1;1;0;0", "aff_country_unique": "China;United States" }, { "id": "12Acp6ZcRa", "title": "Evaluating the Robustness of Text-to-image Diffusion Models against Real-world Attacks", "track": "main", "status": "Reject", "tldr": "", "abstract": "Text-to-image (T2I) diffusion models (DMs) have shown promise in generating high-quality images from textual descriptions. The real-world applications of these models require particular attention to their safety and fidelity, but this has not been sufficiently explored. \nOne fundamental question is whether the existing T2I DMs are robust against variations over input texts. To answer it, this work provides the first robustness evaluation of T2I DMs against real-world perturbations. Unlike malicious attacks that involve apocryphal alterations to the input texts, we consider a perturbation space spanned by realistic errors (e.g., typo, glyph, phonetic) that humans can make and adopt adversarial attacks to generate worst-case perturbations for robustness evaluation. Given the inherent randomness of the generation process, we develop novel distribution-based objectives to mislead T2I DMs. We optimize the objectives by black-box attacks without any knowledge of the model. Extensive experiments demonstrate the effectiveness of our method for attacking popular T2I DMs and simultaneously reveal their non-trivial robustness issues. 
Moreover, we provide an in-depth analysis of our method to show that it is not designed to attack the text encoder in T2I DMs solely.", "keywords": "Diffusion Models;Text to Image Generation;Adversarial Attack;Robustness Evaluation", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/62de1fd0cd0182eca39bfc682c6b52555b45714b.zip", "author": "Hongcheng Gao;Hao Zhang;Yinpeng Dong;Zhijie Deng", "authorids": "~Hongcheng_Gao1;~Hao_Zhang2;~Yinpeng_Dong2;~Zhijie_Deng1", "gender": "M;M;M;M", "homepage": "https://gao-hongcheng.github.io/;https://cseweb.ucsd.edu/~haozhang/;https://dongyp13.github.io;https://thudzj.github.io/", "dblp": "318/1404;55/2270-25;183/0980;209/4959", "google_scholar": "https://scholar.google.com/citations?hl=en;H1d4BS8AAAAJ;6_4ad84AAAAJ;J3dR0sUAAAAJ", "orcid": ";;;0000-0002-0932-1631", "linkedin": ";;;", "or_profile": "~Hongcheng_Gao1;~Hao_Zhang2;~Yinpeng_Dong2;~Zhijie_Deng1", "aff": "University of Chinese Academy of Sciences;Carnegie Mellon University;Tsinghua University;Shanghai Jiaotong University", "aff_domain": "ucas.ac.cn;cmu.edu;tsinghua.edu.cn;sjtu.edu.cn", "position": "MS student;PhD student;Postdoc;Assistant Professor", "bibtex": "@misc{\ngao2024evaluating,\ntitle={Evaluating the Robustness of Text-to-image Diffusion Models against Real-world Attacks},\nauthor={Hongcheng Gao and Hao Zhang and Yinpeng Dong and Zhijie Deng},\nyear={2024},\nurl={https://openreview.net/forum?id=12Acp6ZcRa}\n}", "github": "", "project": "", "reviewers": "BaK3;a3Ko;st1Q;BYdd", "site": "https://openreview.net/forum?id=12Acp6ZcRa", "pdf_size": 14098103, "rating": "3;5;6;8", "confidence": "3;4;4;3", "soundness": "2;3;2;4", "contribution": "2;3;3;3", "presentation": "2;3;3;4", "wc_summary": "91;45;163;63", "wc_strengths": "72;83;125;126", "wc_weaknesses": "306;195;289;82", "wc_questions": "60;10;46;134", "wc_review": "529;333;623;405", "wc_reply_reviewers": "191;0;0;170", "wc_reply_authors": "1291;468;909;780", "reply_reviewers": "1;0;0;2", "reply_authors": "4;2;3;2", "rating_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 90.5, 44.95275297465106 ], "wc_strengths_avg": [ 101.5, 24.315632831575655 ], "wc_weaknesses_avg": [ 218.0, 89.17679070251407 ], "wc_questions_avg": [ 62.5, 45.13036671687922 ], "wc_review_avg": [ 472.5, 111.64564478742554 ], "wc_reply_reviewers_avg": [ 90.25, 90.55488667101295 ], "wc_reply_authors_avg": [ 862.0, 295.0466064878564 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4174072518643267646&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of Chinese Academy of Sciences;Carnegie Mellon University;Tsinghua University;Shanghai Jiao Tong University", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.ucas.ac.cn;https://www.cmu.edu;https://www.tsinghua.edu.cn;https://www.sjtu.edu.cn", "aff_unique_abbr": "UCAS;CMU;THU;SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "China;United States" }, { "id": "12zKEh2APn", "title": "PROSE: Predicting Operators and Symbolic 
Expressions using Multimodal Transformers", "track": "main", "status": "Reject", "tldr": "", "abstract": "Approximating nonlinear differential equations using a neural network provides a robust and efficient tool for various scientific computing tasks, including real-time predictions, inverse problems, optimal controls, and surrogate modeling. Previous works have focused on embedding dynamical systems into networks through two approaches: learning a single solution operator (i.e., the mapping from input parametrized functions to solutions) or learning the governing system of equations (i.e., the constitutive model relative to the state variables). Both of these approaches yield different representations for the same underlying data or function. Additionally, observing that families of differential equations often share key characteristics which can be leveraged to train one network representation across a wide range of equations. Our method, called Predicting Operators and Symbolic Expressions (PROSE), learns maps from multimodal inputs to multimodal outputs, capable of generating both numerical predictions and mathematical equations. By using a transformer structure and a feature fusion approach, our network can simultaneously embed sets of solution operators for various parametric differential equations using a single trained network. Detailed experiments demonstrate that the network benefits from its multimodal nature, resulting in improved prediction accuracy and better generalization. The network is shown to be able to handle noise in the data and errors in the symbolic representation, including noisy numerical values, model misspecification, and erroneous addition or deletion of terms. PROSE provides a new neural network framework for differential equations which allows for more flexibility and generality in learning operators and governing equations from data.", "keywords": "Neural networks for differential equations;multi-operator learning;learning governing equations;multimodal transformers;symbolic generation", "primary_area": "neurosymbolic & hybrid AI systems (physics-informed, logic & formal reasoning, etc.)", "supplementary_material": "", "author": "Yuxuan Liu;Zecheng Zhang;Hayden Schaeffer", "authorids": "~Yuxuan_Liu12;~Zecheng_Zhang2;~Hayden_Schaeffer2", "gender": "M;;Not Specified", "homepage": "https://felix-lyx.github.io/;https://www.math.fsu.edu/~zhang/;https://www.math.ucla.edu/people/ladder/hayden", "dblp": ";;", "google_scholar": "SbieiN4AAAAJ;;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Yuxuan_Liu12;~Zecheng_Zhang2;~Hayden_Schaeffer2", "aff": "University of California, Los Angeles;Florida State University;University of California, Los Angeles", "aff_domain": "ucla.edu;fsu.edu;ucla.edu", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@misc{\nliu2024prose,\ntitle={{PROSE}: Predicting Operators and Symbolic Expressions using Multimodal Transformers},\nauthor={Yuxuan Liu and Zecheng Zhang and Hayden Schaeffer},\nyear={2024},\nurl={https://openreview.net/forum?id=12zKEh2APn}\n}", "github": "", "project": "", "reviewers": "LoCo;62Ac;45hW", "site": "https://openreview.net/forum?id=12zKEh2APn", "pdf_size": 2052077, "rating": "5;5;6", "confidence": "4;4;4", "soundness": "2;2;3", "contribution": "2;2;3", "presentation": "3;2;3", "wc_summary": "166;163;30", "wc_strengths": "83;24;41", "wc_weaknesses": "363;292;43", "wc_questions": "3;406;40", "wc_review": "615;885;154", "wc_reply_reviewers": "0;54;10", "wc_reply_authors": "1475;1349;257", 
"reply_reviewers": "0;1;1", "reply_authors": "2;2;1", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 119.66666666666667, 63.415735867019286 ], "wc_strengths_avg": [ 49.333333333333336, 24.796953217863056 ], "wc_weaknesses_avg": [ 232.66666666666666, 137.2111106612321 ], "wc_questions_avg": [ 149.66666666666666, 181.88335700540486 ], "wc_review_avg": [ 551.3333333333334, 301.806045149677 ], "wc_reply_reviewers_avg": [ 21.333333333333332, 23.456816114345575 ], "wc_reply_authors_avg": [ 1027.0, 546.8966995694891 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1192824799122986362&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of California, Los Angeles;Florida State University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucla.edu;https://www.fsu.edu", "aff_unique_abbr": "UCLA;FSU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "13D1zn0mpd", "title": "Effective and Parameter-Efficient Reusing Fine-Tuned Models", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Many pre-trained large-scale models provided online have become highly effective in transferring to downstream tasks. At the same time, various task-specific models fine-tuned on these pre-trained models are available online for public use. In practice, collecting task-specific data is labor-intensive and fine-tuning the large pre-trained models is computationally expensive, one can reuse task-specific fine-tuned models to deal with downstream tasks. However, using a model per task causes a heavy burden on storage and serving. Recently, many training-free and parameter-efficient methods have been proposed for merging multiple fine-tuned task-specific models into a single multi-task model. However, these methods exhibit a large accuracy gap compared with using a fine-tuned model per task. In this paper, we propose parameter-efficient methods for Reusing fine-tuned models. For reusing fully fine-tuned models, we inject sparse task vectors to a merged model by magnitude pruning. For reusing LoRA fine-tuned models, we use a lower-rank matrix to approximate the LoRA matrix by singular value decomposition. Extensive experiments conducted on computer vision and natural language process tasks demonstrate the effectiveness and parameter-efficiency of the proposed methods. 
The proposed methods outperform existing merging models method by a large margin and achieve comparable performance to using a fine-tuned model per task.", "keywords": "merging models;fine-tune models", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/aeef207d505b1d73898732911fffddda6d0fe873.zip", "author": "Weisen Jiang;Baijiong Lin;Han Shi;Yu Zhang;Zhenguo Li;James Kwok", "authorids": "~Weisen_Jiang1;~Baijiong_Lin1;~Han_Shi1;~Yu_Zhang3;~Zhenguo_Li1;~James_Kwok1", "gender": "M;M;M;M;M;", "homepage": "https://wayson-ust.github.io/;https://baijiong-lin.github.io/;https://han-shi.github.io/;http://cse.sustech.edu.cn/faculty/~zhangy/;http://www.ee.columbia.edu/~zgli/;", "dblp": "302/7625;279/2950;;50/671-6;23/6479;", "google_scholar": "https://scholar.google.com/citations?hl=en;KVdbYTYAAAAJ;https://scholar.google.com.hk/citations?user=Johp_14AAAAJ;https://scholar.google.com.hk/citations?user=jaRS5w4AAAAJ;XboZC1AAAAAJ;", "orcid": ";0000-0002-4257-0226;;;;", "linkedin": ";;;;;", "or_profile": "~Weisen_Jiang1;~Baijiong_Lin1;~Han_Shi1;~Yu_Zhang3;~Zhenguo_Li1;~James_Kwok1", "aff": "Hong Kong University of Science and Technology;The Hong Kong University of Science and Technology (Guangzhou);Huawei Technologies Ltd.;Southern University of Science and Technology;Huawei Noah's Ark Lab;", "aff_domain": "ust.hk;connect.hkust-gz.edu.cn;huawei.com;sustc.edu.cn;huawei.com;", "position": "PhD student;PhD student;Principal Researcher;Associate Professor;Principal Researcher;", "bibtex": "@misc{\njiang2024effective,\ntitle={Effective and Parameter-Efficient Reusing Fine-Tuned Models},\nauthor={Weisen Jiang and Baijiong Lin and Han Shi and Yu Zhang and Zhenguo Li and James Kwok},\nyear={2024},\nurl={https://openreview.net/forum?id=13D1zn0mpd}\n}", "github": "", "project": "", "reviewers": "tpTw;a2T8;wHfz", "site": "https://openreview.net/forum?id=13D1zn0mpd", "pdf_size": 533242, "rating": "5;6;6", "confidence": "4;4;3", "soundness": "2;3;3", "contribution": "2;2;2", "presentation": "3;3;3", "wc_summary": "76;85;78", "wc_strengths": "29;26;26", "wc_weaknesses": "322;33;111", "wc_questions": "6;317;73", "wc_review": "433;461;288", "wc_reply_reviewers": "0;73;0", "wc_reply_authors": "2560;2063;1598", "reply_reviewers": "0;2;0", "reply_authors": "6;6;4", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 79.66666666666667, 3.858612300930075 ], "wc_strengths_avg": [ 27.0, 1.4142135623730951 ], "wc_weaknesses_avg": [ 155.33333333333334, 122.07738893377794 ], "wc_questions_avg": [ 132.0, 133.64380519375624 ], "wc_review_avg": [ 394.0, 75.81996219114507 ], "wc_reply_reviewers_avg": [ 24.333333333333332, 34.41253001774532 ], "wc_reply_authors_avg": [ 2073.6666666666665, 392.8072753342987 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 5.333333333333333, 0.9428090415820634 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=414454635213371595&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;1;2;1", "aff_unique_norm": "Hong Kong University of Science and Technology;Huawei;Southern University of Science and Technology", "aff_unique_dep": ";Huawei 
Technologies;", "aff_unique_url": "https://www.ust.hk;https://www.huawei.com;https://www.sustech.edu.cn", "aff_unique_abbr": "HKUST;Huawei;SUSTech", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Hong Kong SAR;Guangzhou;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "SALMONN: Towards Generic Hearing Abilities for Large Language Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19600", "id": "14rn7HpKVk", "author_site": "Changli Tang, Wenyi Yu, Guangzhi Sun, Xianzhao Chen, Tian Tan, Wei Li, Lu Lu, Zejun MA, Chao Zhang", "tldr": "", "abstract": "Hearing is arguably an essential ability of artificial intelligence (AI) agents in the physical world, which refers to the perception and understanding of general auditory information consisting of at least three types of sounds: speech, audio events, and music. In this paper, we propose SALMONN, a speech audio language music open neural network, built by integrating a pre-trained text-based large language model (LLM) with speech and audio encoders into a single multimodal model. SALMONN enables the LLM to directly process and understand general audio inputs and achieve competitive performances on a number of speech and audio tasks used in training, such as \nautomatic speech recognition and translation, auditory-information-based question answering, emotion recognition, speaker verification, and music and audio captioning etc. SALMONN also has a diverse set of emergent abilities unseen in the training, which includes but is not limited to speech translation to untrained languages, speech-based slot filling, spoken-query-based question answering, audio-based storytelling, and speech audio co-reasoning etc. The presence of cross-modal emergent abilities is studied, and a novel few-shot activation tuning approach is proposed to activate such abilities. To our knowledge, SALMONN is the first model of its type and can be regarded as a step towards AI with generic hearing abilities. 
The source code, model checkpoints and data are available at https://github.com/bytedance/SALMONN.", "keywords": "Multimodal large language models;speech and audio processing;music processing", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Changli Tang;Wenyi Yu;Guangzhi Sun;Xianzhao Chen;Tian Tan;Wei Li;Lu Lu;Zejun MA;Chao Zhang", "authorids": "~Changli_Tang1;~Wenyi_Yu2;~Guangzhi_Sun1;~Xianzhao_Chen1;~Tian_Tan5;~Wei_Li78;~Lu_Lu6;~Zejun_MA1;~Chao_Zhang20", "gender": "M;M;M;;M;M;M;M;M", "homepage": ";https://github.com/Yu-Doit;http://mi.eng.cam.ac.uk/\u223cgs534/;http://chenxianzhao.bytedance.com;;;;;http://mi.eng.cam.ac.uk/~cz277/", "dblp": "331/8719;;236/4543;;;;;;94/3019-31.html", "google_scholar": "RzIjbf0AAAAJ;CGqr-V8AAAAJ;PzPAzf8AAAAJ;;ukL_E5AAAAAJ;q8ZrKVIAAAAJ;IQaR2KoAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.co.uk/citations?view_op=list_works", "orcid": "0000-0002-2009-3078;;;;;;;;", "linkedin": ";;brian-sun-59746b12b/;;;;;zejun-ma-58614365/;", "or_profile": "~Changli_Tang1;~Wenyi_Yu2;~Guangzhi_Sun1;~Xianzhao_Chen1;~Tian_Tan5;~Wei_Li78;~Lu_Lu6;~Zejun_MA1;~Chao_Zhang20", "aff": "Tsinghua University;Tsinghua University;University of Cambridge;;;Bytedance;;ByteDance Inc.;University College London", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;cam.ac.uk;;;bytedance.com;;bytedance.com;ucl.ac.uk", "position": "Undergrad student;PhD student;Junior Research Fellow;;;Researcher;;Principal Researcher;Associate Professor", "bibtex": "@inproceedings{\ntang2024salmonn,\ntitle={{SALMONN}: Towards Generic Hearing Abilities for Large Language Models},\nauthor={Changli Tang and Wenyi Yu and Guangzhi Sun and Xianzhao Chen and Tian Tan and Wei Li and Lu Lu and Zejun MA and Chao Zhang},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=14rn7HpKVk}\n}", "github": "", "project": "", "reviewers": "bK7i;KyXG;RbXH", "pdf_size": 1377554, "rating": "6;6;8", "confidence": "4;4;3", "soundness": "2;3;3", "contribution": "3;2;3", "presentation": "2;3;2", "wc_summary": "71;89;35", "wc_strengths": "24;263;99", "wc_weaknesses": "155;169;199", "wc_questions": "122;9;11", "wc_review": "372;530;344", "wc_reply_reviewers": "37;122;19", "wc_reply_authors": "897;1357;618", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 65.0, 22.44994432064365 ], "wc_strengths_avg": [ 128.66666666666666, 99.8009129328095 ], "wc_weaknesses_avg": [ 174.33333333333334, 18.354533197248273 ], "wc_questions_avg": [ 47.333333333333336, 52.803619404565644 ], "wc_review_avg": [ 415.3333333333333, 81.88338591490272 ], "wc_reply_reviewers_avg": [ 59.333333333333336, 44.917207788948275 ], "wc_reply_authors_avg": [ 957.3333333333334, 304.69693503910116 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.9999999999999998, "gs_citation": 286, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10631342040411306525&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "openreview": 
"https://openreview.net/forum?id=14rn7HpKVk", "pdf": "https://openreview.net/pdf?id=14rn7HpKVk", "email": "tsinghua.edu.cn;tsinghua.edu.cn;cam.ac.uk;;;bytedance.com;;bytedance.com;ucl.ac.uk", "author_num": 9, "aff_unique_index": "0;0;1;2;2;3", "aff_unique_norm": "Tsinghua University;University of Cambridge;ByteDance;University College London", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.cam.ac.uk;https://www.bytedance.com;https://www.ucl.ac.uk", "aff_unique_abbr": "THU;Cambridge;Bytedance;UCL", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0;1;0;0;1", "aff_country_unique": "China;United Kingdom" }, { "id": "16IIkb5wYe", "title": "Fast Unsupervised Deep Outlier Model Selection with Hypernetworks", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Outlier detection (OD) has a large literature as it finds many applications in the real world. Deep neural network based OD (DOD) has seen a recent surge of attention thanks to the many advances in deep learning. In this paper, we consider a critical-yet-understudied challenge with unsupervised DOD, that is, effective hyperparameter (HP) tuning or model selection. While prior work report the sensitivity of OD models to HP choices, it is ever so critical for the modern DOD models that exhibit a long list of HPs. We introduce Hyper for HP-tuning DOD models, tackling two key challenges: (1) validation without supervision (due to lack of labeled outliers), and (2) efficient search of the HP/model space (due to exponential growth in the number of HPs). A key idea is to design and train a novel hypernetwork (HN) that maps HPs onto optimal weights of the main DOD model. In turn, Hyper capitalizes on a single HN that can dynamically generate weights for many DOD models (corresponding to varying HPs), which offers significant speed-up. In addition, it employs meta-learning on historical OD tasks with labels to train a performance estimator function, \nlikewise trained with our proposed HN efficiently. 
\nExtensive experiments on a testbed of 35 benchmark datasets show that \nHyper achieves 7\\% performance improvement and 4.2$\\times$ speed up over the latest baseline, establishing the new state-of-the-art.", "keywords": "outlier detection;model selection;automated ML;hypernetworks", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/fbbca5cc9293990ab981bf01472208de93761f3d.pdf", "author": "Xueying Ding;Yue Zhao;Leman Akoglu", "authorids": "~Xueying_Ding1;~Yue_Zhao13;~Leman_Akoglu3", "gender": "F;M;F", "homepage": ";https://viterbi-web.usc.edu/~yzhao010/;http://www.andrew.cmu.edu/user/lakoglu/", "dblp": ";48/76-16;02/6979.html", "google_scholar": "U9CMsh0AAAAJ;https://scholar.google.ca/citations?user=zoGDYsoAAAAJ;4ITkr_kAAAAJ", "orcid": ";0000-0003-3401-4921;", "linkedin": ";yzhao062/;", "or_profile": "~Xueying_Ding1;~Yue_Zhao13;~Leman_Akoglu3", "aff": "Carnegie Mellon University;University of Southern California;Carnegie Mellon University", "aff_domain": "cmu.edu;usc.edu;cmu.edu", "position": "PhD student;Assistant Professor;Associate Professor", "bibtex": "@misc{\nding2024fast,\ntitle={Fast Unsupervised Deep Outlier Model Selection with Hypernetworks},\nauthor={Xueying Ding and Yue Zhao and Leman Akoglu},\nyear={2024},\nurl={https://openreview.net/forum?id=16IIkb5wYe}\n}", "github": "", "project": "", "reviewers": "LDgS;izNE;Th7t;7fKL;2NEK", "site": "https://openreview.net/forum?id=16IIkb5wYe", "pdf_size": 4765059, "rating": "3;3;5;5;5", "confidence": "3;4;3;3;3", "soundness": "2;2;2;2;2", "contribution": "2;2;2;3;2", "presentation": "1;1;2;2;3", "wc_summary": "68;134;148;30;156", "wc_strengths": "16;33;68;13;125", "wc_weaknesses": "145;279;96;132;159", "wc_questions": "5;56;107;41;90", "wc_review": "234;502;419;216;530", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "0;0;0;0;0", "rating_avg": [ 4.2, 0.9797958971132712 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 2.0, 0.0 ], "contribution_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 1.8, 0.7483314773547883 ], "wc_summary_avg": [ 107.2, 49.51928917098871 ], "wc_strengths_avg": [ 51.0, 41.85212061532844 ], "wc_weaknesses_avg": [ 162.2, 62.036763294033975 ], "wc_questions_avg": [ 59.8, 36.085454133209964 ], "wc_review_avg": [ 380.2, 131.99757573531417 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.6123724356957947, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12491906036932999309&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;0", "aff_unique_norm": "Carnegie Mellon University;University of Southern California", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.usc.edu", "aff_unique_abbr": "CMU;USC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "16VfAw1fs5", "title": "A Conservative Image Boundary Extraction Method with Application to the ILM Tumor Surgery", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "While infant lymphatic malformation tumors are benign, they are very difficult to remove. The removal process is very delicate and requires the retention of as much healthy tissue as possible. 
Commonly utilized boundary extraction methods aim to extract boundaries covering the vast majority of the target area which remove more healthy tissue than is desirable. This paper presents a conservative image boundary extraction (CIBE) approach with well-designed iterative boundary shrinkage procedures which are applied to computerized tomography (CT) images for use in ILM tumor resection operations. CIBE incorporates three primary concepts: Fuzzy Degree, Pixel Deepness and Boundary Smoothness. The proposed algorithm first converts the marked CT image into a 0-1 image matrix. Then it shrinks the boundary according to the estimated PD and BS indices for the image in an iterative fashion until the boundary smoothness meets the desired level. Empirical analysis demonstrates that the smooth, conservative tumor boundaries are obtained using the CIBE algorithm. The proposed method can also be easily extended to the three dimensional studies.", "keywords": "Conservative Boundary Extraction;Infant Lymphatic Malformations Tumor;Unsupervised Learning", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Zhenguo Gao;Longqin Lai", "authorids": "~Zhenguo_Gao1;~Longqin_Lai1", "gender": "M;M", "homepage": "https://math.sjtu.edu.cn/Default/teachershow/tags/MDAwMDAwMDAwMLJ4nJg;", "dblp": ";", "google_scholar": ";", "orcid": ";0009-0007-5309-429X", "linkedin": ";", "or_profile": "~Zhenguo_Gao1;~Longqin_Lai1", "aff": "Shanghai Jiaotong University;School of Mathematical Sciences, Shanghai Jiao Tong University", "aff_domain": "sjtu.edu.cn;math.sjtu.edu.cn", "position": "Associate Professor;PhD student", "bibtex": "@misc{\ngao2024a,\ntitle={A Conservative Image Boundary Extraction Method with Application to the {ILM} Tumor Surgery},\nauthor={Zhenguo Gao and Longqin Lai},\nyear={2024},\nurl={https://openreview.net/forum?id=16VfAw1fs5}\n}", "github": "", "project": "", "reviewers": "v2GP;bXZY;4CWF", "site": "https://openreview.net/forum?id=16VfAw1fs5", "pdf_size": 6971717, "rating": "1;3;3", "confidence": "4;4;4", "soundness": "1;1;2", "contribution": "1;1;1", "presentation": "2;2;2", "wc_summary": "56;81;54", "wc_strengths": "24;24;9", "wc_weaknesses": "101;145;41", "wc_questions": "60;1;40", "wc_review": "241;251;144", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 2.3333333333333335, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 1.3333333333333333, 0.4714045207910317 ], "contribution_avg": [ 1.0, 0.0 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 63.666666666666664, 12.283683848458853 ], "wc_strengths_avg": [ 19.0, 7.0710678118654755 ], "wc_weaknesses_avg": [ 95.66666666666667, 42.62497963505541 ], "wc_questions_avg": [ 33.666666666666664, 24.499433100017278 ], "wc_review_avg": [ 212.0, 48.25626038833372 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 3, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:486a-LKhKJkJ:scholar.google.com/&scioq=A+Conservative+Image+Boundary+Extraction+Method+with+Application+to+the+ILM+Tumor+Surgery&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": 
"https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shanghai", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "17BA0Tl2Id", "title": "Meta-Referential Games to Learn Compositional Learning Behaviours", "track": "main", "status": "Reject", "tldr": "", "abstract": "Human beings use compositionality to generalise from past experiences to novel experiences. We assume a separation of our experiences into fundamental atomic components that can be recombined in novel ways to support our ability to engage with novel experiences. We frame this as the ability to learn to generalise compositionally, and we will refer to behaviours making use of this ability as compositional learning behaviours (CLBs).\n\nA central problem to learning CLBs is the resolution of a binding problem (BP). While it is another feat of intelligence that human beings perform with ease, it is not the case for state-of-the-art artificial agents. Thus, in order to build artificial agents able to collaborate with human beings, we propose to develop a novel benchmark to investigate agents\u2019 abilities to exhibit CLBs by solving a domain-agnostic version of the BP.\n\nWe take inspiration from the language emergence and grounding framework of referential games and propose a meta-learning extension of referential games, entitled Meta-Referential Games, and use this framework to build our benchmark, that we name Symbolic Behaviour Benchmark (S2B). We provide baseline results and error analysis showing that our benchmark is a compelling challenge that we hope will spur the research community towards developing more capable artificial agents.", "keywords": "referential game;language grounding;compositionality;systematicity;few-shot learning;meta-learning;reinforcement learning;language emergence", "primary_area": "datasets and benchmarks", "supplementary_material": "/attachment/0131a7b6feeecb88078bab93dc5c0fce351c17a4.zip", "author": "Kevin Yandoka Denamganai;Sondess Missaoui;James Alfred Walker", "authorids": "~Kevin_Yandoka_Denamganai1;~Sondess_Missaoui1;~James_Alfred_Walker1", "gender": "M;F;M", "homepage": "https://kevindenamganai.netlify.app/;https://digitalcreativity.ac.uk/people/dr-sondess-missaoui;", "dblp": "249/7680;143/1419.html;35/3889", "google_scholar": "PPdQb4QAAAAJ;K2yUNQIAAAAJ;https://scholar.google.co.uk/citations?user=Yl5OycsAAAAJ", "orcid": "0000-0002-8776-4331;;", "linkedin": ";sondess-missaoui-03583531/;", "or_profile": "~Kevin_Yandoka_Denamganai1;~Sondess_Missaoui1;~James_Alfred_Walker1", "aff": "University of York;University of York;University of York", "aff_domain": "york.ac.uk;york.ac.uk;york.ac.uk", "position": "PhD student;Researcher;Associate Professor", "bibtex": "@misc{\ndenamganai2024metareferential,\ntitle={Meta-Referential Games to Learn Compositional Learning Behaviours},\nauthor={Kevin Yandoka Denamganai and Sondess Missaoui and James Alfred Walker},\nyear={2024},\nurl={https://openreview.net/forum?id=17BA0Tl2Id}\n}", "github": "", "project": "", "reviewers": "qdfE;3drE;CPjL;Hoy7", "site": "https://openreview.net/forum?id=17BA0Tl2Id", "pdf_size": 1553270, "rating": "5;5;6;6", "confidence": "2;2;3;3", "soundness": "3;2;3;2", "contribution": "2;3;3;3", "presentation": "3;2;2;2", "wc_summary": "62;23;80;94", "wc_strengths": "45;36;64;80", "wc_weaknesses": "180;189;729;265", "wc_questions": "40;4;648;516", "wc_review": "327;252;1521;955", "wc_reply_reviewers": "262;96;62;222", "wc_reply_authors": "1906;1740;2370;1823", 
"reply_reviewers": "1;1;1;1", "reply_authors": "5;4;4;5", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 64.75, 26.639960585556427 ], "wc_strengths_avg": [ 56.25, 17.03489066592445 ], "wc_weaknesses_avg": [ 340.75, 226.57490483281683 ], "wc_questions_avg": [ 302.0, 284.147848839297 ], "wc_review_avg": [ 763.75, 515.422823999869 ], "wc_reply_reviewers_avg": [ 160.5, 83.58678125158308 ], "wc_reply_authors_avg": [ 1959.75, 244.02087513161655 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 4.5, 0.5 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3952989524077152401&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of York", "aff_unique_dep": "", "aff_unique_url": "https://www.york.ac.uk", "aff_unique_abbr": "York", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "17ZbByq95E", "title": "Memory-Efficient Backpropagation through Large Linear Layers", "track": "main", "status": "Reject", "tldr": "", "abstract": "In modern neural networks like Transformers, linear layers require significant memory to store activations during backward pass. This study proposes a memory reduction approach to perform backpropagation through linear layers. Since the gradients of linear layers are computed by matrix multiplications, we consider methods for randomized matrix multiplications and demonstrate that they require less memory with a moderate decrease of the test accuracy. Also, we investigate the variance of the gradient estimate induced by the randomized matrix multiplication. We compare this variance with the variance coming from gradient estimation based on the batch of samples. 
We demonstrate the benefits of the proposed method on the fine-tuning of the pretrained RoBERTa model on GLUE tasks.", "keywords": "transformers;large language models;randomized matmul;approximate matmul;memory-efficient training", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/9dbbb74c329dd95f11ba34177f2216105b5b7795.pdf", "author": "Daniel Bershatsky;Aleksandr Mikhalev;Aleksandr Katrutsa;Julia Gusak;Daniil Merkulov;Ivan Oseledets", "authorids": "~Daniel_Bershatsky1;~Aleksandr_Mikhalev1;~Aleksandr_Katrutsa1;~Julia_Gusak1;~Daniil_Merkulov1;~Ivan_Oseledets1", "gender": "M;;;F;;M", "homepage": "https://github.com/daskol;https://faculty.skoltech.ru/people/almikhalev;;https://juliagusak.github.io/about/;;http://oseledets.github.io", "dblp": ";;;179/6722;;56/7175", "google_scholar": "XthC2z8AAAAJ;https://scholar.google.com/citations?hl=ru;;QriHoq4AAAAJ;;https://scholar.google.ru/citations?user=5kMqBQEAAAAJ", "orcid": "0000-0001-8917-8187;0000-0002-9274-7237;;;;", "linkedin": ";;;julia-gusak-0b265688/;;", "or_profile": "~Daniel_Bershatsky1;~Aleksandr_Mikhalev1;~Aleksandr_Katrutsa1;~Julia_Gusak1;~Daniil_Merkulov1;~Ivan_Oseledets1", "aff": "Skolkovo Institute of Science and Technology;Skolkovo Institute of Science and Technology;;INRIA;;Institute of Numerical Mathematics", "aff_domain": "skoltech.ru;skoltech.ru;;inria.fr;;inm.ras.ru", "position": "PhD student;Assistant Professor;;Researcher;;Researcher", "bibtex": "@misc{\nbershatsky2024memoryefficient,\ntitle={Memory-Efficient Backpropagation through Large Linear Layers},\nauthor={Daniel Bershatsky and Aleksandr Mikhalev and Aleksandr Katrutsa and Julia Gusak and Daniil Merkulov and Ivan Oseledets},\nyear={2024},\nurl={https://openreview.net/forum?id=17ZbByq95E}\n}", "github": "", "project": "", "reviewers": "JVog;k3XT;aWYY;fccg", "site": "https://openreview.net/forum?id=17ZbByq95E", "pdf_size": 474137, "rating": "3;3;3;6", "confidence": "5;3;4;4", "soundness": "3;2;2;3", "contribution": "2;1;2;2", "presentation": "2;2;3;3", "wc_summary": "24;20;144;114", "wc_strengths": "30;42;34;40", "wc_weaknesses": "163;279;272;129", "wc_questions": "253;201;33;97", "wc_review": "470;542;483;380", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "333;283;187;215", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 75.5, 54.559600438419636 ], "wc_strengths_avg": [ 36.5, 4.769696007084728 ], "wc_weaknesses_avg": [ 210.75, 65.902864125924 ], "wc_questions_avg": [ 146.0, 86.08716512930368 ], "wc_review_avg": [ 468.75, 57.98006122797733 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 254.5, 57.207953992430106 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4859682723757387504&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Skolkovo Institute of Science and Technology;INRIA;Institute of Numerical Mathematics", "aff_unique_dep": ";;", "aff_unique_url": "https://www.skoltech.ru;https://www.inria.fr;", "aff_unique_abbr": "Skoltech;INRIA;", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;0;1", "aff_country_unique": "Russian Federation;France;" }, { "title": "Tensor Programs VI: Feature Learning in Infinite Depth Neural Networks", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19599", "id": "17pVDnpwwl", "author_site": "Greg Yang, Dingli Yu, Chen Zhu, Soufiane Hayou", "tldr": "", "abstract": "Empirical studies have consistently demonstrated that increasing the size of neural networks often yields superior performance in practical applications. However, there is a lack of consensus regarding the appropriate scaling strategy, particularly when it comes to increasing the depth of neural networks. In practice, excessively large depths can lead to model performance degradation. In this paper, we introduce Depth-$\\mu$P, a principled approach for depth scaling, allowing for the training of arbitrarily deep architectures while maximizing feature learning and diversity among nearby layers. Our method involves dividing the contribution of each residual block and the parameter update by the square root of the depth. Through the use of Tensor Programs, we rigorously establish the existence of a limit for infinitely deep neural networks under the proposed scaling scheme. This scaling strategy ensures more stable training for deep neural networks and guarantees the transferability of hyperparameters from shallow to deep models. To substantiate the efficacy of our scaling method, we conduct empirical validation on neural networks with depths up to $2^{10}$.", "keywords": "Tensor Programs;mup;deep learning;optimization;optimal hyperparameter transfer", "primary_area": "optimization", "supplementary_material": "", "author": "Greg Yang;Dingli Yu;Chen Zhu;Soufiane Hayou", "authorids": "~Greg_Yang1;~Dingli_Yu1;~Chen_Zhu2;~Soufiane_Hayou1", "gender": "M;;M;M", "homepage": ";https://dingliyu.net/;http://www.cs.umd.edu/~chenzhu/;https://www.soufianehayou.com/", "dblp": "153/2097;39/578;59/10522-1.html;220/5617", "google_scholar": "Xz4RAJkAAAAJ;KJLJstYAAAAJ;m-om5O8AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0002-8824-8611;;", "linkedin": ";;;", "or_profile": "~Greg_Yang1;~Dingli_Yu1;~Chen_Zhu2;~Soufiane_Hayou1", "aff": ";Princeton University;NVIDIA;National University of Singapore", "aff_domain": ";princeton.edu;nvidia.com;nus.edu.sg", "position": ";PhD student;Research Scientist;Assistant Professor", "bibtex": "@inproceedings{\nyang2024tensor,\ntitle={Tensor Programs {VI}: Feature Learning in Infinite Depth Neural Networks},\nauthor={Greg Yang and Dingli Yu and Chen Zhu and Soufiane Hayou},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=17pVDnpwwl}\n}", "github": "", "project": "", "reviewers": "f9Ax;Mvpr;5jxB;TGji;AGJB", "pdf_size": 2831781, "rating": "5;6;8;8;8", "confidence": "3;4;4;4;4", "soundness": "2;4;2;4;3", "contribution": "2;3;4;3;4", "presentation": "3;3;3;4;3", "wc_summary": "80;113;164;37;103", "wc_strengths": "80;72;194;30;65", "wc_weaknesses": "150;227;440;96;2", "wc_questions": "74;669;188;2;397", "wc_review": "384;1081;986;165;567", "wc_reply_reviewers": "0;300;298;0;0", "wc_reply_authors": "564;1106;1065;327;747", "reply_reviewers": "0;1;2;0;0", "reply_authors": "2;4;2;1;1", "rating_avg": [ 7.0, 1.2649110640673518 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.8944271909999159 ], "contribution_avg": [ 3.2, 0.7483314773547882 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], 
"wc_summary_avg": [ 99.4, 41.572106032771536 ], "wc_strengths_avg": [ 88.2, 55.58560964854123 ], "wc_weaknesses_avg": [ 183.0, 147.92160085667 ], "wc_questions_avg": [ 266.0, 241.7494570831546 ], "wc_review_avg": [ 636.6, 349.4650769390269 ], "wc_reply_reviewers_avg": [ 119.6, 146.48085199096843 ], "wc_reply_authors_avg": [ 761.8, 296.242738307625 ], "reply_reviewers_avg": [ 0.6, 0.7999999999999999 ], "reply_authors_avg": [ 2.0, 1.0954451150103321 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7905694150420949, "gs_citation": 45, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6365974675006259933&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=17pVDnpwwl", "pdf": "https://openreview.net/pdf?id=17pVDnpwwl", "email": ";princeton.edu;nvidia.com;nus.edu.sg", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Princeton University;NVIDIA;National University of Singapore", "aff_unique_dep": ";NVIDIA Corporation;", "aff_unique_url": "https://www.princeton.edu;https://www.nvidia.com;https://www.nus.edu.sg", "aff_unique_abbr": "Princeton;NVIDIA;NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;Singapore" }, { "id": "18TezdB9nk", "title": "Deep Neural Room Acoustics Primitive", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Modeling room acoustics encompasses characterizing the sound propagation dynamics in enclosed 3D spaces and is useful in a variety of settings, including audio-visual simulations, embodied sound source localization, etc. Such dynamics are usually represented using one-dimensional room impulse responses (RIR). However, accurately estimating an RIR is often challenging as sound waves undergo reflections, diffraction, absorption, and scattering along the propagation path. In this paper, we propose a deep learning framework to learn a continuous room acoustic field, dubbed Deep Neural Room Acoustic Primitive (DeepNeRAP), capturing all sound propagation properties in a self-supervised manner; our framework allows the characterization of sound propagation from any source position to any receiver position. Our key idea is to allow two cooperative audio agents to actively probe the 3D space, one emitting and the other receiving sounds at varied positions -- analyzing these emitted and received sounds within our neural framework enables inversely characterizing the room scene acoustically. Our learning formulation is grounded in the physical principles of sound wave propagation, including the properties of globality, reciprocity, superposition, and independence. 
We present experiments on both synthetic and real-world datasets, demonstrating superior quality of our RIR estimation against closely related methods.", "keywords": "audio rendering;neural room impulse response;spatial audio;neural audio rendering", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/b8ab1fd7eedba45527e94b894d00d8721e207342.pdf", "author": "Yuhang He;Anoop Cherian;Gordon Wichern;Andrew Markham", "authorids": "~Yuhang_He3;~Anoop_Cherian1;~Gordon_Wichern1;~Andrew_Markham2", "gender": "M;;M;M", "homepage": "https://yuhanghe01.github.io/;;;http://users.cecs.anu.edu.au/~cherian/", "dblp": ";72/6049;83/7169;44/7734", "google_scholar": "H1p3ve8AAAAJ;;https://scholar.google.co.uk/citations?user=g3JTO9EAAAAJ;https://scholar.google.com.au/citations?hl=en", "orcid": ";;;0000-0002-5566-0351", "linkedin": ";;;anoop-cherian-4678a04/", "or_profile": "~Yuhang_He3;~Gordon_Wichern1;~Andrew_Markham2;~Anoop_Cherian2", "aff": "University of Oxford;Mitsubishi Electric Research Labs;University of Oxford;Mitsubishi Electric Research Labs", "aff_domain": "ox.ac.uk;merl.com;ox.ac.uk;merl.com", "position": "PhD student;Principal Research Scientist;Associate Professor;Principal Researcher", "bibtex": "@misc{\nhe2024deep,\ntitle={Deep Neural Room Acoustics Primitive},\nauthor={Yuhang He and Anoop Cherian and Gordon Wichern and Andrew Markham},\nyear={2024},\nurl={https://openreview.net/forum?id=18TezdB9nk}\n}", "github": "", "project": "", "reviewers": "CGe5;MG9M;EjEe;HBj4", "site": "https://openreview.net/forum?id=18TezdB9nk", "pdf_size": 13375, "rating": "3;3;5;6", "confidence": "5;3;3;5", "soundness": "2;1;4;3", "contribution": "3;1;2;2", "presentation": "4;1;4;3", "wc_summary": "69;28;70;132", "wc_strengths": "100;21;117;69", "wc_weaknesses": "390;216;305;116", "wc_questions": "47;147;33;11", "wc_review": "606;412;525;328", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 1.0 ], "soundness_avg": [ 2.5, 1.118033988749895 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 1.224744871391589 ], "wc_summary_avg": [ 74.75, 37.14414489525907 ], "wc_strengths_avg": [ 76.75, 36.49914382557487 ], "wc_weaknesses_avg": [ 256.75, 101.9249110865445 ], "wc_questions_avg": [ 59.5, 52.12245197609184 ], "wc_review_avg": [ 467.75, 106.09989396790178 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.19245008972987526, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10633725098285414080&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 9, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "University of Oxford;Mitsubishi Electric Research Laboratories", "aff_unique_dep": ";", "aff_unique_url": "https://www.ox.ac.uk;https://www.merl.com", "aff_unique_abbr": "Oxford;MERL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "United Kingdom;United States" }, { "id": "18TfucMNTr", "title": "Accelerated Deep Learning by Gaussian Continuation", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Prior work has shown that incorporating noise into the process of training deep neural networks reduces 
the risks of getting stuck in local minima, overfitting to the training data, and being limited by poor initialization. In this work we consider noisy training as a special case of optimization by continuation, also known as graduated non-convexity, where a convex version of the objective function is solved first and slowly morphed into the original non-convex function. When using continuation in machine learning problems, we show that saddle points require special consideration, as they may get the optimizer stuck in local minima. With a form of regularization applied to the continuation optimizer, we show on several test problems that this approach reduces the risk of being trapped in local minima, leading to better training for very deep architectures and non-convex loss functions.", "keywords": "homotopy;continuation;optimization;deep learning", "primary_area": "optimization", "supplementary_material": "/attachment/c01b7419ade3037ce34017820a01661f89c07dab.zip", "author": "Andrew Francesco Ilersich;Prasanth B. Nair", "authorids": "~Andrew_Francesco_Ilersich1;~Prasanth_B._Nair1", "gender": ";M", "homepage": ";http://arrow.utias.utoronto.ca/~pbn/index.html", "dblp": ";n/PrasanthBNair", "google_scholar": "C3T92DQAAAAJ;6MXRhVIAAAAJ", "orcid": "0009-0004-5369-5464;", "linkedin": "andrewilersich/;", "or_profile": "~Andrew_Francesco_Ilersich1;~Prasanth_B._Nair1", "aff": "University of Toronto;Toronto University", "aff_domain": "utoronto.ca;utoronto.ca", "position": "PhD student;Full Professor", "bibtex": "@misc{\nilersich2024accelerated,\ntitle={Accelerated Deep Learning by Gaussian Continuation},\nauthor={Andrew Francesco Ilersich and Prasanth B. Nair},\nyear={2024},\nurl={https://openreview.net/forum?id=18TfucMNTr}\n}", "github": "", "project": "", "reviewers": "MQoK;xWFa;jwZM;EAjK", "site": "https://openreview.net/forum?id=18TfucMNTr", "pdf_size": 3873401, "rating": "3;3;3;5", "confidence": "3;4;4;3", "soundness": "1;3;3;2", "contribution": "2;1;1;3", "presentation": "3;1;3;2", "wc_summary": "218;25;110;26", "wc_strengths": "22;30;34;68", "wc_weaknesses": "145;1326;331;875", "wc_questions": "165;4;35;12", "wc_review": "550;1385;510;981", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "contribution_avg": [ 1.75, 0.82915619758885 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 94.75, 79.08025986300247 ], "wc_strengths_avg": [ 38.5, 17.57128339080558 ], "wc_weaknesses_avg": [ 669.25, 464.46333278311647 ], "wc_questions_avg": [ 54.0, 65.08840142452416 ], "wc_review_avg": [ 856.5, 356.6570481569094 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:wkLL2HyBaz8J:scholar.google.com/&scioq=Accelerated+Deep+Learning+by+Gaussian+Continuation&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "University of Toronto", "aff_unique_dep": "", "aff_unique_url": "https://www.utoronto.ca", "aff_unique_abbr": "U of T", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "id": "1AXvGjfF0V", "title": "Evaluating 
Hallucinations in Chinese Large Language Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "In this paper, we establish a benchmark named HalluQA (Chinese Hallucination Question-Answering) to measure the hallucination phenomenon in Chinese large language models. \nHalluQA contains 450 meticulously designed adversarial questions, spanning multiple domains, and takes into account Chinese historical culture, customs, and social phenomena. \nDuring the construction of HalluQA, we consider two types of hallucinations: imitative falsehoods and factual errors, and we construct adversarial samples based on GLM-130B and ChatGPT.\nFor evaluation, we design an automated evaluation method using GPT-4 to judge whether a model output is hallucinated.\nWe conduct extensive experiments on 24 large language models, including ERNIE-Bot, Baichuan2, ChatGLM, Qwen, SparkDesk and etc. \nOut of the 24 models, 18 achieved non-hallucination rates lower than 50\\%. \nThis indicates that HalluQA is highly challenging.\nWe analyze the primary types of hallucinations in different types of models and their causes. Additionally, we discuss which types of hallucinations should be prioritized for different types of models.", "keywords": "Hallucinations evaluation;Chinese;Large Language Models;Dataset & Benchmark", "primary_area": "datasets and benchmarks", "supplementary_material": "", "author": "Qinyuan Cheng;Tianxiang Sun;Wenwei Zhang;Siyin Wang;Xiangyang Liu;Mozhi Zhang;Junliang He;Mianqiu Huang;Zhangyue Yin;Kai Chen;Xipeng Qiu", "authorids": "~Qinyuan_Cheng1;~Tianxiang_Sun1;~Wenwei_Zhang1;~Siyin_Wang1;~Xiangyang_Liu3;~Mozhi_Zhang2;~Junliang_He2;~Mianqiu_Huang1;~Zhangyue_Yin1;~Kai_Chen4;~Xipeng_Qiu1", "gender": "M;M;M;M;M;M;M;M;M;M;M", "homepage": "https://xiami2019.github.io/;https://txsun1997.github.io/;https://zhangwenwei.cn;https://sinwang20.github.io/;;https://github.com/Zhang-Mozhi;https://luther-sparks.github.io/;https://yinzhangyue.github.io/;https://chenkai.site/;https://xpqiu.github.io/;https://jlhe.github.io", "dblp": "331/9838;254/1189;;326/8437;;;;314/5418;181/2839-26;69/1395;", "google_scholar": "nu_iPXAAAAAJ;puHFkM0AAAAJ;QDXADSEAAAAJ;wllp6o0AAAAJ;https://scholar.google.com.hk/citations?user=U8QD9mwAAAAJ;TPR5JREAAAAJ;;9gRQqSkAAAAJ;https://scholar.google.com.hk/citations?user=eGD0b7IAAAAJ;Pq4Yp_kAAAAJ;KhHpD54AAAAJ", "orcid": ";;0000-0002-2748-4514;;;;;;0000-0002-6820-2325;0000-0001-7163-5247;", "linkedin": "https://www.linkedin.cn/injobs/in/qinyuan-cheng-5168951ab;;wenweizhang-b9769a124/;;;;;zhangyue-yin-083286288/;;;", "or_profile": "~Qinyuan_Cheng1;~Tianxiang_Sun1;~Wenwei_Zhang1;~Siyin_Wang1;~Xiangyang_Liu3;~Mozhi_Zhang2;~Mianqiu_Huang1;~Zhangyue_Yin1;~Kai_Chen4;~Xipeng_Qiu1;~JunLiang_He1", "aff": "Fudan University;Fudan University;Shanghai AI Laboratory;Fudan University;Fudan University;Fudan University;Fudan University;Fudan University;Shanghai AI Laboratory;Fudan University;Fudan University", "aff_domain": "fudan.edu.cn;fudan.edu;pjlab.org.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;pjlab.org.cn;fudan.edu.cn;fudan.edu.cn", "position": "PhD student;PhD student;Researcher;Undergrad student;PhD student;MS student;MS student;PhD student;Researcher;Full Professor;MS student", "bibtex": "@misc{\ncheng2024evaluating,\ntitle={Evaluating Hallucinations in Chinese Large Language Models},\nauthor={Qinyuan Cheng and Tianxiang Sun and Wenwei Zhang and Siyin Wang and Xiangyang Liu and Mozhi Zhang and Junliang He and Mianqiu Huang and Zhangyue Yin and Kai Chen and Xipeng 
Qiu},\nyear={2024},\nurl={https://openreview.net/forum?id=1AXvGjfF0V}\n}", "github": "", "project": "", "reviewers": "svpT;DCpV;fQ9L", "site": "https://openreview.net/forum?id=1AXvGjfF0V", "pdf_size": 1157443, "rating": "5;5;6", "confidence": "4;3;3", "soundness": "2;2;3", "contribution": "3;2;3", "presentation": "3;3;3", "wc_summary": "72;76;81", "wc_strengths": "68;12;45", "wc_weaknesses": "168;50;17", "wc_questions": "13;61;29", "wc_review": "321;199;172", "wc_reply_reviewers": "65;0;0", "wc_reply_authors": "1038;858;421", "reply_reviewers": "1;0;0", "reply_authors": "3;3;1", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 76.33333333333333, 3.681787005729087 ], "wc_strengths_avg": [ 41.666666666666664, 22.983085567917602 ], "wc_weaknesses_avg": [ 78.33333333333333, 64.81940724470994 ], "wc_questions_avg": [ 34.333333333333336, 19.955506062794353 ], "wc_review_avg": [ 230.66666666666666, 64.81940724470994 ], "wc_reply_reviewers_avg": [ 21.666666666666668, 30.641293851417057 ], "wc_reply_authors_avg": [ 772.3333333333334, 259.0705609588931 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.9428090415820634 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 54, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10748152223918110433&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1;0;0;0;0;0;1;0;0", "aff_unique_norm": "Fudan University;Shanghai AI Laboratory", "aff_unique_dep": ";", "aff_unique_url": "https://www.fudan.edu.cn;https://www.shanghai-ai-lab.com", "aff_unique_abbr": "Fudan;SAIL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "1Akd36hG9z", "title": "Enhancing Offline Reinforcement Learning with an Optimal Supported Dataset", "track": "main", "status": "Reject", "tldr": "", "abstract": "Offline Reinforcement Learning (Offline RL) is challenged by distributional shift and value overestimation, which often leads to poor performance. To address this issue, a popular class of methods use behavior regularization to constrain the learned policy to stay close to the behavior policy. However, this approach can be too limiting when the behavior policy is suboptimal. To overcome this limitation, we propose to conduct behavior regularization directly on an optimal supported dataset, which can both ensure that the learned policy is not too far removed from the dataset, and reduce any potential bias towards the optimization objective. We introduce \\textit{\\textbf{O}ptimal \\textbf{S}upported \\textbf{D}ataset generation via Stationary \\textbf{DI}stribution \\textbf{C}orrection \\textbf{E}stimation} (OSD-DICE) to generate such a dataset. OSD-DICE is based on the primal-dual formulation of linear programming for RL. It uses a single minimization objective to avoid poor convergence issues often associated with this formulation, and incorporates two key designs to ensure polynomial sample complexity under general function approximation and single-policy concentrability. 
After generating the near-optimal supported dataset, we instantiate our framework by two representative behavior regularization-based methods and show safe policy improvement over the near-optimal supported policy. Empirical results validate the efficacy of OSD-DICE on tabular tasks and demonstrate remarkable performance gains of the proposed framework on D4RL benchmarks.", "keywords": "Offline reinforcement learning", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/db826e1f0ef94b970fc5330553b55cdd2b084aec.zip", "author": "Chen Chen;Zhaoyi Xu;Yixiu Mao;Hongchang Zhang;Xiangyang Ji", "authorids": "~Chen_Chen3;~Zhaoyi_Xu1;~Yixiu_Mao2;~Hongchang_Zhang1;~Xiangyang_Ji1", "gender": "F;M;M;M;", "homepage": ";https://github.com/XuZhaoyi;;;", "dblp": ";;280/1045;https://dblp.uni-trier.de/pid/36/9348;", "google_scholar": "l8_g4oAAAAAJ;;;;", "orcid": ";;0009-0000-7302-5039;;", "linkedin": ";;;;", "or_profile": "~Chen_Chen3;~Zhaoyi_Xu1;~Yixiu_Mao2;~Hongchang_Zhang1;~Xiangyang_Ji1", "aff": "Qiyuan Lab;Tsinghua University;Tsinghua University;Tsinghua University;", "aff_domain": "qiyuanlab.com;tsinghua.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn;", "position": "Researcher;PhD student;PhD student;PhD student;", "bibtex": "@misc{\nchen2024enhancing,\ntitle={Enhancing Offline Reinforcement Learning with an Optimal Supported Dataset},\nauthor={Chen Chen and Zhaoyi Xu and Yixiu Mao and Hongchang Zhang and Xiangyang Ji},\nyear={2024},\nurl={https://openreview.net/forum?id=1Akd36hG9z}\n}", "github": "", "project": "", "reviewers": "ZUfb;MfcJ;NpVF;v4Ur;yzkc;tQGo", "site": "https://openreview.net/forum?id=1Akd36hG9z", "pdf_size": 7062385, "rating": "3;5;5;5;6;8", "confidence": "2;4;3;3;4;2", "soundness": "2;2;3;2;3;3", "contribution": "2;3;2;2;2;3", "presentation": "2;1;3;3;2;2", "wc_summary": "32;170;80;102;76;146", "wc_strengths": "25;44;27;20;36;68", "wc_weaknesses": "181;172;83;345;113;35", "wc_questions": "2;64;105;65;119;1", "wc_review": "240;450;295;532;344;250", "wc_reply_reviewers": "0;21;0;36;0;0", "wc_reply_authors": "635;366;899;695;706;78", "reply_reviewers": "0;1;0;1;0;0", "reply_authors": "1;1;2;1;1;1", "rating_avg": [ 5.333333333333333, 1.4907119849998596 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.1666666666666665, 0.6871842709362768 ], "wc_summary_avg": [ 101.0, 45.85120863546929 ], "wc_strengths_avg": [ 36.666666666666664, 16.038148964959213 ], "wc_weaknesses_avg": [ 154.83333333333334, 98.66849660465199 ], "wc_questions_avg": [ 59.333333333333336, 45.441048501205266 ], "wc_review_avg": [ 351.8333333333333, 106.66523436538367 ], "wc_reply_reviewers_avg": [ 9.5, 14.115594213493104 ], "wc_reply_authors_avg": [ 563.1666666666666, 267.70220810113284 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.1666666666666667, 0.3726779962499649 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:jC-jyYAVqRIJ:scholar.google.com/&scioq=Enhancing+Offline+Reinforcement+Learning+with+an+Optimal+Supported+Dataset&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Qiyuan Lab;Tsinghua University", "aff_unique_dep": ";", "aff_unique_url": ";https://www.tsinghua.edu.cn", "aff_unique_abbr": ";THU", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "1;1;1", "aff_country_unique": ";China" }, { "id": "1B4juHRAgt", "title": "Amortized Bayesian Inference with Hybrid Expert-in-the-Loop and Learnable Summary Statistics", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Amortized Bayesian inference (ABI), a subset of simulation-based inference (SBI) fueled by neural networks, has rapidly grown in popularity across diverse scientific fields. Summary statistics are an essential dimensionality reduction component of ABI workflows and most methods to-date rely either on hand-crafted (i.e., based on domain expertise) or end-to-end learned summary statistics. In this work, we explore three hybrid methods to harness the complementary strengths of both sources. The first method directly conditions a neural approximator on both summary types, thereby extending traditional end-to-end approaches in a straightforward way. The second method embeds both expert and learned summaries into a joint representation space which is explicitly optimized to encode decorrelated features. The third method employs an auxiliary generative model to learn a latent summary representation that is statistically independent from the expert summaries. We explore various aspects of our hybrid methodology across different experiments and model instances, including perfect domain expertise and imperfect artificial experts represented by pre-trained neural networks. Our empirical results suggest that hybrid representations can improve parameter estimation and model comparison in settings of scientific interest, warranting the viability of an \"expert-in-the-loop\" approach. The performance gains are especially promising in scenarios with low to medium simulation budgets.", "keywords": "Bayesian inference;summary statistics;generative models;amortized inference;expert-in-the-loop", "primary_area": "probabilistic methods (Bayesian methods, variational inference, sampling, UQ, etc.)", "supplementary_material": "/attachment/bd6de384e65fc9013629b29000874c9f76f07908.zip", "author": "Stefan T. Radev;Lukas Schumacher", "authorids": "~Stefan_T._Radev1;lukas.schumacher@psychologie.uni-heidelberg.de", "gender": "M;", "homepage": "https://faculty.rpi.edu/stefan-radev;", "dblp": ";", "google_scholar": "JbDfkRkAAAAJ;", "orcid": "0000-0002-6702-9559;", "linkedin": "stefan-radev-21b713187/;", "or_profile": "~Stefan_T._Radev1;lukas.schumacher@psychologie.uni-heidelberg.de", "aff": "Rensselaer Polytechnic Institute;", "aff_domain": "epi.edu;", "position": "Assistant Professor;", "bibtex": "@misc{\nradev2024amortized,\ntitle={Amortized Bayesian Inference with Hybrid Expert-in-the-Loop and Learnable Summary Statistics},\nauthor={Stefan T. 
Radev and Lukas Schumacher},\nyear={2024},\nurl={https://openreview.net/forum?id=1B4juHRAgt}\n}", "github": "", "project": "", "reviewers": "QZwH;727i;mwUj;u538;qzC3", "site": "https://openreview.net/forum?id=1B4juHRAgt", "pdf_size": 16273364, "rating": "3;3;5;6;6", "confidence": "4;4;4;2;3", "soundness": "3;2;3;3;2", "contribution": "2;1;2;3;3", "presentation": "3;3;3;3;3", "wc_summary": "156;98;32;138;107", "wc_strengths": "48;34;38;79;41", "wc_weaknesses": "450;325;229;71;26", "wc_questions": "138;26;146;29;92", "wc_review": "792;483;445;317;266", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "0;0;0;0;0", "rating_avg": [ 4.6, 1.3564659966250536 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "contribution_avg": [ 2.2, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 106.2, 42.57886799810441 ], "wc_strengths_avg": [ 48.0, 16.161683080669537 ], "wc_weaknesses_avg": [ 220.2, 157.38030372317877 ], "wc_questions_avg": [ 86.2, 51.35912771844942 ], "wc_review_avg": [ 460.6, 183.8810485069084 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.7740702698132101, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:haZcZb1ef58J:scholar.google.com/&scioq=Amortized+Bayesian+Inference+with+Hybrid+Expert-in-the-Loop+and+Learnable+Summary+Statistics&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Rensselaer Polytechnic Institute", "aff_unique_dep": "", "aff_unique_url": "https://www.rpi.edu", "aff_unique_abbr": "RPI", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Rethinking Complex Queries on Knowledge Graphs with Neural Link Predictors", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19598", "id": "1BmveEMNbG", "author_site": "Hang Yin, Zihao Wang, Yangqiu Song", "tldr": "", "abstract": "Reasoning on knowledge graphs is a challenging task because it utilizes observed information to predict the missing one. Particularly, answering complex queries based on first-order logic is one of the crucial tasks to verify learning to reason abilities for generalization and composition.\nRecently, the prevailing method is query embedding which learns the embedding of a set of entities and treats logic operations as set operations and has shown great empirical success. Though there has been much research following the same formulation, many of its claims lack a formal and systematic inspection. In this paper, we rethink this formulation and justify many of the previous claims by characterizing the scope of queries investigated previously and precisely identifying the gap between its formulation and its goal, as well as providing complexity analysis for the currently investigated queries. Moreover, we develop a new dataset containing ten new types of queries with features that have never been considered and therefore can provide a thorough investigation of complex queries. Finally, we propose a new neural-symbolic method, Fuzzy Inference with Truth value (FIT), where we equip the neural link predictors with fuzzy logic theory to support end-to-end learning using complex queries with provable reasoning capability. 
Empirical results show that our method outperforms previous methods significantly in the new dataset and also surpasses previous methods in the existing dataset at the same time.", "keywords": "complex query answering;knowledge graph;link prediction", "primary_area": "neurosymbolic & hybrid AI systems (physics-informed, logic & formal reasoning, etc.)", "supplementary_material": "", "author": "Hang Yin;Zihao Wang;Yangqiu Song", "authorids": "~Hang_Yin3;~Zihao_Wang11;~Yangqiu_Song1", "gender": ";;M", "homepage": ";https://zihao-wang.github.io;https://www.cse.ust.hk/~yqsong/", "dblp": ";148/9655-1;86/2159", "google_scholar": ";T28rR00AAAAJ;MdQZ-q8AAAAJ", "orcid": ";0000-0002-3919-0396;0000-0002-7818-6090", "linkedin": ";zihao-wang-6a0a3286/;yqsong/", "or_profile": "~Hang_Yin3;~Zihao_Wang11;~Yangqiu_Song1", "aff": ";Hong Kong University of Science and Technology;Hong Kong University of Science and Technology", "aff_domain": ";cse.ust.hk;ust.hk", "position": ";PhD student;Associate Professor", "bibtex": "@inproceedings{\nyin2024rethinking,\ntitle={Rethinking Complex Queries on Knowledge Graphs with Neural Link Predictors},\nauthor={Hang Yin and Zihao Wang and Yangqiu Song},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=1BmveEMNbG}\n}", "github": "", "project": "", "reviewers": "TtwJ;sXg1;QmUh;Z6xA", "pdf_size": 650596, "rating": "6;6;8;8", "confidence": "4;3;4;3", "soundness": "2;3;4;3", "contribution": "4;2;4;3", "presentation": "3;2;4;3", "wc_summary": "225;98;138;102", "wc_strengths": "107;95;209;40", "wc_weaknesses": "177;181;47;62", "wc_questions": "102;3;19;39", "wc_review": "611;377;413;243", "wc_reply_reviewers": "0;102;0;9", "wc_reply_authors": "553;779;155;316", "reply_reviewers": "0;2;0;1", "reply_authors": "2;2;1;1", "rating_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 140.75, 51.07531204016281 ], "wc_strengths_avg": [ 112.75, 61.042505682516015 ], "wc_weaknesses_avg": [ 116.75, 62.491499421921375 ], "wc_questions_avg": [ 40.75, 37.59238619720754 ], "wc_review_avg": [ 411.0, 131.70421405558744 ], "wc_reply_reviewers_avg": [ 27.75, 43.0254285277904 ], "wc_reply_authors_avg": [ 450.75, 236.5527161120751 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5020512622307914988&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=1BmveEMNbG", "pdf": "https://openreview.net/pdf?id=1BmveEMNbG", "email": ";cse.ust.hk;ust.hk", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Hong Kong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.ust.hk", "aff_unique_abbr": "HKUST", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Enhancing Transferable Adversarial Attacks on Vision Transformers through Gradient Normalization Scaling and High-Frequency Adaptation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19597", "id": "1BuWv9poWz", "author_site": "Zhiyu Zhu, Xinyi Wang, Zhibo Jin, Jiayu Zhang, Huaming Chen", 
"tldr": "", "abstract": "Vision Transformers (ViTs) have been widely used in various domains. Similar to Convolutional Neural Networks (CNNs), ViTs are prone to the impacts of adversarial samples, raising security concerns in real-world applications. As one of the most effective black-box attack methods, transferable attacks can generate adversarial samples on surrogate models to directly attack the target model without accessing the parameters. However, due to the distinct internal structures of ViTs and CNNs, adversarial samples constructed by traditional transferable attack methods may not be applicable to ViTs. Therefore, it is imperative to propose more effective transferability attack methods to unveil latent vulnerabilities in ViTs. Existing methods have found that applying gradient regularization to extreme gradients across different functional regions in the transformer structure can enhance sample transferability. However, in practice, substantial gradient disparities exist even within the same functional region across different layers. Furthermore, we find that mild gradients therein are the main culprits behind reduced transferability. In this paper, we introduce a novel Gradient Normalization Scaling method for fine-grained gradient editing to enhance the transferability of adversarial attacks on ViTs. More importantly, we highlight that ViTs, unlike traditional CNNs, exhibit distinct attention regions in the frequency domain. Leveraging this insight, we delve into exploring the frequency domain to further enhance the algorithm's transferability. Through extensive experimentation on various ViT variants and traditional CNN models, we substantiate that the new approach achieves state-of-the-art performance, with an average performance improvement of 33.54\\% and 42.05\\% on ViT and CNN models, respectively. 
Our code is available at: https://github.com/LMBTough/GNS-HFA.", "keywords": "Adversarial attack;transferability;Vit;transformer", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/c0069439cc0a5771415da7f84ab0e26da2dbf333.pdf", "author": "Zhiyu Zhu;Xinyi Wang;Zhibo Jin;Jiayu Zhang;Huaming Chen", "authorids": "~Zhiyu_Zhu2;~Xinyi_Wang9;~Zhibo_Jin1;~Jiayu_Zhang1;~Huaming_Chen1", "gender": ";M;M;M;", "homepage": ";https://github.com/noctisluna;https://github.com/Davidjinzb;https://github.com/KxPlaug;", "dblp": ";;;;", "google_scholar": ";;;;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Zhiyu_Zhu2;~Xinyi_Wang9;~Zhibo_Jin1;~Jiayu_Zhang1;~Huaming_Chen1", "aff": ";Universiti Malaya;University of Sydney;Suzhou Yierqi;", "aff_domain": ";um.edu.my;usyd.edu.au;szyierqi.com;", "position": ";MS student;MS student;Researcher;", "bibtex": "@inproceedings{\nzhu2024enhancing,\ntitle={Enhancing Transferable Adversarial Attacks on Vision Transformers through Gradient Normalization Scaling and High-Frequency Adaptation},\nauthor={Zhiyu Zhu and Xinyi Wang and Zhibo Jin and Jiayu Zhang and Huaming Chen},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=1BuWv9poWz}\n}", "github": "", "project": "", "reviewers": "6tAt;98px;Qpbi", "pdf_size": 1520325, "rating": "5;5;6", "confidence": "3;4;4", "soundness": "3;3;2", "contribution": "2;2;2", "presentation": "2;3;1", "wc_summary": "133;64;45", "wc_strengths": "26;15;35", "wc_weaknesses": "67;50;106", "wc_questions": "151;17;158", "wc_review": "377;146;344", "wc_reply_reviewers": "0;0;9", "wc_reply_authors": "1278;735;1479", "reply_reviewers": "0;0;1", "reply_authors": "3;3;4", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.0, 0.816496580927726 ], "wc_summary_avg": [ 80.66666666666667, 37.80946383586463 ], "wc_strengths_avg": [ 25.333333333333332, 8.178562764256865 ], "wc_weaknesses_avg": [ 74.33333333333333, 23.442601296689656 ], "wc_questions_avg": [ 108.66666666666667, 64.88108781112996 ], "wc_review_avg": [ 289.0, 102.00980345045274 ], "wc_reply_reviewers_avg": [ 3.0, 4.242640687119285 ], "wc_reply_authors_avg": [ 1164.0, 314.25149164323784 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13904993439380467359&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "openreview": "https://openreview.net/forum?id=1BuWv9poWz", "pdf": "https://openreview.net/pdf?id=1BuWv9poWz", "email": ";um.edu.my;usyd.edu.au;szyierqi.com;", "author_num": 5, "aff_unique_index": "0;1;2", "aff_unique_norm": "Universiti Malaya;University of Sydney;Suzhou Yierqi", "aff_unique_dep": ";;", "aff_unique_url": "https://www.malaya.edu.my;https://www.sydney.edu.au;", "aff_unique_abbr": "UM;USYD;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2", "aff_country_unique": "Malaysia;Australia;China" }, { "title": "Unsupervised Order Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19596", "id": "1CK45cqkEh", 
"author_site": "Seon-Ho Lee, Nyeong-Ho Shin, Chang-Su Kim", "tldr": "", "abstract": "A novel clustering algorithm for orderable data, called unsupervised order learning (UOL), is proposed in this paper. First, we develop the ordered $k$-means to group objects into ordered clusters by reducing the deviation of an object from consecutive clusters. Then, we train a network to construct an embedding space, in which objects are sorted compactly along a chain of line segments, determined by the cluster centroids. We alternate the clustering and the network training until convergence. Moreover, we perform unsupervised rank estimation via a simple nearest neighbor search in the embedding space. Extensive experiments on various orderable datasets demonstrate that UOL provides reliable ordered clustering results and decent rank estimation performances with no supervision. The source codes are available at https://github.com/seon92/UOL.", "keywords": "order learning;unsupervised clustering", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/91e282263e10efbd272cdb690a3478d7219965c2.zip", "author": "Seon-Ho Lee;Nyeong-Ho Shin;Chang-Su Kim", "authorids": "~Seon-Ho_Lee1;~Nyeong-Ho_Shin1;~Chang-Su_Kim4", "gender": "M;M;M", "homepage": "https://uhseon.github.io/;http://mcl.korea.ac.kr/people/professor/;", "dblp": "125/9915;;264/2601", "google_scholar": "https://scholar.google.co.kr/citations?user=_LtQ4TcAAAAJ;https://scholar.google.co.kr/citations?user=KOdKwNsAAAAJ;dLCMcXMAAAAJ", "orcid": ";;", "linkedin": "seonho-lee-604679198/;;nyeongho-shin-48529932b/", "or_profile": "~Seon-Ho_Lee1;~Chang-su_Kim2;~Nyeong_Ho_Shin1", "aff": "Korea University;Korea University;Korea University", "aff_domain": "korea.ac.kr;korea.ac.kr;korea.ac.kr", "position": "PhD student;Full Professor;PhD student", "bibtex": "@inproceedings{\nlee2024unsupervised,\ntitle={Unsupervised Order Learning},\nauthor={Seon-Ho Lee and Nyeong-Ho Shin and Chang-Su Kim},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=1CK45cqkEh}\n}", "github": "", "project": "", "reviewers": "rbKP;kD3Z;T1Ca;cgay", "pdf_size": 15013245, "rating": "5;5;6;6", "confidence": "5;3;5;4", "soundness": "3;3;3;3", "contribution": "2;2;3;2", "presentation": "3;3;3;3", "wc_summary": "78;62;29;86", "wc_strengths": "14;24;58;98", "wc_weaknesses": "156;123;2;158", "wc_questions": "1;106;194;76", "wc_review": "249;315;283;418", "wc_reply_reviewers": "0;19;17;24", "wc_reply_authors": "534;272;509;437", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;3", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 63.75, 21.84462176372024 ], "wc_strengths_avg": [ 48.5, 32.90516676754579 ], "wc_weaknesses_avg": [ 109.75, 63.743136885471834 ], "wc_questions_avg": [ 94.25, 69.13166785200542 ], "wc_review_avg": [ 316.25, 63.21145070317561 ], "wc_reply_reviewers_avg": [ 15.0, 9.027735042633894 ], "wc_reply_authors_avg": [ 438.0, 102.24235912771184 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 2, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=15681772120320207191&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "openreview": "https://openreview.net/forum?id=1CK45cqkEh", "pdf": "https://openreview.net/pdf?id=1CK45cqkEh", "email": "korea.ac.kr;korea.ac.kr;korea.ac.kr", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Korea University", "aff_unique_dep": "", "aff_unique_url": "https://www.korea.ac.kr", "aff_unique_abbr": "KU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "Adaptive Retrieval and Scalable Indexing for k-NN Search with Cross-Encoders", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19595", "id": "1CPta0bfN2", "author_site": "Nishant Yadav, Nicholas Monath, Manzil Zaheer, Rob Fergus, Andrew McCallum", "tldr": "", "abstract": "Cross-encoder (CE) models which compute similarity by jointly encoding a query-item pair perform better than using dot-product with embedding-based models (dual-encoders) at estimating query-item relevance. Existing approaches perform k-NN search with cross-encoders by approximating the CE similarity with a vector embedding space fit either with dual-encoders (DE) or CUR matrix factorization. DE-based retrieve-and-rerank approaches suffer from poor recall as DE generalizes poorly to new domains and the test-time retrieval with DE is decoupled from the CE. While CUR-based approaches can be more accurate than the DE-based retrieve-and-rerank approach, such approaches require a prohibitively large number of CE calls to compute item embeddings, thus making it impractical for deployment at scale. In this paper, we address these shortcomings with our proposed sparse-matrix factorization based method that efficiently computes latent query and item representations to approximate CE scores and performs k-NN search with the approximate CE similarity. In an offline indexing stage, we compute item embeddings by factorizing a sparse matrix containing query-item CE scores for a set of train queries. Our method produces a high-quality approximation while requiring only a fraction of CE similarity calls as compared to CUR-based methods, and allows for leveraging DE models to initialize the embedding space while avoiding compute- and resource-intensive finetuning of DE via distillation. At test time, we keep item embeddings fixed and perform retrieval over multiple rounds, alternating between a) estimating the test query embedding by minimizing error in approximating CE scores of items retrieved thus far, and b) using the updated test query embedding for retrieving more items in the next round. Our proposed k-NN search method can achieve up to 5 and 54 improvement in k-NN recall for k=1 and 100 respectively over the widely-used DE-based retrieve-and-rerank approach. 
Furthermore, our proposed approach to index the items by aligning item embeddings with the CE achieves up to 100x and 5x speedup over CUR-based and dual-encoder distillation based approaches respectively while matching or improving k-NN search recall over baselines.", "keywords": "cross-encoder;kNN;retrieval;nearest-neighbor search", "primary_area": "metric learning, kernel learning, and sparse coding", "supplementary_material": "", "author": "Nishant Yadav;Nicholas Monath;Manzil Zaheer;Rob Fergus;Andrew McCallum", "authorids": "~Nishant_Yadav1;~Nicholas_Monath1;~Manzil_Zaheer1;~Rob_Fergus1;~Andrew_McCallum1", "gender": "M;M;M;M;M", "homepage": "https://people.cs.umass.edu/~nishantyadav/;https://nmonath.github.io/;https://www.aclweb.org/anthology/people/m/manzil-zaheer/;http://cs.nyu.edu/fergus/;http://www.cs.umass.edu/~mccallum", "dblp": "230/4155;131/4309;40/10701;77/3763;m/AndrewMcCallum", "google_scholar": "Korn2JAAAAAJ;PTfhfCQAAAAJ;A33FhJMAAAAJ;https://scholar.google.com.tw/citations?user=GgQ9GEkAAAAJ;yILa1y0AAAAJ", "orcid": ";0000-0002-5135-2423;;;0009-0004-5487-2848", "linkedin": ";nicholas-monath-8627581aa/;;;andrew-mccallum-a412", "or_profile": "~Nishant_Yadav1;~Nicholas_Monath1;~Manzil_Zaheer1;~Rob_Fergus1;~Andrew_McCallum1", "aff": "Department of Computer Science, University of Massachusetts, Amherst;Google;Google DeepMind;Google;University of Massachusetts Amherst", "aff_domain": "cs.umass.edu;google.com;deepmind.com;google.com;cs.umass.edu", "position": "PhD student;Researcher;Researcher;Research scientist;Distinguished Professor", "bibtex": "@inproceedings{\nyadav2024adaptive,\ntitle={Adaptive Retrieval and Scalable Indexing for k-{NN} Search with Cross-Encoders},\nauthor={Nishant Yadav and Nicholas Monath and Manzil Zaheer and Rob Fergus and Andrew McCallum},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=1CPta0bfN2}\n}", "github": "", "project": "", "reviewers": "hzee;ujET;fa41;ja98", "pdf_size": 2418271, "rating": "5;6;6;8", "confidence": "4;3;3;3", "soundness": "3;3;3;4", "contribution": "2;3;3;3", "presentation": "2;4;2;3", "wc_summary": "39;118;197;78", "wc_strengths": "35;38;35;55", "wc_weaknesses": "232;33;78;24", "wc_questions": "23;107;62;55", "wc_review": "329;296;372;212", "wc_reply_reviewers": "96;20;0;10", "wc_reply_authors": "1931;787;1051;292", "reply_reviewers": "1;1;0;1", "reply_authors": "4;1;2;1", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 108.0, 58.48504082241885 ], "wc_strengths_avg": [ 40.75, 8.317902379807062 ], "wc_weaknesses_avg": [ 91.75, 83.5175879680442 ], "wc_questions_avg": [ 61.75, 29.978117018918983 ], "wc_review_avg": [ 302.25, 58.66163567443376 ], "wc_reply_reviewers_avg": [ 31.5, 37.90448522272793 ], "wc_reply_authors_avg": [ 1015.25, 594.7824707403539 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.6622661785325219, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ZT2vxG-gQ2EJ:scholar.google.com/&scioq=Adaptive+Retrieval+and+Scalable+Indexing+for+k-NN+Search+with+Cross-Encoders&hl=en&as_sdt=0,44", "gs_version_total": 4, "openreview": 
"https://openreview.net/forum?id=1CPta0bfN2", "pdf": "https://openreview.net/pdf?id=1CPta0bfN2", "email": "cs.umass.edu;google.com;deepmind.com;google.com;cs.umass.edu", "author_num": 5, "aff_unique_index": "0;1;1;1;0", "aff_unique_norm": "University of Massachusetts Amherst;Google", "aff_unique_dep": "Department of Computer Science;Google", "aff_unique_url": "https://www.umass.edu;https://www.google.com", "aff_unique_abbr": "UMass Amherst;Google", "aff_campus_unique_index": "0;1;1;0", "aff_campus_unique": "Amherst;Mountain View;", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "1EyS6udrLa", "title": "Towards Bringing Advanced Restoration Networks into Self-Supervised Image Denoising", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Self-supervised image denoising (SSID) has witnessed significant progress in recent years. Therein, most methods focus on exploring blind-spot techniques while only employing a simple network architecture (\\eg, plain CNN or U-Net) as a denoising backbone. However, with the ongoing advancements in image restoration networks, these architectures have become somewhat outdated. In this work, we aim to migrate the advanced restoration network designs (\\eg, SwinIR, Restormer, NAFNet, and HAT) into SSID methods. We begin by conducting an analysis of the fundamental concepts in existing typical blind-spot networks (BSN). Subsequently, we introduce a series of approaches to adapt restoration networks into various blind-spot ones. In particular, we suggest effective adjustment for window attention to mimic the convolution layers in BSN. And we discourage the adoption of channel attention, as it can potentially lead to the leakage of blind-spot information, consequently impeding performance. Experiments on both synthetic and real-world RGB noisy images demonstrate our methods substantially enhance SSID performance. Furthermore, we hope this study could enable SIDD methods to keep pace with the progress in restoration networks, and serve as benchmarks for future works. 
The code and pre-trained models will be publicly available.", "keywords": "Self-Supervised Denoising; Restoration Networks", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Junyi Li;Zhilu Zhang;Dongsheng Jiang;XIAOPENG ZHANG;Wangmeng Zuo;Qi Tian", "authorids": "~Junyi_Li5;~Zhilu_Zhang2;~Dongsheng_Jiang2;~XIAOPENG_ZHANG7;~Wangmeng_Zuo3;~Qi_Tian3", "gender": "M;M;M;M;M;M", "homepage": ";https://github.com/cszhilu1998;https://sites.google.com/site/zxphistory/;;https://www.qitian1987.com/index.html;https://sites.google.com/site/dongshengjiangbme/", "dblp": ";;;93/2671;78/1467-1.html;85/8729", "google_scholar": "77mdLl8AAAAJ;8pIq2N0AAAAJ;Ud6aBAcAAAAJ;rUOpCEYAAAAJ;https://scholar.google.com/citations?hl=en;-eGIgsoAAAAJ", "orcid": ";0000-0002-5758-5949;;0000-0002-3330-783X;0000-0002-7252-5047;", "linkedin": ";;;;;", "or_profile": "~Junyi_Li5;~Zhilu_Zhang2;~XIAOPENG_ZHANG7;~Wangmeng_Zuo3;~Qi_Tian3;~Dongsheng_Jiang1", "aff": "Harbin Institute of Technology;Harbin Institute of Technology;Huawei Technologies Ltd.;Harbin Institute of Technology;Huawei Technologies Ltd.;Huawei Technologies Ltd.", "aff_domain": "hit.edu.cn;hit.edu.cn;huawei.com;hit.edu.cn;huawei.com;huawei.com", "position": "PhD student;PhD student;Principal Researcher;Full Professor;Principal Researcher;Principal Researcher", "bibtex": "@misc{\nli2024towards,\ntitle={Towards Bringing Advanced Restoration Networks into Self-Supervised Image Denoising},\nauthor={Junyi Li and Zhilu Zhang and Dongsheng Jiang and XIAOPENG ZHANG and Wangmeng Zuo and Qi Tian},\nyear={2024},\nurl={https://openreview.net/forum?id=1EyS6udrLa}\n}", "github": "", "project": "", "reviewers": "iVws;uBSH;jHRD;xn1Q", "site": "https://openreview.net/forum?id=1EyS6udrLa", "pdf_size": 3948107, "rating": "3;3;5;5", "confidence": "4;5;3;5", "soundness": "3;2;2;3", "contribution": "1;1;2;3", "presentation": "3;3;2;3", "wc_summary": "97;46;80;56", "wc_strengths": "63;20;43;81", "wc_weaknesses": "120;69;124;130", "wc_questions": "33;347;2;3", "wc_review": "313;482;249;270", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 1.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 69.75, 20.004686950812303 ], "wc_strengths_avg": [ 51.75, 22.730761095924613 ], "wc_weaknesses_avg": [ 110.75, 24.365703355331238 ], "wc_questions_avg": [ 96.25, 145.30549714308816 ], "wc_review_avg": [ 328.5, 91.57647077715978 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:bD4cpC_HtesJ:scholar.google.com/&scioq=Towards+Bringing+Advanced+Restoration+Networks+into+Self-Supervised+Image+Denoising&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;1;0;1;1", "aff_unique_norm": "Harbin Institute of Technology;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "http://www.hit.edu.cn/;https://www.huawei.com", "aff_unique_abbr": "HIT;Huawei", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Harbin;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" 
}, { "id": "1FWDEIGm33", "title": "Large Language Models as superpositions of cultural perspectives", "track": "main", "status": "Reject", "tldr": "", "abstract": "Large language models (LLMs) are sometimes viewed as if they were individuals, with given values, personality, knowledge and abilities. We argue that this \u201dLLM as an individual\u201d metaphor misrepresents their nature. As opposed to humans, they exhibit highly context-dependent values and personality traits. We propose a new metaphor, \u201dLLM as a superposition of perspectives\u201d : LLMs simulate a multiplicity of behaviors, e.g. expressing values, which can be triggered by a given context. As a case study, we conduct experiments on how values vary as a function of context using psychology questionnaires. Crucially, we demonstrate that changes in the context that are unrelated to the topic of questionnaires - varying articles, simulated conversations on other topics, and textual formats - all result in significant unwanted, hard-to-predict changes in the expressed values. We refer to this as the unexpected perspective shift effect. We discuss how this questions the interpretations of studies using psychology questionnaires (and more generally benchmarks) to draw general conclusions about LLMs\u2019 values, knowledge and abilities. Indeed, expressing some values on a questionnaire says little about which values a model would express in other contexts. Instead, models should be studied in terms of how the expressed values change over contexts in both expected and unexpected ways. Following this insight, we introduce the concept of perspective controllability - a model\u2019s affordance to adopt various perspectives. We conduct a systematic comparison of the controllability of 16 different models over three questionnaires (PVQ, VSM, IPIP) and different methods for inducing perspectives. 
We conclude by examining the broader implications of our work and outline a variety of associated scientific questions.", "keywords": "Large Language Models;context-dependence;controllability;cultural values;personal values;personality traits;societal considerations;Shalom H Schwartz;Geert Hofstede;Big Five", "primary_area": "generative models", "supplementary_material": "/attachment/8b6e4447a0a20dc4db890536b580aba32afc8767.zip", "author": "Grgur Kova\u010d;Masataka Sawayama;R\u00e9my Portelas;C\u00e9dric Colas;Peter Ford Dominey;Pierre-Yves Oudeyer", "authorids": "~Grgur_Kova\u010d1;~Masataka_Sawayama1;~R\u00e9my_Portelas1;~C\u00e9dric_Colas1;~Peter_Ford_Dominey1;~Pierre-Yves_Oudeyer1", "gender": "M;M;M;;M;M", "homepage": "https://www.mswym.com/;;https://cedriccolas.com;;http://www.pyoudeyer.com;", "dblp": "165/9982;;215/3872;76/218;33/5513;272/4188", "google_scholar": "https://scholar.google.co.jp/citations?user=AagXxCYAAAAJ;8xxuvpoAAAAJ;https://scholar.google.fr/citations?user=VBz8gZ4AAAAJ;;https://scholar.google.fr/citations?user=gCqGj4sAAAAJ;ZLA7iioAAAAJ", "orcid": ";;0000-0003-0212-427X;0000-0002-9318-179X;;0000-0001-8974-6228", "linkedin": ";;;;pierreyvesoudeyer/;", "or_profile": "~Masataka_Sawayama1;~R\u00e9my_Portelas1;~C\u00e9dric_Colas1;~Peter_Ford_Dominey1;~Pierre-Yves_Oudeyer1;~Grgur_Kovac1", "aff": "The University of Tokyo;Ubisoft;Massachusetts Institute of Technology;CNRS;Inria;INRIA", "aff_domain": "g.ecc.u-tokyo.ac.jp;ubisoft.com;mit.edu;cnrs.fr;inria.fr;inria.fr", "position": "Lecturer;Researcher;Postdoc;Full Professor;Research director;PhD student", "bibtex": "@misc{\nkova{\\v{c}}2024large,\ntitle={Large Language Models as superpositions of cultural perspectives},\nauthor={Grgur Kova{\\v{c}} and Masataka Sawayama and R{\\'e}my Portelas and C{\\'e}dric Colas and Peter Ford Dominey and Pierre-Yves Oudeyer},\nyear={2024},\nurl={https://openreview.net/forum?id=1FWDEIGm33}\n}", "github": "", "project": "", "reviewers": "Fkgw;7GoP;P79N;B9Pb", "site": "https://openreview.net/forum?id=1FWDEIGm33", "pdf_size": 676158, "rating": "3;3;3;5", "confidence": "4;4;4;3", "soundness": "1;2;2;2", "contribution": "2;2;2;2", "presentation": "2;2;2;2", "wc_summary": "114;75;123;89", "wc_strengths": "17;31;25;57", "wc_weaknesses": "720;223;318;71", "wc_questions": "78;211;1;49", "wc_review": "929;540;467;266", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1052;722;934;403", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;2;1", "rating_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 1.75, 0.4330127018922193 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 100.25, 19.17517926904466 ], "wc_strengths_avg": [ 32.5, 14.99166435056495 ], "wc_weaknesses_avg": [ 333.0, 240.17597715008884 ], "wc_questions_avg": [ 84.75, 77.90499021243761 ], "wc_review_avg": [ 550.5, 240.4604957160323 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 777.75, 246.56274556388277 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 47, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2800038415228220148&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1;2;3;4;4", "aff_unique_norm": "University of Tokyo;Ubisoft;Massachusetts Institute of Technology;Centre National de la Recherche Scientifique;INRIA", "aff_unique_dep": ";;;;", "aff_unique_url": 
"https://www.u-tokyo.ac.jp;https://www.ubisoft.com;https://web.mit.edu;https://www.cnrs.fr;https://www.inria.fr", "aff_unique_abbr": "UTokyo;Ubisoft;MIT;CNRS;Inria", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;1;1;1", "aff_country_unique": "Japan;France;United States" }, { "id": "1GUTzm2a4v", "title": "Greedy PIG: Adaptive Integrated Gradients", "track": "main", "status": "Reject", "tldr": "", "abstract": "Deep learning has become the standard approach for most machine learning tasks. Although its great success is undeniable, interpreting the predictions of deep learning models from a human perspective remains a challenge. In contrast to model training, model interpretability is harder to quantify or pose as an explicit optimization problem. Inspired by the AUC softmax information curve (AUC SIC) metric for evaluating feature attribution methods, we propose a unified discrete optimization framework for feature attribution and feature selection based on subset selection. This leads to a natural adaptive generalization of the path integrated gradients (PIG) method for feature attribution, which we call Greedy PIG. We show that Greedy PIG achieves an extremely high AUC SIC for feature attribution tasks on images, which could also hint at the limitations of this metric for multi-class classification, and we propose a more robust metric. We demonstrate the success of Greedy PIG on a variety of tasks, including image feature attribution, graph compression/explanation, and post-hoc feature selection on tabular data. Our results show that introducing adaptivity is a versatile method for making attribution methods more powerful.", "keywords": "feature saliency;feature attribution;feature selection;graph neural networks", "primary_area": "optimization", "supplementary_material": "/attachment/95496e31c4258efe031be9a87e9606f86477d0ff.pdf", "author": "Kyriakos Axiotis;Sami Abu-El-Haija;Lin Chen;Matthew Fahrbach;Gang Fu", "authorids": "~Kyriakos_Axiotis1;~Sami_Abu-El-Haija1;~Lin_Chen14;~Matthew_Fahrbach1;~Gang_Fu3", "gender": ";M;;;", "homepage": ";http://www.haija.org;;;", "dblp": "176/5139;127/6620;;;", "google_scholar": "Xhv2tkcAAAAJ;t80qlTcAAAAJ;;;", "orcid": ";;;;", "linkedin": ";samihaija/;;;", "or_profile": "~Kyriakos_Axiotis1;~Sami_Abu-El-Haija1;~Lin_Chen14;~Matthew_Fahrbach1;~Gang_Fu3", "aff": "Google;Research, Google;;;", "aff_domain": "google.com;research.google.com;;;", "position": "Researcher;Research Scientist;;;", "bibtex": "@misc{\naxiotis2024greedy,\ntitle={Greedy {PIG}: Adaptive Integrated Gradients},\nauthor={Kyriakos Axiotis and Sami Abu-El-Haija and Lin Chen and Matthew Fahrbach and Gang Fu},\nyear={2024},\nurl={https://openreview.net/forum?id=1GUTzm2a4v}\n}", "github": "", "project": "", "reviewers": "mYhW;dYDX;yEX4;WqYq", "site": "https://openreview.net/forum?id=1GUTzm2a4v", "pdf_size": 1037202, "rating": "3;3;5;6", "confidence": "4;3;3;3", "soundness": "2;2;2;3", "contribution": "3;2;2;2", "presentation": "2;1;2;4", "wc_summary": "54;130;97;66", "wc_strengths": "75;60;48;40", "wc_weaknesses": "467;214;174;104", "wc_questions": "5;28;38;46", "wc_review": "601;432;357;256", "wc_reply_reviewers": "0;277;37;0", "wc_reply_authors": "1048;1370;287;317", "reply_reviewers": "0;1;1;0", "reply_authors": "2;3;1;2", "rating_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 
1.0897247358851685 ], "wc_summary_avg": [ 86.75, 29.49046456059992 ], "wc_strengths_avg": [ 55.75, 13.198011213815512 ], "wc_weaknesses_avg": [ 239.75, 136.98243500536847 ], "wc_questions_avg": [ 29.25, 15.384651442265437 ], "wc_review_avg": [ 411.5, 125.97718047329047 ], "wc_reply_reviewers_avg": [ 78.5, 115.59519886223649 ], "wc_reply_authors_avg": [ 755.5, 467.6914046676505 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5555555555555555, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:-s22YlOF9TEJ:scholar.google.com/&scioq=Greedy+PIG:+Adaptive+Integrated+Gradients&hl=en&as_sdt=0,5", "gs_version_total": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "1GdAJ3GsOw", "title": "DISTPAR:TENSOR PARTITIONING FOR DISTRIBUTED NEURAL NETWORK COMPUTING", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Existing distributed training systems suffer from the difficulties of adapting to diverse model architectures and balancing the trade-off between computational and communication costs. We introduce Distributed Partitioning (DistPar), a framework that allows users to develop parallel models with the ease of writing single-device programs. We establish the basic properties of tensor partitioning, which significantly expand the search space for optimal parallel strategies. The process of distributing global tensors from a single-device perspective is driven by the innovative use of collective communication primitives and their extensions which represent conversions between arbitrary tensor distribution properties. To further address the challenge of parallel scheme optimization, we carry out a cost function that considers both computational and communication costs. Guided by the cost function, the best-performing parallel scheme is automatically selected with configurable parameters, thus simplifying the process of developing parallel models. We demonstrate state-of-the-art results on extensive experiments. Moreover, DistPar reaches 50% higher throughput in large-scale face recognition tasks and a 20% improvement in language modeling tasks compared to data parallelism provided by PyTorch. This performance improvement aligns with the expected speedup and is particularly notable as the number of computing devices increases. 
The code will be released at https://github.com/DistPar.", "keywords": "Deep Learning Framework;Tensor Partitioning;Parallel Computation", "primary_area": "infrastructure, software libraries, hardware, etc.", "supplementary_material": "/attachment/574b80ab752a77ec57748ad95395c9cdaf008599.pdf", "author": "Hongsheng Wang;Jinhui Yuan;Shun Liu;Weijie Xia;Xinyi Zhou;Shengyu Zhang;Fei Wu;Mohamed Jaward Bah;Feng Lin", "authorids": "~Hongsheng_Wang1;~Jinhui_Yuan1;~Shun_Liu1;~Weijie_Xia1;~Xinyi_Zhou5;~Shengyu_Zhang2;~Fei_Wu1;~Mohamed_Jaward_Bah1;~Feng_Lin10", "gender": "M;M;M;M;;M;M;M;M", "homepage": "https://wanghongsheng01.github.io/;;https://shunliu01.github.io;https://github.com/xiaweijiexox;;https://shengyuzhang.github.io/;https://person.zju.edu.cn/wufei;;http://www.zhejianglab.com", "dblp": ";58/3397;;;;47/3459-1;84/3254-1;https://dblp.org/search?q=Mohamed+Jaward+Bah;75/2611", "google_scholar": ";HtfAR1IAAAAJ;https://scholar.google.com/citations?hl=en;;;l4Dyt7EAAAAJ;XJLn4MYAAAAJ;WRRMPzMAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";;0009-0007-5240-9114;;;0000-0002-0030-8289;;;", "linkedin": ";jinhui-yuan-291aa847/;shun-liu-5285492a2/;;;;;;", "or_profile": "~Hongsheng_Wang1;~Jinhui_Yuan1;~Shun_Liu1;~Weijie_Xia1;~Xinyi_Zhou5;~Shengyu_Zhang2;~Fei_Wu1;~Mohamed_Jaward_Bah1;~Feng_Lin10", "aff": "Zhejiang University;;Dartmouth College;Hangzhou Dianzi University;;Zhejiang University;Zhejiang University;Zhejiang Lab;Zhejiang Lab", "aff_domain": "zju.edu.cn;;cs.dartmouth.edu;hdu.edu.cn;;zju.edu.cn;zju.edu.cn;zhejianglab.com;zhejianglab.com", "position": "PhD student;;Intern;Undergrad student;;ZJU100 Young Professor;Full Professor;Researcher;Principal Researcher", "bibtex": "@misc{\nwang2024distpartensor,\ntitle={{DISTPAR}:{TENSOR} {PARTITIONING} {FOR} {DISTRIBUTED} {NEURAL} {NETWORK} {COMPUTING}},\nauthor={Hongsheng Wang and Jinhui Yuan and Shun Liu and Weijie Xia and Xinyi Zhou and Shengyu Zhang and Fei Wu and Mohamed Jaward Bah and Feng Lin},\nyear={2024},\nurl={https://openreview.net/forum?id=1GdAJ3GsOw}\n}", "github": "", "project": "", "reviewers": "Q7ud;ovib;pTBS", "site": "https://openreview.net/forum?id=1GdAJ3GsOw", "pdf_size": 1109568, "rating": "1;1;3", "confidence": "4;5;4", "soundness": "1;3;1", "contribution": "2;1;1", "presentation": "2;2;2", "wc_summary": "107;64;28", "wc_strengths": "93;19;5", "wc_weaknesses": "353;180;322", "wc_questions": "109;75;3", "wc_review": "662;338;358", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 1.6666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 1.6666666666666667, 0.9428090415820634 ], "contribution_avg": [ 1.3333333333333333, 0.4714045207910317 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 66.33333333333333, 32.293790252754306 ], "wc_strengths_avg": [ 39.0, 38.60915262818736 ], "wc_weaknesses_avg": [ 285.0, 75.31710739710246 ], "wc_questions_avg": [ 62.333333333333336, 44.19150245113747 ], "wc_review_avg": [ 452.6666666666667, 148.24604173093084 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 3, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:OrQtgl86lfQJ:scholar.google.com/&scioq=DISTPAR:TENSOR+PARTITIONING+FOR+DISTRIBUTED+NEURAL+NETWORK+COMPUTING&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;2;0;0;3;3", "aff_unique_norm": "Zhejiang University;Dartmouth College;Hangzhou Dianzi University;Zhejiang Lab", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.zju.edu.cn;https://www.dartmouth.edu;http://www.hdu.edu.cn/;http://www.zhejianglab.com", "aff_unique_abbr": "ZJU;Dartmouth;HGHDU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;0;0", "aff_country_unique": "China;United States" }, { "id": "1HgJZl3HgT", "title": "Learning to Stylize Soundscapes from In-the-Wild Videos", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Speech recordings convey a great deal of information about the scenes, resulting in a variety of effects ranging from reverberation to additional ambient sounds. In this paper, we learn to restyle input speech to sound as though it was recorded within a different scene, given an audio (or audio-visual) example recorded from that scene. Our model learns through self-supervision, taking advantage of the fact that natural video contains recurring sound events and textures. We extract an audio clip from a video and apply speech enhancement. We then train a latent diffusion model to recover the original sound, using another audio-visual clip taken from elsewhere in the video as a conditional hint. Through this process, the model learns to transfer the conditional example's sound properties to the input sound. We show that our model can be successfully trained using unlabeled, in-the-wild videos, and that an additional visual signal can improve its sound prediction abilities.", "keywords": "Audio Generation and Stylization;Audio-visual Learning;Multimodal Learning", "primary_area": "generative models", "supplementary_material": "/attachment/23a9bbe0d8b2ff2dc55be33292112a8c6e0a18e4.zip", "author": "Tingle Li;Renhao Wang;Po-Yao Huang;Andrew Owens;Gopala Anumanchipalli", "authorids": "~Tingle_Li1;~Renhao_Wang1;~Po-Yao_Huang2;~Andrew_Owens1;~Gopala_Anumanchipalli1", "gender": "M;;M;M;M", "homepage": "https://tinglok.netlify.app/;;http://andrewowens.com;http://people.eecs.berkeley.edu/~gopala/;https://berniebear.github.io/", "dblp": "248/9136;243/7150;85/2697;54/7824;154/3943-1", "google_scholar": "UGpC1zgAAAAJ;q4RlE2oAAAAJ;9hX-JksAAAAJ;VecEj6kAAAAJ;E8K25LIAAAAJ", "orcid": ";;;0000-0002-9714-7740;", "linkedin": ";;;;", "or_profile": "~Tingle_Li1;~Renhao_Wang1;~Andrew_Owens1;~Gopala_Anumanchipalli1;~Po-Yao_Huang1", "aff": "University of California, Berkeley;University of California, Berkeley;University of Michigan;University of California, Berkeley;Meta", "aff_domain": "eecs.berkeley.edu;berkeley.edu;umich.edu;berkeley.edu;meta.com", "position": "PhD student;PhD student;Assistant Professor;Assistant Professor;Researcher", "bibtex": "@misc{\nli2024learning,\ntitle={Learning to Stylize Soundscapes from In-the-Wild Videos},\nauthor={Tingle Li and Renhao Wang and Po-Yao Huang and Andrew Owens and Gopala Anumanchipalli},\nyear={2024},\nurl={https://openreview.net/forum?id=1HgJZl3HgT}\n}", "github": "", "project": "", "reviewers": "UAzL;HkqM;V8de;tbtJ", "site": "https://openreview.net/forum?id=1HgJZl3HgT", "pdf_size": 1481535, "rating": "3;3;5;5", "confidence": "3;4;3;3", "soundness": "2;2;2;2", "contribution": "1;1;3;3", "presentation": "2;2;3;4", "wc_summary": "102;57;108;38", "wc_strengths": 
"31;51;64;38", "wc_weaknesses": "151;315;408;157", "wc_questions": "2;94;25;313", "wc_review": "286;517;605;546", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.0, 0.0 ], "contribution_avg": [ 2.0, 1.0 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 76.25, 29.600464523382062 ], "wc_strengths_avg": [ 46.0, 12.62933094031509 ], "wc_weaknesses_avg": [ 257.75, 108.85626991588495 ], "wc_questions_avg": [ 108.5, 122.82609657560563 ], "wc_review_avg": [ 488.5, 121.13731877501664 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ubQRJShe0ooJ:scholar.google.com/&scioq=Learning+to+Stylize+Soundscapes+from+In-the-Wild+Videos&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "University of California, Berkeley;University of Michigan;Meta", "aff_unique_dep": ";;Meta Platforms, Inc.", "aff_unique_url": "https://www.berkeley.edu;https://www.umich.edu;https://meta.com", "aff_unique_abbr": "UC Berkeley;UM;Meta", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "1Htbe2fiQU", "title": "Learning with Counterfactual Explanations for Radiology Report Generation", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Due to the common content of anatomy, radiology images with their corresponding reports exhibit highly similarity. Such inherent data bias can predispose automatic report generation models to learn entangled and spurious representations resulting in misdiagnostic reports. Moreover, the lack of explainability hinders the acceptance by radiologists in clinical practice.\nTo tackle these, we propose a novel \\textbf{Co}unter\\textbf{F}actual \\textbf{E}xplanations-based framework (CoFE) for radiology report generation. Counterfactual explanations serve as a potent tool for understanding how decisions made by algorithms can be changed by asking ``what if'' scenarios. By leveraging this concept, CoFE can learn non-spurious visual representations by contrasting the representations between factual and counterfactual images. Specifically, we derive counterfactual images by swapping a patch between positive and negative samples until a predicted diagnosis shift occurs. Here, positive and negative samples are the most semantically similar but have different diagnosis labels. Additionally, CoFE employs a learnable prompt to efficiently fine-tune the pretrained large language model, encapsulating both factual and counterfactual content to provide a more generalizable prompt representation. 
Extensive experiments on two benchmarks demonstrate that leveraging the counterfactual explanations enables CoFE to generate semantically coherent and factually complete reports and outperform in terms of language generation and clinical efficacy metrics.", "keywords": "Counterfactual Explanations;Radiology Report Generation;Contrastive Learning", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Mingjie Li;Haokun Lin;Xiaodan Liang;Ling Chen;Abdulmotaleb El Saddik;Xiaojun Chang", "authorids": "~Mingjie_Li2;~Haokun_Lin3;~Xiaodan_Liang2;~Ling_Chen5;~Abdulmotaleb_El_Saddik1;~Xiaojun_Chang4", "gender": "M;M;F;F;;", "homepage": "http://www.mmvg.org/member/mingjie_li/;https://blog.csdn.net/qq_46192381;https://www.sysu-hcp.net/;https://profiles.uts.edu.au/Ling.Chen;;", "dblp": "48/10103-6;;;17/1237-6;;", "google_scholar": "ag0m3aoAAAAJ;https://scholar.google.com/citations?hl=zh-CN;voxznZAAAAAJ;https://scholar.google.com.au/citations?user=L5aYWQcAAAAJ;;", "orcid": ";0009-0008-6831-2859;;0000-0002-6468-5729;;", "linkedin": ";;;;;", "or_profile": "~Mingjie_Li2;~Haokun_Lin3;~Xiaodan_Liang2;~Ling_Chen5;~Abdulmotaleb_El_Saddik1;~Xiaojun_Chang4", "aff": "Stanford University;Mohamed bin Zayed University of Artificial Intelligence;SUN YAT-SEN UNIVERSITY;University of Technology Sydney;;", "aff_domain": "stanford.edu;mbzuai.ac.ae;sysu.edu.cn;uts.edu.au;;", "position": "Postdoc;MS student;Associate Professor;Full Professor;;", "bibtex": "@misc{\nli2024learning,\ntitle={Learning with Counterfactual Explanations for Radiology Report Generation},\nauthor={Mingjie Li and Haokun Lin and Xiaodan Liang and Ling Chen and Abdulmotaleb El Saddik and Xiaojun Chang},\nyear={2024},\nurl={https://openreview.net/forum?id=1Htbe2fiQU}\n}", "github": "", "project": "", "reviewers": "sgJL;nNsA;QvNC;u7dz", "site": "https://openreview.net/forum?id=1Htbe2fiQU", "pdf_size": 5155127, "rating": "3;5;5;5", "confidence": "4;5;3;4", "soundness": "2;2;3;3", "contribution": "1;2;2;3", "presentation": "2;3;3;2", "wc_summary": "108;32;53;87", "wc_strengths": "9;12;39;85", "wc_weaknesses": "131;138;172;234", "wc_questions": "56;4;47;71", "wc_review": "304;186;311;477", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 70.0, 29.436372059070052 ], "wc_strengths_avg": [ 36.25, 30.474374480865066 ], "wc_weaknesses_avg": [ 168.75, 40.739262389002576 ], "wc_questions_avg": [ 44.5, 24.904818810824544 ], "wc_review_avg": [ 319.5, 103.6110515340907 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:_kwaKu5AFXAJ:scholar.google.com/&scioq=Learning+with+Counterfactual+Explanations+for+Radiology+Report+Generation&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Stanford University;Mohamed bin Zayed University of Artificial Intelligence;Sun Yat-sen University;University of Technology Sydney", "aff_unique_dep": ";;;", "aff_unique_url": 
"https://www.stanford.edu;https://mbzuai.ac.ae;http://www.sysu.edu.cn;https://www.uts.edu.au", "aff_unique_abbr": "Stanford;MBZUAI;SYSU;UTS", "aff_campus_unique_index": "0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;1;2;3", "aff_country_unique": "United States;United Arab Emirates;China;Australia" }, { "id": "1IIiQnLRe8", "title": "Diversity Modeling for Semantic Shift Detection", "track": "main", "status": "Reject", "tldr": "", "abstract": "Semantic shift detection faces a big challenge of modeling non-semantic feature diversity while suppressing generalization to unseen semantic shifts. Existing reconstruction-based approaches are either not constrained well to avoid over-generalization or not general enough to model diversity-agnostic in-distribution samples. Both may lead to feature confusion near the decision boundary and fail to identify various semantic shifts. In this work, we propose Bi-directional Regularized Diversity Modulation (BiRDM) to model restricted feature diversity for semantic shift detection so as to address the challenging issues in reconstruction-based detection methods. BiDRM modulates feature diversity by controlling spatial transformation with learnable dynamic modulation parameters in latent space. Smoothness Regularization (SmoReg) is introduced to avoid undesired generalization to semantic shift samples. Furthermore, Batch Normalization Simulation (BNSim) coordinating with auxiliary data is leveraged to separately transform different semantic distributions and push potential semantic shift samples away implicitly, making the feature more discriminative. Compared with previous works, BiRDM can successfully model diversity-agnostic non-semantic pattern while alleviating feature confusion in latent space. 
Experimental results demonstrate the effectiveness of our method.", "keywords": "semantic shift detection;diversity modeling", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Wenrui Liu;Hong Chang;Bingpeng Ma;Shiguang Shan;Xilin CHEN", "authorids": "~Wenrui_Liu1;~Hong_Chang1;~Bingpeng_Ma1;~Shiguang_Shan2;~Xilin_CHEN2", "gender": ";F;M;M;M", "homepage": "http://vipl.ict.ac.cn/edu/student/master/202205/t20220518_36050.html;;http://people.ucas.edu.cn/~bpma;http://vipl.ict.ac.cn/people/sgshan/;http://vipl.ict.ac.cn/people/_xlchen/", "dblp": "156/8975-4;;62/1822;s/ShiguangShan;c/XilinChen", "google_scholar": ";LX6MnNsAAAAJ;;https://scholar.google.com.tw/citations?user=Vkzd7MIAAAAJ;vVx2v20AAAAJ", "orcid": ";;0000-0001-8984-205X;0000-0002-8348-392X;0000-0003-3024-4404", "linkedin": ";;;;", "or_profile": "~Wenrui_Liu1;~Hong_Chang1;~Bingpeng_Ma1;~Shiguang_Shan2;~Xilin_Chen4", "aff": "Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;University of Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology", "aff_domain": "ict.ac.cn;ict.ac.cn;ucas.ac.cn;ict.ac.cn;ict.ac.cn", "position": "MS student;Full Professor;Full Professor;Full Professor;Full Professor", "bibtex": "@misc{\nliu2024diversity,\ntitle={Diversity Modeling for Semantic Shift Detection},\nauthor={Wenrui Liu and Hong Chang and Bingpeng Ma and Shiguang Shan and Xilin CHEN},\nyear={2024},\nurl={https://openreview.net/forum?id=1IIiQnLRe8}\n}", "github": "", "project": "", "reviewers": "k7Jp;BvVD;CShN;d4iz;oBBb", "site": "https://openreview.net/forum?id=1IIiQnLRe8", "pdf_size": 12798916, "rating": "3;3;5;6;6", "confidence": "3;4;4;3;4", "soundness": "2;3;2;3;3", "contribution": "2;2;2;3;4", "presentation": "2;2;3;2;3", "wc_summary": "223;85;54;40;59", "wc_strengths": "40;65;52;42;49", "wc_weaknesses": "640;176;244;151;38", "wc_questions": "11;3;27;2;44", "wc_review": "914;329;377;235;190", "wc_reply_reviewers": "0;0;211;105;30", "wc_reply_authors": "733;457;555;543;504", "reply_reviewers": "0;0;2;1;1", "reply_authors": "2;2;3;2;2", "rating_avg": [ 4.6, 1.3564659966250536 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "contribution_avg": [ 2.6, 0.8 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 92.2, 67.00268651330335 ], "wc_strengths_avg": [ 49.6, 8.867919710958146 ], "wc_weaknesses_avg": [ 249.8, 206.08580737158977 ], "wc_questions_avg": [ 17.4, 16.032467059064864 ], "wc_review_avg": [ 409.0, 261.03103263788387 ], "wc_reply_reviewers_avg": [ 69.2, 80.62605038075473 ], "wc_reply_authors_avg": [ 558.4, 93.78187458139232 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.06019292654288467, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ij_hPtLp8ZoJ:scholar.google.com/&scioq=Diversity+Modeling+for+Semantic+Shift+Detection&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "Chinese Academy of Sciences;University of Chinese Academy of Sciences;Institute of Computing Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cas.cn;http://www.ucas.ac.cn;http://www.ict.ac.cn", "aff_unique_abbr": "CAS;UCAS;", "aff_campus_unique_index": "", 
"aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "1IaoWBqB6K", "title": "DiffDock-Pocket: Diffusion for Pocket-Level Docking with Sidechain Flexibility", "track": "main", "status": "Reject", "tldr": "", "abstract": "When a small molecule binds to a protein, the 3D structure of the protein and its function change. Understanding this process, called molecular docking, can be crucial in areas such as drug design. Recent learning-based attempts have shown promising results at this task, yet lack features that traditional approaches support. In this work, we close this gap by proposing DiffDock-Pocket, a diffusion-based docking algorithm that is conditioned on a binding target to predict ligand poses only in a specific binding pocket. On top of this, our model supports receptor flexibility and predicts the position of sidechains close to the binding site. Empirically, we improve the state-of-the-art in site-specific-docking on the PDBBind benchmark. Especially when using in-silico generated structures, we achieve more than twice the performance of current methods while being more than 20 times faster than other flexible approaches. Although the model was not trained for cross-docking to different structures, it yields competitive results in this task.", "keywords": "diffusion;diffusion models;docking;generative model", "primary_area": "applications to physical sciences (physics, chemistry, biology, etc.)", "supplementary_material": "", "author": "Michael Plainer;Marcella Toth;Simon Dobers;Hannes Stark;Gabriele Corso;C\u00e9line Marquet;Regina Barzilay", "authorids": "~Michael_Plainer1;~Marcella_Toth1;~Simon_Dobers1;~Hannes_Stark1;~Gabriele_Corso1;~C\u00e9line_Marquet1;~Regina_Barzilay1", "gender": "M;;M;;female;;M", "homepage": "https://plainer.dev;;http://linkedin.com/in/simon-dobers;https://gcorso.github.io/;https://www.regina.csail.mit.edu/;;https://hannes-stark.com/", "dblp": "364/7250.html;;;262/6499;b/ReginaBarzilay;;300/4627", "google_scholar": "7Bnt9kUAAAAJ;;;LUrAYgEAAAAJ;;;bnXfJdEAAAAJ", "orcid": ";0009-0002-0685-8852;;;;0000-0002-8691-5791;0000-0002-4463-326X", "linkedin": "https://linkedin.com/in/plainer/;;;gcorso/;;;hannes-stark/", "or_profile": "~Michael_Plainer1;~Marcella_Toth1;~Simon_Dobers1;~Gabriele_Corso1;~Regina_Barzilay1;~C_Marquet1;~Hannes_St\u00e4rk1", "aff": "Freie Universit\u00e4t Berlin;Technische Universit\u00e4t M\u00fcnchen;;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Technical University Munich;Massachusetts Institute of Technology", "aff_domain": "fu-berlin.de;tum.de;;mit.edu;mit.edu;tum.de;mit.edu", "position": "PhD student;MS student;;PhD student;Professor;PhD student;PhD student", "bibtex": "@misc{\nplainer2024diffdockpocket,\ntitle={DiffDock-Pocket: Diffusion for Pocket-Level Docking with Sidechain Flexibility},\nauthor={Michael Plainer and Marcella Toth and Simon Dobers and Hannes Stark and Gabriele Corso and C{\\'e}line Marquet and Regina Barzilay},\nyear={2024},\nurl={https://openreview.net/forum?id=1IaoWBqB6K}\n}", "github": "", "project": "", "reviewers": "rxhe;Mo9h;TENj;XNuQ", "site": "https://openreview.net/forum?id=1IaoWBqB6K", "pdf_size": 4902112, "rating": "3;5;6;6", "confidence": "4;4;3;3", "soundness": "3;2;3;3", "contribution": "2;2;2;2", "presentation": "3;4;3;3", "wc_summary": "25;52;87;67", "wc_strengths": "46;69;124;40", "wc_weaknesses": "98;285;166;75", "wc_questions": "191;48;145;198", "wc_review": "360;454;522;380", "wc_reply_reviewers": "0;51;122;0", 
"wc_reply_authors": "1333;929;1370;1798", "reply_reviewers": "0;1;1;0", "reply_authors": "3;2;2;3", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 57.75, 22.620510604316607 ], "wc_strengths_avg": [ 69.75, 33.138912172852024 ], "wc_weaknesses_avg": [ 156.0, 81.64863746566749 ], "wc_questions_avg": [ 145.5, 59.860253925288355 ], "wc_review_avg": [ 429.0, 64.10148204214939 ], "wc_reply_reviewers_avg": [ 43.25, 50.006874527408726 ], "wc_reply_authors_avg": [ 1357.5, 307.5747876533446 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.8164965809277259, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1742131594736248720&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2;2;3;2", "aff_unique_norm": "Freie Universit\u00e4t Berlin;Technische Universit\u00e4t M\u00fcnchen;Massachusetts Institute of Technology;Technical University of Munich", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.fu-berlin.de;https://www.tum.de;https://web.mit.edu;https://www.tum.de", "aff_unique_abbr": "FU Berlin;TUM;MIT;TUM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1;0;1", "aff_country_unique": "Germany;United States" }, { "id": "1JPfHljXL4", "title": "When, Why and How Much? Adaptive Learning Rate Scheduling by Refinement", "track": "main", "status": "Reject", "tldr": "", "abstract": "In this paper, we present a refined study of learning rate schedules for stochastic gradient descent (SGD). In contrast to most prior works that study the convergence of the average iterate, we study the last iterate, which is what most people use in practice. Furthermore, we break away from the tradition of replacing the gradients with crude upper bounds, which allows us to obtain a \\emph{problem-adaptive} learning rate schedule. Our method is the first systematic approach to \\emph{automatically} yield learning rate warm-up and rapid learning rate annealing near the end of training. In cases where gradient norm information is not available, our theory predicts that the best choice is the linear-decay schedule that sets the stepsize proportionally to $1 - t/T$, where $t$ is the current iteration and $T$ is the total number of steps. Our final theoretical result is an extension of our methodology to coordinate-wise methods. We perform the most comprehensive evaluation of learning rate schedules to date, evaluating across 10 diverse deep learning problems, a series of LLMs, and a suite of logistic regression problems. 
We validate that overall, the linear-decay schedule outperforms all commonly used default schedules including cosine annealing, and that our schedule refinement method gives further improvements.", "keywords": "learning rates; linear decay; deep learning; online learning", "primary_area": "optimization", "supplementary_material": "", "author": "Aaron Defazio;Ashok Cutkosky;Harsh Mehta;Konstantin Mishchenko", "authorids": "~Aaron_Defazio1;~Ashok_Cutkosky1;~Harsh_Mehta1;~Konstantin_Mishchenko1", "gender": "M;;M;", "homepage": "https://www.aarondefazio.com/;http://www.cs.stanford.edu/~ashokc;;https://konstmish.com/", "dblp": "116/2969;191/6725;122/1475;222/9853", "google_scholar": "KEzJsdkAAAAJ;h4AbGp0AAAAJ;murJPNoAAAAJ;Z8Y8nhQAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Aaron_Defazio1;~Ashok_Cutkosky1;~Harsh_Mehta1;~Konstantin_Mishchenko1", "aff": "Meta;Boston University;Google Research;Samsung", "aff_domain": "meta.com;bu.edu;google.com;samsung.com", "position": "Research Scientist;Assistant Professor;Software Engineer;Researcher", "bibtex": "@misc{\ndefazio2024when,\ntitle={When, Why and How Much? Adaptive Learning Rate Scheduling by Refinement},\nauthor={Aaron Defazio and Ashok Cutkosky and Harsh Mehta and Konstantin Mishchenko},\nyear={2024},\nurl={https://openreview.net/forum?id=1JPfHljXL4}\n}", "github": "", "project": "", "reviewers": "CaT7;NM9Z;BqWo;HeFS;eGse", "site": "https://openreview.net/forum?id=1JPfHljXL4", "pdf_size": 1740080, "rating": "5;5;5;6;8", "confidence": "3;4;3;2;4", "soundness": "2;3;4;3;4", "contribution": "2;2;3;3;4", "presentation": "3;3;3;3;3", "wc_summary": "51;44;55;67;81", "wc_strengths": "62;59;53;64;47", "wc_weaknesses": "20;79;62;109;43", "wc_questions": "153;3;170;26;89", "wc_review": "286;185;340;266;260", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "508;451;647;352;210", "reply_reviewers": "0;0;0;0;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.8, 1.16619037896906 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "contribution_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 59.6, 13.047605144240073 ], "wc_strengths_avg": [ 57.0, 6.2289646009589745 ], "wc_weaknesses_avg": [ 62.6, 30.40131576099956 ], "wc_questions_avg": [ 88.2, 66.36083182118801 ], "wc_review_avg": [ 267.4, 49.926345750515324 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 433.6, 146.99197256993324 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.2750095491084634, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10553505582998704311&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Meta;Boston University;Google;Samsung", "aff_unique_dep": "Meta Platforms, Inc.;;Google Research;Samsung", "aff_unique_url": "https://meta.com;https://www.bu.edu;https://research.google;https://www.samsung.com", "aff_unique_abbr": "Meta;BU;Google Research;Samsung", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United States;South Korea" }, { "id": "1JR20YOE0H", "title": "On Feature Diversity in Energy-based Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "Energy-based learning is a powerful learning paradigm that encapsulates various discriminative and generative approaches. 
An energy-based model (EBM) is typically formed of inner-model(s) that learn a combination of the different features to generate an energy mapping for each input configuration. In this paper, we focus on the diversity of the produced feature set. We extend the probably approximately correct (PAC) theory of EBMs and analyze the effect of redundancy reduction on the performance of EBMs. We derive novel generalization bounds for various learning contexts, i.e., regression, classification, and implicit regression, with different energy functions and we show that indeed reducing redundancy of the feature set can consistently decrease the gap between the true and empirical expectation of the energy and boosts the performance of the model.", "keywords": "energy-based models;continual learning;redundancy reduction;feature diversity", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/5756c93f3c3852ed2c699584b082a052cdf6601f.zip", "author": "Firas Laakom;Jenni Raitoharju;Alexandros Iosifidis;Moncef Gabbouj", "authorids": "~Firas_Laakom1;~Jenni_Raitoharju1;~Alexandros_Iosifidis2;~Moncef_Gabbouj1", "gender": "M;;M;M", "homepage": ";;https://www.tuni.fi/en/people/alexandros-iosifidis;https://www.tuni.fi/en/moncef-gabbouj", "dblp": "242/8179;;01/9539;08/6597", "google_scholar": "VPWIyx8AAAAJ;;KjsL0KEAAAAJ;cHukfSUAAAAJ", "orcid": "0000-0001-7436-5692;;0000-0003-4807-1345;0000-0002-9788-2323", "linkedin": ";;;moncef-gabbouj-2186282/?originalSubdomain=fi", "or_profile": "~Firas_Laakom1;~Jenni_Raitoharju1;~Alexandros_Iosifidis2;~Moncef_Gabbouj1", "aff": "Tampere University;;Aarhus University;Tampere University", "aff_domain": "tuni.fi;;au.dk;tuni.fi", "position": "PhD student;;Full Professor;Full Professor", "bibtex": "@misc{\nlaakom2024on,\ntitle={On Feature Diversity in Energy-based Models},\nauthor={Firas Laakom and Jenni Raitoharju and Alexandros Iosifidis and Moncef Gabbouj},\nyear={2024},\nurl={https://openreview.net/forum?id=1JR20YOE0H}\n}", "github": "", "project": "", "reviewers": "ZAxa;oMAe;M7of;qchL;UTkN", "site": "https://openreview.net/forum?id=1JR20YOE0H", "pdf_size": 839403, "rating": "5;6;6;6;6", "confidence": "4;2;3;2;3", "soundness": "2;3;3;3;3", "contribution": "2;3;2;2;3", "presentation": "1;3;3;2;2", "wc_summary": "80;78;103;52;137", "wc_strengths": "28;53;36;123;109", "wc_weaknesses": "90;42;167;146;63", "wc_questions": "260;94;132;97;1", "wc_review": "458;267;438;418;310", "wc_reply_reviewers": "158;0;0;30;0", "wc_reply_authors": "304;366;778;697;180", "reply_reviewers": "2;0;0;1;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.8, 0.39999999999999997 ], "confidence_avg": [ 2.8, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "contribution_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.2, 0.7483314773547882 ], "wc_summary_avg": [ 90.0, 28.51666179622012 ], "wc_strengths_avg": [ 69.8, 38.82988539771911 ], "wc_weaknesses_avg": [ 101.6, 47.8020920044301 ], "wc_questions_avg": [ 116.8, 83.73625260303926 ], "wc_review_avg": [ 378.2, 75.55766010140864 ], "wc_reply_reviewers_avg": [ 37.6, 61.31101043042758 ], "wc_reply_authors_avg": [ 465.0, 231.8361490363399 ], "reply_reviewers_avg": [ 0.6, 0.7999999999999999 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8017837257372734, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7411738941659034268&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, 
"aff_unique_index": "0;1;0", "aff_unique_norm": "Tampere University;Aarhus University", "aff_unique_dep": ";", "aff_unique_url": "https://www.tuni.fi;https://au.dk", "aff_unique_abbr": "Tuni;AU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Finland;Denmark" }, { "title": "Denoising Diffusion via Image-Based Rendering", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19594", "id": "1JbsdayvhO", "author_site": "Titas Anciukevi\u010dius, Fabian Manhardt, Federico Tombari, Paul Henderson", "tldr": "", "abstract": "Generating 3D scenes is a challenging open problem, which requires synthesizing plausible content that is fully consistent in 3D space. While recent methods such as neural radiance fields excel at view synthesis and 3D reconstruction, they cannot synthesize plausible details in unobserved regions since they lack a generative capability. Conversely, existing generative methods are typically not capable of reconstructing detailed, large-scale scenes in the wild, as they use limited-capacity 3D scene representations, require aligned camera poses, or rely on additional regularizers. In this work, we introduce the first diffusion model able to perform fast, detailed reconstruction and generation of real-world 3D scenes. To achieve this, we make three contributions. First, we introduce a new neural scene representation, IB-planes, that can efficiently and accurately represent large 3D scenes, dynamically allocating more capacity as needed to capture details visible in each image. Second, we propose a denoising-diffusion framework to learn a prior over this novel 3D scene representation, using only 2D images without the need for any additional supervision signal such as masks or depths. This supports 3D reconstruction and generation in a unified architecture. Third, we develop a principled approach to avoid trivial 3D solutions when integrating the image-based rendering with the diffusion model, by dropping out representations of some images. 
We evaluate the model on several challenging datasets of real and synthetic images, and demonstrate superior results on generation, novel view synthesis and 3D reconstruction.", "keywords": "Neural Scene Representations;Generative Models;Denoising Diffusion;3D Reconstruction", "primary_area": "generative models", "supplementary_material": "/attachment/13f97cec1247b17971e7bab7e4dd3526c4b1a2ea.zip", "author": "Titas Anciukevi\u010dius;Fabian Manhardt;Federico Tombari;Paul Henderson", "authorids": "~Titas_Anciukevi\u010dius1;~Fabian_Manhardt1;~Federico_Tombari1;~Paul_Henderson1", "gender": "M;M;M;", "homepage": "https://www.anciukevicius.com/;http://campar.in.tum.de/Main/FabianManhardt;https://federicotombari.github.io/;http://www.pmh47.net", "dblp": "262/3972;173/9271;16/3539;172/1394", "google_scholar": "N8xNl8kAAAAJ;https://scholar.google.de/citations?user=bERItx8AAAAJ;TFsE4BIAAAAJ;https://scholar.google.co.uk/citations?user=HN7fd4MAAAAJ", "orcid": ";0000-0002-4577-4590;0000-0001-5598-5212;", "linkedin": "titas-anciukevicius/;;fedet/;", "or_profile": "~Titas_Anciukevi\u010dius1;~Fabian_Manhardt1;~Federico_Tombari1;~Paul_Henderson1", "aff": "University of Edinburgh;Google;Technical University Munich (TUM);Institute of Science and Technology Austria", "aff_domain": "ed.ac.uk;google.com;in.tum.de;ist.ac.at", "position": "PhD student;Researcher;Lecturer;Postdoc", "bibtex": "@inproceedings{\nanciukevi{\\v{c}}ius2024denoising,\ntitle={Denoising Diffusion via Image-Based Rendering},\nauthor={Titas Anciukevi{\\v{c}}ius and Fabian Manhardt and Federico Tombari and Paul Henderson},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=1JbsdayvhO}\n}", "github": "", "project": "", "reviewers": "a83P;C6XK;MQQ6;Wz4z", "pdf_size": 19513544, "rating": "6;6;6;8", "confidence": "5;3;4;3", "soundness": "3;3;2;3", "contribution": "2;3;2;3", "presentation": "3;2;3;3", "wc_summary": "83;81;61;82", "wc_strengths": "40;34;73;77", "wc_weaknesses": "249;131;116;188", "wc_questions": "215;69;64;213", "wc_review": "587;315;314;560", "wc_reply_reviewers": "941;80;0;44", "wc_reply_authors": "2099;423;266;1492", "reply_reviewers": "3;2;0;1", "reply_authors": "4;2;1;3", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 76.75, 9.12071817347735 ], "wc_strengths_avg": [ 56.0, 19.170289512680814 ], "wc_weaknesses_avg": [ 171.0, 52.4356748788456 ], "wc_questions_avg": [ 140.25, 73.77457217768192 ], "wc_review_avg": [ 444.0, 129.8518386469749 ], "wc_reply_reviewers_avg": [ 266.25, 390.59593891898055 ], "wc_reply_authors_avg": [ 1070.0, 758.6089242818067 ], "reply_reviewers_avg": [ 1.5, 1.118033988749895 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16540492762930564730&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=1JbsdayvhO", "pdf": "https://openreview.net/pdf?id=1JbsdayvhO", "email": "ed.ac.uk;google.com;in.tum.de;ist.ac.at", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of Edinburgh;Google;Technical University Munich;Institute of Science and Technology Austria", "aff_unique_dep": ";Google;;", 
"aff_unique_url": "https://www.ed.ac.uk;https://www.google.com;https://www.tum.de;https://www.ist.ac.at", "aff_unique_abbr": "Edinburgh;Google;TUM;IST Austria", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;2;3", "aff_country_unique": "United Kingdom;United States;Germany;Austria" }, { "id": "1JiIKjcwrr", "title": "Robust Self-supervised Learning in Heterogeneous Graph Based on Feature-Topology Balancing", "track": "main", "status": "Reject", "tldr": "", "abstract": "In recent years, graph neural network (GNN) based self-supervised learning in heterogeneous information networks (HINs) has gathered considerable attention. Most of the past studies followed a message passing approach where the features of a central node are updated based on the features of its neighboring nodes. Since these methods depend on informative graph topology and node features, their performance significantly deteriorates when there is an issue in one factor. Moreover, since real-world HINs are highly noisy and validating the importance of attributes is challenging, it is rare to find cases where both the graph topology and node features are of good quality. To address this problem, we make the first model that can explicitly separate the graph topology and features in the heterogeneous graph by proposing the novel framework BFTNet (robust self-supervised heterogeneous graph learning using the Balance between node Features and graph Topology). BFTNet employs a knowledge graph embedding module focusing on global graph topology and a contrastive learning module dedicated to learning node features. Thanks to the novel structure that handles graph topology and node features separately, BFTNet can assign higher importance to one factor, thereby allowing it to effectively respond to skewed datasets in real-world situations. Moreover, BFTNet can improve performance by designing the optimal module suited for learning the topology and features, without sacrificing the performance of one modality to reflect the characteristics of the other modality. Lastly, BFTNet implemented a novel graph conversion scheme and representation fusion method to ensure that the representation of topology and features are effectively learned and integrated. The self-supervised learning performance of BFTNet is verified by extensive experiments on four real-world benchmark datasets, and the robustness of BFTNet is demonstrated with the experiments on noisy datasets. 
The source code of BFTNet will be available in the final version.", "keywords": "Heterogeneous Graph;Knowledge graph;Self-supervised learning", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/a8e6c444cea675e439d445a1845569e0dfd43c0e.zip", "author": "Junyong Ahn;Bong Gyun Kang;Hyeongrok Han;Sungroh Yoon", "authorids": "~Junyong_Ahn2;~Bong_Gyun_Kang1;~Hyeongrok_Han1;~Sungroh_Yoon1", "gender": "M;M;M;", "homepage": "https://github.com/jyahn215;https://github.com/Pusheen-cat;;http://ailab.snu.ac.kr", "dblp": ";369/7132;285/5845;99/1474", "google_scholar": ";iuMRdnIAAAAJ;VpV7QEkAAAAJ;Bphl_fIAAAAJ", "orcid": ";0000-0002-2287-4564;;0000-0002-2367-197X", "linkedin": ";;;", "or_profile": "~Junyong_Ahn2;~Bong_Gyun_Kang1;~Hyeongrok_Han1;~Sungroh_Yoon1", "aff": "Seoul National University;Seoul National University;Seoul National University;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr", "position": "MS student;PhD student;PhD student;Full Professor", "bibtex": "@misc{\nahn2024robust,\ntitle={Robust Self-supervised Learning in Heterogeneous Graph Based on Feature-Topology Balancing},\nauthor={Junyong Ahn and Bong Gyun Kang and Hyeongrok Han and Sungroh Yoon},\nyear={2024},\nurl={https://openreview.net/forum?id=1JiIKjcwrr}\n}", "github": "", "project": "", "reviewers": "jyGu;69NY;KdXi", "site": "https://openreview.net/forum?id=1JiIKjcwrr", "pdf_size": 508664, "rating": "3;3;6", "confidence": "4;5;3", "soundness": "2;3;4", "contribution": "2;3;3", "presentation": "2;3;4", "wc_summary": "90;56;55", "wc_strengths": "37;40;127", "wc_weaknesses": "214;184;265", "wc_questions": "2;2;62", "wc_review": "343;282;509", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "228;118;269", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 4.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 67.0, 16.268579122549905 ], "wc_strengths_avg": [ 68.0, 41.737273509418415 ], "wc_weaknesses_avg": [ 221.0, 33.436506994600975 ], "wc_questions_avg": [ 22.0, 28.284271247461902 ], "wc_review_avg": [ 378.0, 95.9201056435337 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 205.0, 63.75473838599502 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8660254037844387, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:eM5Lv-gU-UwJ:scholar.google.com/&scioq=Robust+Self-supervised+Learning+in+Heterogeneous+Graph+Based+on+Feature-Topology+Balancing&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Neural Architecture Retrieval", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19593", "id": "1JtTPYBKqt", "author_site": "Xiaohuan Pei, Yanxi Li, Minjing Dong, Chang Xu", "tldr": "", "abstract": "With the increasing number of new neural architecture designs and substantial existing neural architectures, it becomes 
difficult for the researchers to situate their contributions compared with existing neural architectures or establish the connections between their designs and other relevant ones. To discover similar neural architectures in an efficient and automatic manner, we define a new problem Neural Architecture Retrieval which retrieves a set of existing neural architectures which have similar designs to the query neural architecture. Existing graph pre-training strategies cannot address the computational graph in neural architectures due to the graph size and motifs. To fulfill this potential, we propose to divide the graph into motifs which are used to rebuild the macro graph to tackle these issues, and introduce multi-level contrastive learning to achieve accurate graph representation learning. Extensive evaluations on both human-designed and synthesized neural architectures demonstrate the superiority of our algorithm. Such a dataset which contains 12k real-world network architectures, as well as their embedding, is built for neural architecture retrieval.", "keywords": "Information Retrieval;Vector Database;Neural Architecture Search", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/cca9b9f3c82ab4d835a6c262d56f627308c858d8.zip", "author": "Xiaohuan Pei;Yanxi Li;Minjing Dong;Chang Xu", "authorids": "~Xiaohuan_Pei1;~Yanxi_Li1;~Minjing_Dong1;~Chang_Xu4", "gender": ";M;M;", "homepage": ";;https://www.cs.cityu.edu.hk/~minjdong/;", "dblp": ";24/5261-1;246/2900.html;", "google_scholar": ";;https://scholar.google.com.au/citations?user=gJJRqlsAAAAJ;", "orcid": ";;0009-0003-1717-818X;", "linkedin": ";yanxi-li-3245a511a/;;", "or_profile": "~Xiaohuan_Pei1;~Yanxi_Li1;~Minjing_Dong1;~Chang_Xu4", "aff": ";University of Sydney;City University of Hong Kong;", "aff_domain": ";uni.sydney.edu.au;cityu.edu.hk;", "position": ";PhD student;Assistant Professor;", "bibtex": "@inproceedings{\npei2024neural,\ntitle={Neural Architecture Retrieval},\nauthor={Xiaohuan Pei and Yanxi Li and Minjing Dong and Chang Xu},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=1JtTPYBKqt}\n}", "github": "", "project": "", "reviewers": "NBZG;BEfk;wPum", "pdf_size": 4869531, "rating": "6;8;8", "confidence": "5;5;5", "soundness": "3;4;3", "contribution": "2;4;3", "presentation": "3;3;3", "wc_summary": "65;223;99", "wc_strengths": "86;182;148", "wc_weaknesses": "126;114;277", "wc_questions": "23;46;20", "wc_review": "300;565;544", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1061;561;532", "reply_reviewers": "0;0;0", "reply_authors": "2;1;1", "rating_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 5.0, 0.0 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 129.0, 67.90189000805992 ], "wc_strengths_avg": [ 138.66666666666666, 39.74362282877035 ], "wc_weaknesses_avg": [ 172.33333333333334, 74.17247168293339 ], "wc_questions_avg": [ 29.666666666666668, 11.61416759345623 ], "wc_review_avg": [ 469.6666666666667, 120.27838080423635 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 718.0, 242.82641262158174 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:9O_pp0lZQCcJ:scholar.google.com/&scioq=Neural+Architecture+Retrieval&hl=en&as_sdt=0,33", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=1JtTPYBKqt", "pdf": "https://openreview.net/pdf?id=1JtTPYBKqt", "email": ";uni.sydney.edu.au;cityu.edu.hk;", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "University of Sydney;City University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "https://www.sydney.edu.au;https://www.cityu.edu.hk", "aff_unique_abbr": "USYD;CityU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1", "aff_country_unique": "Australia;China" }, { "id": "1JuMFjSkpD", "title": "Fair Attribute Classification via Distance Covariance", "track": "main", "status": "Reject", "tldr": "", "abstract": "With the increasing prevalence of machine learning, concerns about fairness have emerged. Mitigating potential discrimination risks and preventing machine learning algorithms from making unfair predictions are essential goals in fairness machine learning. We tackle this challenge from a statistical perspective, utilizing distance covariance\u2014a powerful statistical method for measuring both linear and non-linear correlations\u2014as a measure to assess the independence between predictions and sensitive attributes. To enhance fairness in classification, we integrate the sample distance covariance as a manageable penalty term into the machine learning process to promote independence. Additionally, we optimize this constrained problem using the Lagrangian dual method, offering a better trade-off between accuracy and fairness. Theoretically, we provide a proof for the convergence between sample and population distance covariance, establishing necessary guarantees for batch computations. 
Through experiments conducted on a range of real-world datasets, we demonstrate that our approach can seamlessly extend to existing machine learning models and deliver competitive results.", "keywords": "Fair classification;distance covariance;Lagrange dual optimization;convergence in probability", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/eb522602cfe7be7ae88865bcbfeb59937ab0a8d9.pdf", "author": "Ruifan Huang;Haixia Liu", "authorids": "~Ruifan_Huang1;~Haixia_Liu1", "gender": "M;F", "homepage": "https://ruistarlit.github.io/;http://faculty.hust.edu.cn/liuhaixia1/en/index.htm", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Ruifan_Huang1;~Haixia_Liu1", "aff": "Huazhong University of Science and Technology;Huazhong University of Science and Technology", "aff_domain": "hust.edu.cn;hust.edu.cn", "position": "MS student;Associate Professor", "bibtex": "@misc{\nhuang2024fair,\ntitle={Fair Attribute Classification via Distance Covariance},\nauthor={Ruifan Huang and Haixia Liu},\nyear={2024},\nurl={https://openreview.net/forum?id=1JuMFjSkpD}\n}", "github": "", "project": "", "reviewers": "zY1Z;uH68;eHoC;8JtY", "site": "https://openreview.net/forum?id=1JuMFjSkpD", "pdf_size": 1033743, "rating": "5;5;5;5", "confidence": "3;4;3;4", "soundness": "2;3;3;3", "contribution": "2;2;2;3", "presentation": "3;3;3;3", "wc_summary": "40;75;95;60", "wc_strengths": "56;58;28;101", "wc_weaknesses": "151;176;87;271", "wc_questions": "138;26;531;21", "wc_review": "385;335;741;453", "wc_reply_reviewers": "0;40;0;93", "wc_reply_authors": "860;1419;0;1316", "reply_reviewers": "0;1;0;1", "reply_authors": "2;3;0;3", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 67.5, 20.155644370746373 ], "wc_strengths_avg": [ 60.75, 26.089988501338976 ], "wc_weaknesses_avg": [ 171.25, 66.10739368633436 ], "wc_questions_avg": [ 179.0, 208.54136280364142 ], "wc_review_avg": [ 478.5, 157.23469718862947 ], "wc_reply_reviewers_avg": [ 33.25, 38.16657569130351 ], "wc_reply_authors_avg": [ 898.75, 559.9131070978782 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:lvflX29AApEJ:scholar.google.com/&scioq=Fair+Attribute+Classification+via+Distance+Covariance&hl=en&as_sdt=0,23", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Huazhong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.hust.edu.cn", "aff_unique_abbr": "HUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "1M0qIxVKf6", "title": "Uncovering hidden geometry in Transformers via disentangling position and context", "track": "main", "status": "Reject", "tldr": "", "abstract": "Transformers are widely used to extract complex semantic meanings from input tokens, yet they usually operate as black-box models. In this paper, we present a simple yet informative decomposition of hidden states (or embeddings) of trained transformers into interpretable components. For any layer, embedding vectors of input sequence samples are a tensor $h \\in R^{C \\times T \\times d}$. 
Given embedding vector $h_{c,t} \\in R^d$ at sequence position $t \\le T$ in a sequence (or context) $c \\le C$, extracting the mean effects yields the decomposition \n$$\nh_{c,t} = \\mu + pos_t + ctx_c + resid_{c,t}\n$$\nwhere $\\mu$ is the global mean vector, $pos_t$ and $ctx_c$ are the mean vectors across contexts and across positions respectively, and $resid_{c,t}$ is the residual vector. For popular transformer architectures and diverse text datasets, empirically we find pervasive mathematical structure: (1) $(pos_t)_t$ forms a low-dimensional, continuous, and often spiral shape across layers, (2) $(ctx_c)_c$ shows \nclear cluster structure that falls into context topics, and (3) $(pos_t)_t$ and $(ctx_c)_c$ are mutually incoherent---namely $pos_t$ is almost orthogonal to $ctx_c$---which is canonical in compressed sensing and dictionary learning. This decomposition offers structural insights about input formats in in-context learning (especially for induction heads) and in length generalization (especially for arithmetic tasks).", "keywords": "Transformers;Positional embeddings;Incoherence;Induction head;Attention;Interpreting neural nets;Visualization", "primary_area": "visualization or interpretation of learned representations", "supplementary_material": "/attachment/bd2c2ad65b25ecc5a786e93647fbff84b987b2b0.zip", "author": "Jiajun Song;Yiqiao Zhong", "authorids": "~Jiajun_Song3;~Yiqiao_Zhong1", "gender": "M;M", "homepage": "https://jiajunsong629.github.io/;https://pages.stat.wisc.edu/~zhong35/", "dblp": ";140/7265", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Jiajun_Song3;~Yiqiao_Zhong1", "aff": "Beijing Institute for General Artificial Intelligence;University of Wisconsin - Madison", "aff_domain": "bigai.ai;wisc.edu", "position": "Researcher;Assistant Professor", "bibtex": "@misc{\nsong2024uncovering,\ntitle={Uncovering hidden geometry in Transformers via disentangling position and context},\nauthor={Jiajun Song and Yiqiao Zhong},\nyear={2024},\nurl={https://openreview.net/forum?id=1M0qIxVKf6}\n}", "github": "", "project": "", "reviewers": "rGTL;K9yz;Jv3k", "site": "https://openreview.net/forum?id=1M0qIxVKf6", "pdf_size": 14405346, "rating": "5;5;6", "confidence": "3;3;3", "soundness": "2;2;2", "contribution": "2;3;3", "presentation": "3;2;2", "wc_summary": "59;61;95", "wc_strengths": "33;88;73", "wc_weaknesses": "128;5;248", "wc_questions": "73;201;130", "wc_review": "293;355;546", "wc_reply_reviewers": "64;0;0", "wc_reply_authors": "486;448;742", "reply_reviewers": "1;0;0", "reply_authors": "1;1;1", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.0, 0.0 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 71.66666666666667, 16.519348924485158 ], "wc_strengths_avg": [ 64.66666666666667, 23.21398046197353 ], "wc_weaknesses_avg": [ 127.0, 99.20685460188726 ], "wc_questions_avg": [ 134.66666666666666, 52.359865885576475 ], "wc_review_avg": [ 398.0, 107.669246615116 ], "wc_reply_reviewers_avg": [ 21.333333333333332, 30.169889330626027 ], "wc_reply_authors_avg": [ 558.6666666666666, 130.5611819118616 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 12, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=14630901236651546134&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Beijing Institute for General Artificial Intelligence;University of Wisconsin-Madison", "aff_unique_dep": ";", "aff_unique_url": "http://www.bigaiai.org/;https://www.wisc.edu", "aff_unique_abbr": "BIGAI;UW-Madison", "aff_campus_unique_index": "1", "aff_campus_unique": ";Madison", "aff_country_unique_index": "0;1", "aff_country_unique": "China;United States" }, { "id": "1M8yDTa0Pp", "title": "Cross-Model Semi-Supervised Prompt Learning for Vision-Language Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "Prompt learning, which focuses on learning continuous soft prompts, has emerged as a promising approach for\nefficiently adapting pretrained vision-language models (VLMs) to multiple downstream tasks. While prior works have shown promising performances on common benchmarks, they typically rely on labeled data samples only. This greatly discredits the information gain from the vast collection of otherwise unlabeled samples available in the wild. To mitigate this, we propose a simple yet efficient cross-model framework to leverage on the unlabeled samples achieving significant gain in model performance. Specifically, we employ a semi-supervised prompt learning approach which makes the learned prompts invariant to the different views of a given unlabeled sample. The multiple views are obtained using different augmentations on the images as well as by varying the lengths of visual and text prompts attached to these samples. Experimenting with this simple yet surprisingly effective approach over a large number of benchmark datasets, we observe a considerable improvement in the quality of soft prompts thereby making an immense gain in image classification performance. Interestingly, our approach also benefits from out-of-domain unlabeled images highlighting the robustness and generalization capabilities. 
Our code will be made publicly available.", "keywords": "Semi-supervised learning;prompt learning;multi-modal learning;vision-language models", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/db10f3d5d8691e6f71ca4c0b745c7f4a44b306fd.zip", "author": "Omprakash Chakraborty;Aadarsh Sahoo;Rameswar Panda;Abir Das", "authorids": "~Omprakash_Chakraborty1;~Aadarsh_Sahoo1;~Rameswar_Panda1;~Abir_Das4", "gender": "M;M;M;M", "homepage": ";https://aadsah.github.io/;https://rpand002.github.io/;http://cse.iitkgp.ac.in/~adas/", "dblp": "182/4466.html;272/5285;126/0986;141/1311", "google_scholar": "https://scholar.google.co.in/citations?user=Z0uiqiIAAAAJ;https://scholar.google.co.in/citations?user=1nPhcH0AAAAJ;_ySuu6gAAAAJ;L4yEk2UAAAAJ", "orcid": ";;;", "linkedin": ";aadsah/;;", "or_profile": "~Omprakash_Chakraborty1;~Aadarsh_Sahoo1;~Rameswar_Panda1;~Abir_Das4", "aff": "Indian Institute of Technology Kharagpur, Dhirubhai Ambani Institute Of Information and Communication Technology;California Institute of Technology;MIT-IBM Watson AI Lab;Indian Institute of Technology Kharagpur", "aff_domain": "iitkgp.ac.in;caltech.edu;ibm.com;iitkgp.ac.in", "position": "PhD student;PhD student;Research Scientist;Assistant Professor", "bibtex": "@misc{\nchakraborty2024crossmodel,\ntitle={Cross-Model Semi-Supervised Prompt Learning for Vision-Language Models},\nauthor={Omprakash Chakraborty and Aadarsh Sahoo and Rameswar Panda and Abir Das},\nyear={2024},\nurl={https://openreview.net/forum?id=1M8yDTa0Pp}\n}", "github": "", "project": "", "reviewers": "MsJj;f2tm;xki8;18u3", "site": "https://openreview.net/forum?id=1M8yDTa0Pp", "pdf_size": 10629665, "rating": "3;3;6;6", "confidence": "4;4;4;5", "soundness": "1;3;3;3", "contribution": "1;2;3;3", "presentation": "2;2;3;4", "wc_summary": "97;65;66;140", "wc_strengths": "49;36;71;56", "wc_weaknesses": "742;59;98;135", "wc_questions": "16;98;31;58", "wc_review": "904;258;266;389", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1882;1229;375;1535", "reply_reviewers": "0;0;0;0", "reply_authors": "4;3;2;3", "rating_avg": [ 4.5, 1.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "contribution_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 92.0, 30.553232234904378 ], "wc_strengths_avg": [ 53.0, 12.62933094031509 ], "wc_weaknesses_avg": [ 258.5, 280.43938739057324 ], "wc_questions_avg": [ 50.75, 31.155858197135252 ], "wc_review_avg": [ 454.25, 264.8040549160832 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1255.25, 558.2572771581218 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896258, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Leqmo6cva9YJ:scholar.google.com/&scioq=Cross-Model+Semi-Supervised+Prompt+Learning+for+Vision-Language+Models&hl=en&as_sdt=0,14", "gs_version_total": 0, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Indian Institute of Technology Kharagpur;California Institute of Technology;Massachusetts Institute of Technology", "aff_unique_dep": ";;IBM Watson AI Lab", "aff_unique_url": "https://www.iitkgp.ac.in;https://www.caltech.edu;https://www.mitibmwatsonailab.org", "aff_unique_abbr": "IIT Kharagpur;Caltech;MIT-IBM AI Lab", "aff_campus_unique_index": "0;1;0", 
"aff_campus_unique": "Kharagpur;Pasadena;", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "India;United States" }, { "id": "1MRfyGLCcU", "title": "Graph Representation Learning enhanced Semi-supervised Feature Selection", "track": "main", "status": "Reject", "tldr": "", "abstract": "Feature selection process is essential in machine learning by discovering the most relevant features to the modeling target. By exploring the potential complex correlations among features of unlabeled data, recently introduced self-supervision-enhanced feature selection greatly reduces the reliance on the labeled samples. However, they are generally based on the autoencoder with sample-wise self-supervision, which can hardly exploit relations among samples. To address this limitation, this paper proposes Graph representation learning enhanced Semi-supervised Feature Selection(G-FS) which performs feature selection based on the discovery and exploitation of the non-Euclidean relations among features and samples by translating unlabeled ``plain\" tabular data into a bipartite graph. A self-supervised edge prediction task is designed to distill rich information on the graph into low-dimensional embeddings, which remove redundant features and noise. Guided by the condensed graph representation, we propose a batch-attention feature weight generation mechanism that generates more robust weights according to batch-based selection patterns rather than individual samples. The results show that G-FS achieves significant performance edges in 12 datasets compared to ten state-of-the-art baselines, including two recent self-supervised baselines.", "keywords": "Feature Selection\uff1bGraph Representation Learning; Batch Attention", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Jun Tan;Zhifeng Qiu;Ning Gui", "authorids": "~Jun_Tan3;~Zhifeng_Qiu2;~Ning_Gui1", "gender": ";;M", "homepage": ";https://faculty.csu.edu.cn/qiuzhifeng/zh_CN/index.htm;https://faculty.csu.edu.cn/guining/zh_CN/index.htm", "dblp": ";;30/3048", "google_scholar": ";;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0000-0002-3236-7275;;", "linkedin": ";;", "or_profile": "~Jun_Tan3;~Zhifeng_Qiu2;~Ning_Gui1", "aff": "Central South University;;Central South University", "aff_domain": "csu.edu.cn;;csu.edu.cn", "position": "MS student;;Full Professor", "bibtex": "@misc{\ntan2024graph,\ntitle={Graph Representation Learning enhanced Semi-supervised Feature Selection},\nauthor={Jun Tan and Zhifeng Qiu and Ning Gui},\nyear={2024},\nurl={https://openreview.net/forum?id=1MRfyGLCcU}\n}", "github": "", "project": "", "reviewers": "AraV;s9w1;n2md;KtqB", "site": "https://openreview.net/forum?id=1MRfyGLCcU", "pdf_size": 10610776, "rating": "3;5;5;6", "confidence": "4;4;3;4", "soundness": "2;2;3;3", "contribution": "1;3;2;3", "presentation": "2;3;2;2", "wc_summary": "83;58;76;120", "wc_strengths": "17;23;30;54", "wc_weaknesses": "91;59;19;124", "wc_questions": "8;59;78;44", "wc_review": "199;199;203;342", "wc_reply_reviewers": "0;38;0;0", "wc_reply_authors": "370;577;563;799", "reply_reviewers": "0;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 84.25, 22.56518335843961 ], "wc_strengths_avg": [ 31.0, 14.053469322555197 ], 
"wc_weaknesses_avg": [ 73.25, 38.84826250940961 ], "wc_questions_avg": [ 47.25, 25.66490794840301 ], "wc_review_avg": [ 235.75, 61.36519779158216 ], "wc_reply_reviewers_avg": [ 9.5, 16.454482671904334 ], "wc_reply_authors_avg": [ 577.25, 151.92823141207165 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.13245323570650439, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:lni0FW7_iHwJ:scholar.google.com/&scioq=Graph+Representation+Learning+enhanced+Semi-supervised+Feature+Selection&hl=en&as_sdt=0,44", "gs_version_total": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Central South University", "aff_unique_dep": "", "aff_unique_url": "https://www.csu.edu.cn", "aff_unique_abbr": "CSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "1MXQBsHA4Q", "title": "Revisiting Supervision for Continual Representation Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "In the field of continual learning, models are designed to learn tasks one after the other. While most research has centered on supervised continual learning, there is a growing interest in unsupervised continual learning, which makes use of the vast amounts of unlabeled data. Recent studies have highlighted the strengths of unsupervised methods, particularly self-supervised learning, in providing robust representations. The improved transferability of those representations built with self-supervised methods is often associated with the role played by the multi-layer perceptron projector. In this work, we depart from this observation and reexamine the role of supervision in continual representation learning. We reckon that additional information, such as human annotations, should not deteriorate the quality of representations. Our findings show that supervised models when enhanced with a multi-layer perceptron head, can outperform self-supervised models in continual representation learning. 
This highlights the importance of the multi-layer perceptron projector in shaping feature transferability across a sequence of tasks in continual learning.", "keywords": "continual learning;self-supervised representation learning", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "/attachment/bdc8ba3e3ea936fda0476f4ddaf7679ba78196e0.zip", "author": "Daniel Marczak;Sebastian Cygert;Tomasz Trzcinski;Bart\u0142omiej Twardowski", "authorids": "~Daniel_Marczak2;~Sebastian_Cygert1;~Tomasz_Trzcinski2;~Bart\u0142omiej_Twardowski1", "gender": ";M;M;M", "homepage": ";https://pg.edu.pl/en/p/sebastian-cygert-1128802;https://cvlab.ii.pw.edu.pl/ttrzcins/;", "dblp": ";138/3693;05/11408;156/6628", "google_scholar": "Vs4kBzQAAAAJ;https://scholar.google.pl/citations?user=wLH9PP8AAAAJ;https://scholar.google.pl/citations?user=bJMRBFoAAAAJ;https://scholar.google.pl/citations?user=8yywECgAAAAJ", "orcid": ";;;0000-0003-2117-8679", "linkedin": ";sebastiancygert/;;bartlomiejtwardowski/", "or_profile": "~Daniel_Marczak2;~Sebastian_Cygert1;~Tomasz_Trzcinski2;~Bart\u0142omiej_Twardowski1", "aff": "Warsaw University of Technology;IDEAS NCBR;Warsaw University of Technology;Computer Vision Center, Universitat Aut\u00f2noma de Barcelona", "aff_domain": "pw.edu.pl;ideas-ncbr.pl;pw.edu.pl;cvc.uab.es", "position": "PhD student;Postdoc;Full Professor;Postdoc", "bibtex": "@misc{\nmarczak2024revisiting,\ntitle={Revisiting Supervision for Continual Representation Learning},\nauthor={Daniel Marczak and Sebastian Cygert and Tomasz Trzcinski and Bart{\\l}omiej Twardowski},\nyear={2024},\nurl={https://openreview.net/forum?id=1MXQBsHA4Q}\n}", "github": "", "project": "", "reviewers": "Zari;6fut;vMLn", "site": "https://openreview.net/forum?id=1MXQBsHA4Q", "pdf_size": 1605415, "rating": "3;3;5", "confidence": "5;3;2", "soundness": "2;2;3", "contribution": "2;2;2", "presentation": "3;3;2", "wc_summary": "102;81;155", "wc_strengths": "138;40;55", "wc_weaknesses": "384;172;177", "wc_questions": "225;5;95", "wc_review": "849;298;482", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 1.247219128924647 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 112.66666666666667, 31.13768706175132 ], "wc_strengths_avg": [ 77.66666666666667, 43.099368389287974 ], "wc_weaknesses_avg": [ 244.33333333333334, 98.7803399242762 ], "wc_questions_avg": [ 108.33333333333333, 90.30811456096045 ], "wc_review_avg": [ 543.0, 229.0429362950682 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 3, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7559289460184545, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9566271489219903541&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Warsaw University of Technology;Institute for Development, Economic Analysis, and Simulation (IDEAS);Universitat Aut\u00f2noma de Barcelona", "aff_unique_dep": ";;Computer Vision Center", "aff_unique_url": "https://www.pw.edu.pl;https://www.ideas-ncbr.gov.pl;https://www.uab.cat", "aff_unique_abbr": "WUT;IDEAS;UAB", "aff_campus_unique_index": "", 
"aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "Poland;Spain" }, { "title": "Data Distillation Can Be Like Vodka: Distilling More Times For Better Quality", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19592", "id": "1NHgmKqOzZ", "author_site": "Xuxi Chen, Yu Yang, Zhangyang Wang, Baharan Mirzasoleiman", "tldr": "", "abstract": "Dataset distillation aims to minimize the time and memory needed for training deep networks on large datasets, by creating a small set of synthetic images that has a similar generalization performance to that of the full dataset. However, current dataset distillation techniques fall short, showing a notable performance gap compared to training on the original data. In this work, we are the first to argue that the use of only one synthetic subset for distillation may not yield optimal generalization performance. This is because the training dynamics of deep networks drastically changes during training. Therefore, multiple synthetic subsets are required to capture the dynamics of training in different stages. To address this issue, we propose Progressive Dataset Distillation (PDD). PDD synthesizes multiple small sets of synthetic images, each conditioned on the previous sets, and trains the model on the cumulative union of these subsets without requiring additional training time. Our extensive experiments show that PDD can effectively improve the performance of existing dataset distillation methods by up to 4.3%. In addition, our method for the first time enables generating considerably larger synthetic datasets. Our codes are available at https://github.com/VITA-Group/ProgressiveDD.", "keywords": "dataset distillation;dataset condensation", "primary_area": "optimization", "supplementary_material": "", "author": "Xuxi Chen;Yu Yang;Zhangyang Wang;Baharan Mirzasoleiman", "authorids": "~Xuxi_Chen1;~Yu_Yang4;~Zhangyang_Wang1;~Baharan_Mirzasoleiman1", "gender": "Unspecified;F;M;F", "homepage": ";https://sites.google.com/view/yuyang0901/home;https://vita-group.github.io;http://web.cs.ucla.edu/~baharan/", "dblp": "267/9662;16/4505-7;119/4026;52/10075", "google_scholar": "afsDlKYAAAAJ;KK6Yj4IAAAAJ;pxFyKAIAAAAJ;x63j7HEAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Xuxi_Chen1;~Yu_Yang4;~Zhangyang_Wang1;~Baharan_Mirzasoleiman1", "aff": "University of Texas at Austin;University of California, Los Angeles;University of Texas at Austin;University of California, Los Angeles", "aff_domain": "utexas.edu;ucla.edu;utexas.edu;ucla.edu", "position": "PhD student;PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nchen2024data,\ntitle={Data Distillation Can Be Like Vodka: Distilling More Times For Better Quality},\nauthor={Xuxi Chen and Yu Yang and Zhangyang Wang and Baharan Mirzasoleiman},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=1NHgmKqOzZ}\n}", "github": "", "project": "", "reviewers": "rp5C;S7ej;xT2K", "pdf_size": 4454536, "rating": "5;6;8", "confidence": "4;5;3", "soundness": "3;2;3", "contribution": "3;2;3", "presentation": "3;2;3", "wc_summary": "42;70;100", "wc_strengths": "27;52;115", "wc_weaknesses": "153;196;1", "wc_questions": "2;36;10", "wc_review": "224;354;226", "wc_reply_reviewers": "118;95;0", "wc_reply_authors": "1608;836;160", "reply_reviewers": "2;1;0", "reply_authors": "5;3;1", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 4.0, 
0.816496580927726 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 70.66666666666667, 23.683092891108814 ], "wc_strengths_avg": [ 64.66666666666667, 37.025516726831626 ], "wc_weaknesses_avg": [ 116.66666666666667, 83.65139302818308 ], "wc_questions_avg": [ 16.0, 14.514360704718161 ], "wc_review_avg": [ 268.0, 60.81666438293592 ], "wc_reply_reviewers_avg": [ 71.0, 51.07510809255979 ], "wc_reply_authors_avg": [ 868.0, 591.5764250430088 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 3.0, 1.632993161855452 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6546536707079772, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1440553960173365966&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "openreview": "https://openreview.net/forum?id=1NHgmKqOzZ", "pdf": "https://openreview.net/pdf?id=1NHgmKqOzZ", "email": "utexas.edu;ucla.edu;utexas.edu;ucla.edu", "author_num": 4, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "University of Texas at Austin;University of California, Los Angeles", "aff_unique_dep": ";", "aff_unique_url": "https://www.utexas.edu;https://www.ucla.edu", "aff_unique_abbr": "UT Austin;UCLA", "aff_campus_unique_index": "0;1;0;1", "aff_campus_unique": "Austin;Los Angeles", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "1OP4crhgkD", "title": "Semantically Aligned Task Decomposition in Multi-Agent Reinforcement Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "The difficulty of appropriately assigning credit is particularly heightened in cooperative MARL with sparse reward, due to the concurrent time and structural scales involved. Automatic subgoal generation (ASG) has recently emerged as a viable MARL approach inspired by utilizing sub-goals in intrinsically motivated reinforcement learning. However, end-to-end learning of complex task planning from sparse rewards without prior knowledge, undoubtedly requires massive training samples. Moreover, the diversity-promoting nature of existing ASG methods can lead to the \"over-representation\" of sub-goals, generating numerous spurious sub-goals of limited relevance to the actual task reward and thus decreasing the sample efficiency of the algorithm. To address this problem and inspired by the disentangled representation learning, we propose a novel \"disentangled\" decision-making method, $\\textbf{S}$emantically $\\textbf{A}$ligned task decomposition in $\\textbf{MA}$RL ($\\textbf{SAMA}$), that prompts pretrained language models with chain-of-thought that can suggest potential goals, provide suitable goal decomposition and subgoal allocation as well as self-reflection-based replanning. Additionally, SAMA incorporates language-grounded RL to train each agent's subgoal-conditioned policy. 
SAMA demonstrates considerable advantages in sample efficiency compared to state-of-the-art ASG methods, as evidenced by its performance on two challenging sparse-reward tasks, $\\texttt{Overcooked}$ and $\\texttt{MiniRTS}$.", "keywords": "Pretrained Language Models;Multi-Agent Reinforcement Learning;Language-Grounded Reinforcement Learning", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Wenhao Li;Dan Qiao;Xiangfeng Wang;Bo Jin;Baoxiang Wang;Hongyuan Zha", "authorids": "~Wenhao_Li2;~Dan_Qiao3;~Xiangfeng_Wang1;~Bo_Jin1;~Baoxiang_Wang1;~Hongyuan_Zha1", "gender": "M;;M;;;", "homepage": "https://tomaxent.com;https://qiaodan-cuhk.github.io;https://xfwang87.github.io/;;;", "dblp": ";152/4915-3.html;84/4695;;;z/HongyuanZha", "google_scholar": "HAtzuaYAAAAJ;;YpGMkgsAAAAJ;;;n1DQMIsAAAAJ", "orcid": ";0000-0002-2364-6897;;;;", "linkedin": ";;;;;", "or_profile": "~Wenhao_Li2;~Dan_Qiao3;~Xiangfeng_Wang1;~Bo_Jin1;~Baoxiang_Wang1;~Hongyuan_Zha1", "aff": "The Chinese University of Hong Kong, Shenzhen;Chinese University of Hong Kong, Shen Zhen;East China Normal University;;;The Chinese University of Hong Kong, Shenzhen", "aff_domain": "cuhk.edu.cn;link.cuhk.edu.cn;ecnu.edu.cn;;;cuhk.edu.cn", "position": "Postdoc;PhD student;Full Professor;;;Full Professor", "bibtex": "@misc{\nli2024semantically,\ntitle={Semantically Aligned Task Decomposition in Multi-Agent Reinforcement Learning},\nauthor={Wenhao Li and Dan Qiao and Xiangfeng Wang and Bo Jin and Baoxiang Wang and Hongyuan Zha},\nyear={2024},\nurl={https://openreview.net/forum?id=1OP4crhgkD}\n}", "github": "", "project": "", "reviewers": "b4Ao;T35w;DBEC;Um1j", "site": "https://openreview.net/forum?id=1OP4crhgkD", "pdf_size": 3540389, "rating": "3;5;6;6", "confidence": "4;2;4;3", "soundness": "3;2;3;3", "contribution": "2;2;3;3", "presentation": "3;2;2;3", "wc_summary": "43;85;48;82", "wc_strengths": "52;43;84;138", "wc_weaknesses": "609;93;123;148", "wc_questions": "128;32;171;140", "wc_review": "832;253;426;508", "wc_reply_reviewers": "95;86;643;80", "wc_reply_authors": "3324;2069;3529;2167", "reply_reviewers": "1;1;3;1", "reply_authors": "8;5;8;5", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 64.5, 19.11151485361639 ], "wc_strengths_avg": [ 79.25, 37.18450618201081 ], "wc_weaknesses_avg": [ 243.25, 212.06175397746762 ], "wc_questions_avg": [ 117.75, 51.934453881792194 ], "wc_review_avg": [ 504.75, 210.1682361823499 ], "wc_reply_reviewers_avg": [ 226.0, 240.814243764774 ], "wc_reply_authors_avg": [ 2772.25, 659.163627258058 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 6.5, 1.5 ], "replies_avg": [ 39, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.24618298195866545, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1403065174946763393&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Chinese University of Hong Kong;East China Normal University", "aff_unique_dep": ";", "aff_unique_url": "https://www.cuhk.edu.cn;http://www.ecnu.edu.cn", "aff_unique_abbr": "CUHK;ECNU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Shenzhen;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Backdoor Secrets Unveiled: Identifying Backdoor Data with Optimized Scaled Prediction Consistency", 
"status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19591", "id": "1OfAO2mes1", "author_site": "Soumyadeep Pal, Yuguang Yao, Ren Wang, Bingquan Shen, Sijia Liu", "tldr": "", "abstract": "Modern machine learning (ML) systems demand substantial training data, often resorting to external sources. Nevertheless, this practice renders them vulnerable to backdoor poisoning attacks. Prior backdoor defense strategies have primarily focused on the identification of backdoored models or poisoned data characteristics, typically operating under the assumption of access to clean data. In this work, we delve into a relatively underexplored challenge: the automatic identification of backdoor data within a poisoned dataset, all under realistic conditions, *i.e.*, without the need for additional clean data or without manually defining a threshold for backdoor detection. We draw an inspiration from the scaled prediction consistency (SPC) technique, which exploits the prediction invariance of poisoned data to an input scaling factor. Based on this, we pose the backdoor data identification problem as a hierarchical data splitting optimization problem, leveraging a novel SPC-based loss function as the primary optimization objective. Our innovation unfolds in several key aspects. First, we revisit the vanilla SPC method, unveiling its limitations in addressing the proposed backdoor identification problem. Subsequently, we develop a bi-level optimization-based approach to precisely identify backdoor data by minimizing the advanced SPC loss. Finally, we demonstrate the efficacy of our proposal against a spectrum of backdoor attacks, encompassing basic label-corrupted attacks as well as more sophisticated clean-label attacks, evaluated across various benchmark datasets. Experiment results show that our approach often surpasses the performance of current baselines in identifying backdoor data points, resulting in about 4\\%-36\\% improvement in average AUROC. 
Codes are available at https://github.com/OPTML-Group/BackdoorMSPC.", "keywords": "Backdoor Detection;Backdoor Attack;Data Poisoning;AI Security;Deep learning", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/d2c5665558b4c332568545796e2d21302eb03e53.zip", "author": "Soumyadeep Pal;Yuguang Yao;Ren Wang;Bingquan Shen;Sijia Liu", "authorids": "~Soumyadeep_Pal1;~Yuguang_Yao1;~Ren_Wang1;~Bingquan_Shen1;~Sijia_Liu1", "gender": ";M;M;;M", "homepage": ";https://www.cse.msu.edu/~yaoyugua/;https://wangren09.github.io/;;https://lsjxjtu.github.io/", "dblp": "236/2130.html;238/9467;29/50-8;151/9308;128/6972-1", "google_scholar": "https://scholar.google.ca/citations?user=c2VU-_4AAAAJ;-chIdAkAAAAJ;TY_3K48AAAAJ;https://scholar.google.com.sg/citations?user=zrJdj6YAAAAJ;C7dO_UgAAAAJ", "orcid": ";;0000-0002-6366-8898;;", "linkedin": ";tonyyaomsu/;ren-wang-715525106/;;", "or_profile": "~Soumyadeep_Pal1;~Yuguang_Yao1;~Ren_Wang1;~Bingquan_Shen1;~Sijia_Liu1", "aff": "Michigan State University;Michigan State University;Illinois Institute of Technology;DSO National Labs;Michigan State University", "aff_domain": "msu.edu;msu.edu;iit.edu;dso.org.sg;msu.edu", "position": "PhD student;PhD student;Assistant Professor;Researcher;Assistant Professor", "bibtex": "@inproceedings{\npal2024backdoor,\ntitle={Backdoor Secrets Unveiled: Identifying Backdoor Data with Optimized Scaled Prediction Consistency},\nauthor={Soumyadeep Pal and Yuguang Yao and Ren Wang and Bingquan Shen and Sijia Liu},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=1OfAO2mes1}\n}", "github": "", "project": "", "reviewers": "nd7k;wHYC;V1zd;YD42", "pdf_size": 3995781, "rating": "6;6;6;6", "confidence": "4;4;4;5", "soundness": "3;2;3;2", "contribution": "2;2;2;2", "presentation": "2;2;3;2", "wc_summary": "89;203;35;65", "wc_strengths": "45;33;58;23", "wc_weaknesses": "186;366;69;196", "wc_questions": "32;3;6;5", "wc_review": "352;605;168;289", "wc_reply_reviewers": "104;116;32;422", "wc_reply_authors": "742;798;187;1069", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;1;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 98.0, 63.56886030125127 ], "wc_strengths_avg": [ 39.75, 13.102957681378658 ], "wc_weaknesses_avg": [ 204.25, 105.89706086572942 ], "wc_questions_avg": [ 11.5, 11.884864324004713 ], "wc_review_avg": [ 353.5, 159.5501488560885 ], "wc_reply_reviewers_avg": [ 168.5, 149.842417225564 ], "wc_reply_authors_avg": [ 699.0, 320.4270587824942 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5843399313782917939&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=1OfAO2mes1", "pdf": "https://openreview.net/pdf?id=1OfAO2mes1", "email": "msu.edu;msu.edu;iit.edu;dso.org.sg;msu.edu", "author_num": 5, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Michigan State University;Illinois Institute of Technology;DSO National Laboratories", "aff_unique_dep": ";;", "aff_unique_url": "https://www.msu.edu;https://www.iit.edu;https://www.dso.org.sg", "aff_unique_abbr": "MSU;IIT;DSO", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "United States;Singapore" }, { "id": "1P1nxem1jU", "title": "Through the Dual-Prism: A Spectral Perspective on Graph Data Augmentation for Graph Classification", "track": "main", "status": "Reject", "tldr": "", "abstract": "Graph Neural Networks (GNNs) have become the preferred tool to process graph data, with their efficacy being boosted through graph data augmentation techniques. Despite the evolution of augmentation methods, issues like graph property distortions and restricted structural changes persist. This leads to the question: Is it possible to develop more property-conserving and structure-sensitive augmentation methods? Through a spectral lens, we investigate the interplay between graph properties, their augmentation, and their spectral behavior, and found that keeping the low-frequency eigenvalues unchanged can preserve the critical properties at a large scale when generating augmented graphs. These observations inform our introduction of the Dual-Prism (DP) augmentation method, comprising DP-Noise and DP-Mask, which adeptly retains essential graph properties while diversifying augmented graphs. Extensive experiments validate the efficiency of our approach, providing a new and promising direction for graph data augmentation.", "keywords": "Graph Data Augmentation;Graph Neural Network;Graph Classification", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/dca49022b18da49aeab43748c4e1ab9b55e516d8.zip", "author": "Yutong Xia;Runpeng Yu;Yuxuan Liang;Xavier Bresson;Xinchao Wang;Roger Zimmermann", "authorids": "~Yutong_Xia1;~Runpeng_Yu2;~Yuxuan_Liang1;~Xavier_Bresson6;~Xinchao_Wang1;~Roger_Zimmermann1", "gender": "F;M;M;M;;M", "homepage": "https://yutong-xia.github.io/;https://yuxuanliang.com;https://www.comp.nus.edu.sg/cs/people/xaviercs/;https://www.comp.nus.edu.sg/cs/bio/rogerz/;https://yu-rp.github.io/;https://sites.google.com/site/sitexinchaowang/", "dblp": "307/5917;183/0977;95/378;79/1490;290/7625-1;", "google_scholar": "V7b4y2oAAAAJ;n9cODgcAAAAJ;https://scholar.google.com.sg/citations?hl=en;https://scholar.google.com.tw/citations?user=IDREwXEAAAAJ;;https://scholar.google.com.tw/citations?user=w69Buq0AAAAJ", "orcid": "0000-0001-9026-0049;0000-0003-2817-7337;;0000-0002-7410-2590;;", "linkedin": "yutong-xia/;yoshall/;;roger-zimmermann-76b56b6/;;", "or_profile": "~Yutong_Xia1;~Yuxuan_Liang1;~Xavier_Bresson6;~Roger_Zimmermann1;~R_Yu1;~Xinchao_WANG3", "aff": "National University of Singapore;The Hong Kong University of Science and Technology (Guangzhou);National University of Singapore;National University of Singapore;National University of Singapore;National University of Singapore", "aff_domain": "u.nus.edu;hkust-gz.edu.cn;nus.edu.sg;nus.edu.sg;u.nus.edu;nus.edu", "position": "PhD student;Assistant Professor;Associate Professor;Full Professor;PhD student;Assistant Professor", "bibtex": "@misc{\nxia2024through,\ntitle={Through the Dual-Prism: A Spectral Perspective on Graph Data Augmentation for Graph Classification},\nauthor={Yutong Xia and Runpeng Yu and Yuxuan Liang and Xavier Bresson and Xinchao Wang and Roger Zimmermann},\nyear={2024},\nurl={https://openreview.net/forum?id=1P1nxem1jU}\n}", "github": "", "project": "", "reviewers": "oQQa;7Xfr;48h8;oF9L", "site": "https://openreview.net/forum?id=1P1nxem1jU", "pdf_size": 726271, "rating": "5;5;6;8", "confidence": 
"4;3;4;4", "soundness": "2;3;4;3", "contribution": "2;2;3;4", "presentation": "3;4;3;3", "wc_summary": "64;38;62;80", "wc_strengths": "26;42;48;151", "wc_weaknesses": "213;66;156;99", "wc_questions": "2;294;40;74", "wc_review": "305;440;306;404", "wc_reply_reviewers": "11;64;18;0", "wc_reply_authors": "1343;1121;747;577", "reply_reviewers": "1;1;1;0", "reply_authors": "4;4;3;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 61.0, 15.0 ], "wc_strengths_avg": [ 66.75, 49.30200300190653 ], "wc_weaknesses_avg": [ 133.5, 56.06469477309227 ], "wc_questions_avg": [ 102.5, 113.4581420612906 ], "wc_review_avg": [ 363.75, 59.62539308046531 ], "wc_reply_reviewers_avg": [ 23.25, 24.38621536852326 ], "wc_reply_authors_avg": [ 947.0, 301.65874759403215 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 1.224744871391589 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.4714045207910316, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10391579270727837642&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;0;0;0;0", "aff_unique_norm": "National University of Singapore;Hong Kong University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;https://www.ust.hk", "aff_unique_abbr": "NUS;HKUST", "aff_campus_unique_index": "1", "aff_campus_unique": ";Guangzhou", "aff_country_unique_index": "0;1;0;0;0;0", "aff_country_unique": "Singapore;China" }, { "id": "1P92J25hdf", "title": "Going Deeper with General and Specific Inductive Bias for Real-Time Stereo Matching", "track": "main", "status": "Reject", "tldr": "", "abstract": "Inductive Bias (IB) has sparked a revolutionary transformation by incorporating the advantages of CNNs and Transformers, including scale invariance and integration of locality and long-range dependencies, which is called general IB for its wide applicability. However, its efficacy is currently not enjoyed by stereo matching, one of the geometric vision tasks, because of the ignorance of volume-level scale invariance and the limitation of high real-time requirement. In contrast, a specific IB is adopted by constructing volume structure in stereo matching task, which helps to finally generate a confidence volume to predict disparity map (output), but fewer studies go into the specific volume structure. Based on the above issues, this paper develops a novel model named UStereo to introduce the general IB to stereo matching. Technically, we adopt inter-layer fusion to break down volume-level scale invariance to a recurrence strategy in initialization for information at low resolution and refinement process for the high, which further extends to capture long-range dependencies after shallow stacks of convolutions and normalization without time-consuming Transformers. Additionally, to reveal the role that the volume structure constructed by specific IB plays during inference, we propose the first-time in-depth study of volume at low resolution through varying degrees of restraint as well as 3 original statistic indicators to reflect the characteristics of representation within volumes. 
Experiments demonstrate UStereo has competitive performance with both fast speed and robust generalization, and ablation studies show the effectiveness of introducing general IB. Moreover, our analysis of the volumes at low resolution suggests they can be viewed as confidence volumes and a concentrated distribution of the disparity within volumes leads to enhanced performance, which could extend the role of the specific IB.", "keywords": "Stereo Matching;Inductive Bias;Deep Supervision", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Renjie Ding;Yaonan Wang;Min Liu;Jiazheng Wang;zhe zhang", "authorids": "~Renjie_Ding1;~Yaonan_Wang3;~Min_Liu3;~Jiazheng_Wang1;~zhe_zhang21", "gender": "M;M;M;M;M", "homepage": "https://github.com/EliottDJay;https://eeit.hnu.edu.cn/info/1277/4490.htm;http://eeit.hnu.edu.cn/info/1291/5218.htm;;https://www.researchgate.net/login?_sg=elMhO2GcXYO82O_IF-XlefOkM8rHoMFdlwKPDT3wfsTmGNAHiXSr1Mec4-Uw1GVQpzIkAURi-i3u4eEkHwErug&_tp=eyJjb250ZXh0Ijp7ImZpcnN0UGFnZSI6ImluZGV4IiwicGFnZSI6ImluZGV4In19", "dblp": ";90/548-1;99/76-8;;", "google_scholar": ";;njVL32IAAAAJ;;", "orcid": ";0009-0004-5365-6254;0000-0001-6406-4896;0000-0003-2534-4232;", "linkedin": ";;;;", "or_profile": "~Renjie_Ding1;~Yaonan_Wang3;~Min_Liu3;~Jiazheng_Wang1;~zhe_zhang21", "aff": "Hunan University;Hunan University;Hunan University;Hunan University;Hunan University", "aff_domain": "hnu.edu.cn;hnu.edu.cn;hnu.edu.cn;hnu.edu.cn;hnu.edu.cn", "position": "PhD student;Full Professor;Full Professor;PhD student;Researcher", "bibtex": "@misc{\nding2024going,\ntitle={Going Deeper with General and Specific Inductive Bias for Real-Time Stereo Matching},\nauthor={Renjie Ding and Yaonan Wang and Min Liu and Jiazheng Wang and zhe zhang},\nyear={2024},\nurl={https://openreview.net/forum?id=1P92J25hdf}\n}", "github": "", "project": "", "reviewers": "CP2t;ocBs;DNk5;J9Ts;1LXD", "site": "https://openreview.net/forum?id=1P92J25hdf", "pdf_size": 2362220, "rating": "1;3;3;3;3", "confidence": "4;4;4;3;5", "soundness": "2;3;2;2;3", "contribution": "2;2;2;2;3", "presentation": "1;1;1;2;3", "wc_summary": "66;87;104;9;106", "wc_strengths": "101;34;71;9;70", "wc_weaknesses": "294;92;317;82;50", "wc_questions": "69;49;94;46;11", "wc_review": "530;262;586;146;237", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "0;0;0;0;0", "rating_avg": [ 2.6, 0.8000000000000002 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "contribution_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 1.6, 0.8 ], "wc_summary_avg": [ 74.4, 35.724501396100685 ], "wc_strengths_avg": [ 57.0, 32.04372013359248 ], "wc_weaknesses_avg": [ 167.0, 114.16479317197575 ], "wc_questions_avg": [ 53.8, 27.43282705081633 ], "wc_review_avg": [ 352.2, 173.32097391833454 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:pDJzbkPlzEUJ:scholar.google.com/&scioq=Going+Deeper+with+General+and+Specific+Inductive+Bias+for+Real-Time+Stereo+Matching&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Hunan University", "aff_unique_dep": "", "aff_unique_url": "http://www.hunu.edu.cn/", 
"aff_unique_abbr": "HNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "1PPjf4wife", "title": "Leveraging Large Language Models for Optimised Coordination in Textual Multi-Agent Reinforcement Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Cooperative multi-agent reinforcement learning (MARL) presents unique challenges, amongst which fostering general cooperative behaviour across various tasks is critical. Recently, large language models (LLMs) have excelled at dealing with challenges in the general RL paradigm, showcasing remarkable sample efficiency and adaptability across tasks through domain specific fine-tuning, or functional alignment. However, neither LLMs nor these fine-tuning approaches are designed with coordination-centric solutions in mind, and the challenge of how to achieve greater coordination, and hence performance, with LLMs in MARL has not yet been tackled. To address this, we introduce the 'Functionally-Aligned Multi-Agents' (FAMA) framework. FAMA harnesses LLMs' inherent knowledge for cooperative decision-making via two primary mechanisms. Firstly, it aligns the LLM with the necessary functional knowledge through a centralised on-policy MARL update rule. Secondly, it recognises the pivotal role of communication in coordination and exploits the linguistic strengths of LLMs for intuitive, natural language inter-agent message-passing. Evaluations of FAMA in two multi-agent textual environments, namely BabyAI-Text and an autonomous driving junction environment, over four coordination tasks show it consistently outperforms independent learning LLMs and traditional symbolic RL methods.", "keywords": "llm;marl", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Oliver Slumbers;David Henry Mguni;Kun Shao;Jun Wang", "authorids": "~Oliver_Slumbers1;~David_Henry_Mguni1;~Kun_Shao1;~Jun_Wang2", "gender": ";M;;M", "homepage": ";;;http://www0.cs.ucl.ac.uk/staff/jun.wang/", "dblp": "285/5044;217/2369;;w/JunWang12", "google_scholar": "obYGSVIAAAAJ;K-_yzBsAAAAJ;;https://scholar.google.co.uk/citations?user=wIE1tY4AAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Oliver_Slumbers1;~David_Henry_Mguni1;~Kun_Shao1;~Jun_Wang2", "aff": "University College London;Queen Mary University, London;;University College London", "aff_domain": "ucl.ac.uk;qmul.ac.uk;;ucl.ac.uk", "position": "PhD student;Lecturer;;Professor", "bibtex": "@misc{\nslumbers2024leveraging,\ntitle={Leveraging Large Language Models for Optimised Coordination in Textual Multi-Agent Reinforcement Learning},\nauthor={Oliver Slumbers and David Henry Mguni and Kun Shao and Jun Wang},\nyear={2024},\nurl={https://openreview.net/forum?id=1PPjf4wife}\n}", "github": "", "project": "", "reviewers": "dP6r;q4e1;Mrhw;cs97", "site": "https://openreview.net/forum?id=1PPjf4wife", "pdf_size": 1953557, "rating": "3;3;5;6", "confidence": "4;3;3;4", "soundness": "2;2;4;3", "contribution": "2;2;2;3", "presentation": "2;2;4;2", "wc_summary": "70;54;35;80", "wc_strengths": "64;71;25;47", "wc_weaknesses": "110;267;24;87", "wc_questions": "36;8;172;4", "wc_review": "280;400;256;218", "wc_reply_reviewers": "26;22;0;12", "wc_reply_authors": "454;929;623;368", "reply_reviewers": "1;1;0;1", "reply_authors": "1;2;1;1", "rating_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], 
"presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 59.75, 17.03489066592445 ], "wc_strengths_avg": [ 51.75, 17.73943347460679 ], "wc_weaknesses_avg": [ 122.0, 89.43992397134515 ], "wc_questions_avg": [ 55.0, 68.66585760041157 ], "wc_review_avg": [ 288.5, 68.06430782723056 ], "wc_reply_reviewers_avg": [ 15.0, 10.04987562112089 ], "wc_reply_authors_avg": [ 593.5, 214.32510352266252 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.19245008972987526, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16409135025345974855&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "University College London;Queen Mary University of London", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucl.ac.uk;https://www.qmul.ac.uk", "aff_unique_abbr": "UCL;QMUL", "aff_campus_unique_index": "1", "aff_campus_unique": ";London", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Bespoke Solvers for Generative Flow Models", "status": "Spotlight", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19590", "id": "1PXEY7ofFX", "author_site": "Neta Shaul, Juan Perez, Ricky T. Q. Chen, Ali Thabet, Albert Pumarola, Yaron Lipman", "tldr": "", "abstract": "Diffusion or flow-based models are powerful generative paradigms that are notoriously hard to sample as samples are defined as solutions to high-dimensional Ordinary or Stochastic Differential Equations (ODEs/SDEs) which require a large Number of Function Evaluations (NFE) to approximate well. Existing methods to alleviate the costly sampling process include model distillation and designing dedicated ODE solvers. However, distillation is costly to train and sometimes can deteriorate quality, while dedicated solvers still require relatively large NFE to produce high quality samples. In this paper we introduce ``Bespoke solvers'', a novel framework for constructing custom ODE solvers tailored to the ODE of a given pre-trained flow model. Our approach optimizes an order consistent and parameter-efficient solver (e.g., with 80 learnable parameters), is trained for roughly 1\\% of the GPU time required for training the pre-trained model, and significantly improves approximation and generation quality compared to dedicated solvers. For example, a Bespoke solver for a CIFAR10 model produces samples with Fr\u00e9chet Inception Distance (FID) of 2.73 with 10 NFE, and gets to 1\\% of the Ground Truth (GT) FID (2.59) for this model with only 20 NFE. On the more challenging ImageNet-64$\\times$64, Bespoke samples at 2.2 FID with 10 NFE, and gets within 2\\% of GT FID (1.71) with 20 NFE.", "keywords": "generative models;flow matching;diffusion models;normalizing flows;ode solver;fast sampling;distillation", "primary_area": "generative models", "supplementary_material": "", "author": "Neta Shaul;Juan Perez;Ricky T. Q. 
Chen;Ali Thabet;Albert Pumarola;Yaron Lipman", "authorids": "~Neta_Shaul1;~Juan_Perez1;~Ricky_T._Q._Chen1;~Ali_Thabet1;~Albert_Pumarola2;~Yaron_Lipman1", "gender": "M;;;M;;", "homepage": ";https://dblp.org/pid/196/0064.htmll;;https://www.alithabet.com/;;", "dblp": ";;;161/1812;;", "google_scholar": ";;;7T0CPEkAAAAJ;;", "orcid": ";;;;;", "linkedin": "neta-shaul-3364aa235/;;;akthabet/;;", "or_profile": "~Neta_Shaul1;~Juan_Perez1;~Ricky_T._Q._Chen1;~Ali_Thabet1;~Albert_Pumarola2;~Yaron_Lipman1", "aff": "Weizmann Institute of Science;;;Meta;;", "aff_domain": "weizmann.ac.il;;;fb.com;;", "position": "PhD student;;;Applied Research Manager;;", "bibtex": "@inproceedings{\nshaul2024bespoke,\ntitle={Bespoke Solvers for Generative Flow Models},\nauthor={Neta Shaul and Juan Perez and Ricky T. Q. Chen and Ali Thabet and Albert Pumarola and Yaron Lipman},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=1PXEY7ofFX}\n}", "github": "", "project": "", "reviewers": "ovsR;vykk;KhFN;Zfm7;7aEk", "pdf_size": 20965462, "rating": "6;6;8;8;8", "confidence": "3;4;3;4;3", "soundness": "3;3;3;4;3", "contribution": "2;3;4;4;3", "presentation": "2;3;3;3;4", "wc_summary": "44;148;225;206;213", "wc_strengths": "25;92;90;141;62", "wc_weaknesses": "106;279;69;46;58", "wc_questions": "107;89;181;96;113", "wc_review": "282;608;565;489;446", "wc_reply_reviewers": "24;0;0;0;0", "wc_reply_authors": "796;1153;621;511;346", "reply_reviewers": "1;0;0;0;0", "reply_authors": "1;2;1;1;1", "rating_avg": [ 7.2, 0.9797958971132712 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "contribution_avg": [ 3.2, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 167.2, 67.0683233725132 ], "wc_strengths_avg": [ 82.0, 38.19424040349539 ], "wc_weaknesses_avg": [ 111.6, 86.07578056573172 ], "wc_questions_avg": [ 117.2, 32.97514215283992 ], "wc_review_avg": [ 478.0, 113.16359838746733 ], "wc_reply_reviewers_avg": [ 4.8, 9.600000000000001 ], "wc_reply_authors_avg": [ 685.4, 275.9120149612916 ], "reply_reviewers_avg": [ 0.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.16666666666666663, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13011832480330181919&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=1PXEY7ofFX", "pdf": "https://openreview.net/pdf?id=1PXEY7ofFX", "email": "weizmann.ac.il;;;fb.com;;", "author_num": 6, "aff_unique_index": "0;1", "aff_unique_norm": "Weizmann Institute of Science;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.weizmann.org.il;https://meta.com", "aff_unique_abbr": "Weizmann;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Israel;United States" }, { "id": "1PaDPHDhwe", "title": "Group Robustness via Adaptive Class-Specific Scaling", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Group distributionally robust optimization, which aims to improve robust accuracies such as worst-group or unbiased accuracy, is one of the mainstream algorithms to mitigate spurious correlation and handle dataset bias. 
Existing approaches have apparently improved robust accuracy, but, in fact, these performance gains mainly come from trade-offs at the expense of average accuracy. To control the trade-off flexibly and efficiently, we first propose a simple class-specific scaling strategy, directly applicable to existing debiasing algorithms without additional training. We also develop an instance-wise adaptive scaling technique to overcome the trade-off and improve the performance even further in terms of both accuracies. Our approach reveals that a na\\\"ive ERM baseline matches or even outperforms the recent debiasing methods by only adopting the class-specific scaling technique. Then, we employ this technique to evaluate the performance of existing algorithms in a comprehensive manner by introducing a novel unified metric that summarizes the trade-off between the two accuracies as a scalar value. By considering the inherent trade-off and providing a performance landscape, our approach delivers meaningful insights into existing robust methods beyond the robust accuracy only. We perform experiments on the datasets in computer vision and natural language processing domains and verify the effectiveness of the proposed frameworks.", "keywords": "group robustness;debiasing", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "", "author": "Seonguk Seo;Bohyung Han", "authorids": "~Seonguk_Seo1;~Bohyung_Han1", "gender": "M;Not Specified", "homepage": "https://seoseong.uk;http://cvlab.snu.ac.kr/~bhhan", "dblp": "227/2319;73/4880.html", "google_scholar": "dhVCLrwAAAAJ;9aaeCToAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Seonguk_Seo1;~Bohyung_Han1", "aff": "Seoul National University;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr", "position": "PhD student;Full Professor", "bibtex": "@misc{\nseo2024group,\ntitle={Group Robustness via Adaptive Class-Specific Scaling},\nauthor={Seonguk Seo and Bohyung Han},\nyear={2024},\nurl={https://openreview.net/forum?id=1PaDPHDhwe}\n}", "github": "", "project": "", "reviewers": "SBij;mojf;cWjo;SN9B", "site": "https://openreview.net/forum?id=1PaDPHDhwe", "pdf_size": 1077918, "rating": "3;5;5;6", "confidence": "4;4;4;3", "soundness": "1;2;3;3", "contribution": "1;2;2;3", "presentation": "1;2;2;3", "wc_summary": "90;48;46;61", "wc_strengths": "18;125;23;47", "wc_weaknesses": "266;446;71;110", "wc_questions": "2;522;43;3", "wc_review": "376;1141;183;221", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 61.25, 17.5695048308141 ], "wc_strengths_avg": [ 53.25, 42.85075845303091 ], "wc_weaknesses_avg": [ 223.25, 147.86036487172618 ], "wc_questions_avg": [ 142.5, 219.72767235830813 ], "wc_review_avg": [ 480.25, 388.27398509300104 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.6622661785325219, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:-EJcB71XEIsJ:scholar.google.com/&scioq=Group+Robustness+via+Adaptive+Class-Specific+Scaling&hl=en&as_sdt=0,5", "gs_version_total": 2, 
"aff_unique_index": "0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "title": "MAMBA: an Effective World Model Approach for Meta-Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19589", "id": "1RE0H6mU7M", "author_site": "Zohar Rimon, Tom Jurgenson, Orr Krupnik, Gilad Adler, Aviv Tamar", "tldr": "", "abstract": "Meta-reinforcement learning (meta-RL) is a promising framework for tackling challenging domains requiring efficient exploration. Existing meta-RL algorithms are characterized by low sample efficiency, and mostly focus on low-dimensional task distributions. In parallel, model-based RL methods have been successful in solving partially observable MDPs, of which meta-RL is a special case.\nIn this work, we leverage this success and propose a new model-based approach to meta-RL, based on elements from existing state-of-the-art model-based and meta-RL methods. We demonstrate the effectiveness of our approach on common meta-RL benchmark domains, attaining greater return with better sample efficiency (up to $15\\times$) while requiring very little hyperparameter tuning. In addition, we validate our approach on a slate of more challenging, higher-dimensional domains, taking a step towards real-world generalizing agents.", "keywords": "Meta Reinforcement Learning;World Models;Model Based Reinforcement Learning", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/b4b8a274431d39250f38bd36f23f2a37eefaac1d.zip", "author": "Zohar Rimon;Tom Jurgenson;Orr Krupnik;Gilad Adler;Aviv Tamar", "authorids": "~Zohar_Rimon1;~Tom_Jurgenson1;~Orr_Krupnik1;~Gilad_Adler1;~Aviv_Tamar2", "gender": "M;;M;M;M", "homepage": "https://zoharri.github.io/;;https://www.orrkrup.com;;https://avivt.github.io/avivt/", "dblp": "322/9338;https://dblp.uni-trier.de/pers/hd/j/Jurgenson:Tom;234/8503;;49/10622", "google_scholar": "GV8imVUAAAAJ;1YjIvioAAAAJ;E2lnAzIAAAAJ;;https://scholar.google.co.il/citations?user=kppa2vgAAAAJ", "orcid": ";;;;", "linkedin": ";;;gilad-adler/;", "or_profile": "~Zohar_Rimon1;~Tom_Jurgenson1;~Orr_Krupnik1;~Gilad_Adler1;~Aviv_Tamar2", "aff": "Technion - Israel Institute of Technology, Technion - Israel Institute of Technology;Technion;Technion - Israel Institute of Technology, Technion - Israel Institute of Technology;;Technion, Technion", "aff_domain": "campus.technion.ac.il;technion.ac.il;campus.technion.ac.il;;technion.ac.il", "position": "PhD student;PhD student;PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nrimon2024mamba,\ntitle={{MAMBA}: an Effective World Model Approach for Meta-Reinforcement Learning},\nauthor={Zohar Rimon and Tom Jurgenson and Orr Krupnik and Gilad Adler and Aviv Tamar},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=1RE0H6mU7M}\n}", "github": "", "project": "", "reviewers": "tjN7;X5No;fKkT;cu12", "pdf_size": 2571119, "rating": "6;6;6;6", "confidence": "4;4;4;3", "soundness": "3;2;3;3", "contribution": "3;2;3;2", "presentation": "3;3;3;3", "wc_summary": "126;131;161;73", "wc_strengths": "69;42;140;40", "wc_weaknesses": "239;557;462;150", "wc_questions": "51;12;4;141", "wc_review": "485;742;767;404", "wc_reply_reviewers": "261;16;192;50", "wc_reply_authors": 
"800;388;1019;695", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;3;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 122.75, 31.68891762114951 ], "wc_strengths_avg": [ 72.75, 40.48070528041724 ], "wc_weaknesses_avg": [ 352.0, 164.08686723805778 ], "wc_questions_avg": [ 52.0, 54.37370688117557 ], "wc_review_avg": [ 599.5, 157.87099163557565 ], "wc_reply_reviewers_avg": [ 129.75, 100.49968905424534 ], "wc_reply_authors_avg": [ 725.5, 227.22730909818037 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10294479951427099315&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=1RE0H6mU7M", "pdf": "https://openreview.net/pdf?id=1RE0H6mU7M", "email": "campus.technion.ac.il;technion.ac.il;campus.technion.ac.il;;technion.ac.il", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Technion - Israel Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.technion.ac.il/en/", "aff_unique_abbr": "Technion", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Israel" }, { "title": "Network Memory Footprint Compression Through Jointly Learnable Codebooks and Mappings", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19588", "id": "1RrOtCmuKr", "author_site": "Edouard YVINEC, Arnaud Dapogny, Kevin Bailly", "tldr": "", "abstract": "The massive interest in deep neural networks (DNNs) for both computer vision and natural language processing has been sparked by the growth in computational power. However, this led to an increase in the memory footprint, to a point where it can be challenging to simply load a model on commodity devices such as mobile phones. To address this limitation, quantization is a favored solution as it maps high precision tensors to a low precision, memory efficient format. In terms of memory footprint reduction, its most effective variants are based on codebooks. These methods, however, suffer from two limitations. First, they either define a single codebook for each tensor, or use a memory-expensive mapping to multiple codebooks. Second, gradient descent optimization of the mapping favors jumps toward extreme values, hence not defining a proximal search. In this work, we propose to address these two limitations. First, we initially group similarly distributed neurons and leverage the re-ordered structure to either apply different scale factors to the different groups, or map weights that fall in these groups to several codebooks, without any mapping overhead. Second, stemming from this initialization, we propose a joint learning of the codebook and weight mappings that bears similarities with recent gradient-based post-training quantization techniques. Third, drawing estimation from straight-through estimation techniques, we introduce a novel gradient update definition to enable a proximal search of the codebooks and their mappings. 
The proposed jointly learnable codebooks and mappings (JLCM) method allows a very efficient approximation of any DNN: as such, a Llama 7B can be compressed down to 2Go and loaded on 5-year-old smartphones.", "keywords": "quantization;codebooks;hashlists;compression;efficient inference;deep learning", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/2bd02732ded46eff81c3ed814a014e698b9e97f2.pdf", "author": "Edouard YVINEC;Arnaud Dapogny;Kevin Bailly", "authorids": "~Edouard_YVINEC1;~Arnaud_Dapogny1;~Kevin_Bailly3", "gender": "M;M;M", "homepage": "https://edouardyvinec.netlify.app/;;https://sites.google.com/view/bailly/", "dblp": "263/2218.html;165/8156;41/3712", "google_scholar": "https://scholar.google.fr/citations?user=q-J6Tz0AAAAJ;;oy59G9AAAAAJ", "orcid": "0000-0002-4318-612X;;", "linkedin": "edouard-yvinec-aa8333158/;;", "or_profile": "~Edouard_YVINEC1;~Arnaud_Dapogny1;~Kevin_Bailly3", "aff": ";Apple;Sorbonne University", "aff_domain": ";apple.com;sorbonne-universite.fr", "position": ";Researcher;Associate Professor", "bibtex": "@inproceedings{\nyvinec2024network,\ntitle={Network Memory Footprint Compression Through Jointly Learnable Codebooks and Mappings},\nauthor={Edouard YVINEC and Arnaud Dapogny and Kevin Bailly},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=1RrOtCmuKr}\n}", "github": "", "project": "", "reviewers": "kEDS;fJGw;P3Eg", "pdf_size": 479684, "rating": "5;6;8", "confidence": "4;2;2", "soundness": "2;2;3", "contribution": "2;2;3", "presentation": "2;2;3", "wc_summary": "71;87;109", "wc_strengths": "24;44;62", "wc_weaknesses": "142;73;45", "wc_questions": "5;43;96", "wc_review": "242;247;312", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "951;693;309", "reply_reviewers": "0;0;0", "reply_authors": "2;1;1", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 2.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 89.0, 15.57776192739723 ], "wc_strengths_avg": [ 43.333333333333336, 15.520595635763755 ], "wc_weaknesses_avg": [ 86.66666666666667, 40.762182909598394 ], "wc_questions_avg": [ 48.0, 37.31844941401862 ], "wc_review_avg": [ 267.0, 31.88521078284832 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 651.0, 263.77262936097065 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.7559289460184546, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17361935470188294799&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=1RrOtCmuKr", "pdf": "https://openreview.net/pdf?id=1RrOtCmuKr", "email": ";apple.com;sorbonne-universite.fr", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Apple;Sorbonne University", "aff_unique_dep": "Apple Inc.;", "aff_unique_url": "https://www.apple.com;https://www.sorbonne.universite.fr", "aff_unique_abbr": "Apple;Sorbonne", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;France" }, { "id": "1SEXzogsET", "title": "LeetPrompt: Leveraging Collective Human Intelligence to Study 
LLMs", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "With the advent of pre-trained large language models (LLMs), natural language prompts are now becoming a de-facto method for interacting with language models. However, prompting as a technique is an esoteric art, involving cumbersome manual processes by individuals to search different strategies that make language models work for the intended task. We introduce LeetPrompt, a citizen science platform that leverages on collective human creativity with prompting to solve reasoning questions across various domains. Users of \\leetprompt attempt questions by writing prompts that solve all the hidden test cases. To measure the efficacy of LeetPrompt, we conduct a study $10$ questions across $5$ domains (biology, physics, math, programming, and general knowledge) with $20$ human subjects. We gather a total of $1173$ GPT-4 prompts with the following observations: First, problems deemed unsolvable by question setters were successfully solved. Second, diverse prompting strategies were used by the different participants. Third, the more difficult problems also had a high number of prompt submissions enabling better debugging of the LLM behaviour for that problem. These observations support various downstream implications in robust approaches to prompt interpretability and model evaluation, high quality data collection, human-AI alignment and real-world usage of LLMs.", "keywords": "evaluation of foundation models;collective intelligence", "primary_area": "infrastructure, software libraries, hardware, etc.", "supplementary_material": "/attachment/6e764791827649eea989d43d637989d37478306c.pdf", "author": "Sebastin Santy;Ayana Bharadwaj;Sahith Dambekodi;Alex Albert;Cathy Lang Yuan;Ranjay Krishna", "authorids": "~Sebastin_Santy2;~Ayana_Bharadwaj1;~Sahith_Dambekodi1;~Alex_Albert1;~Cathy_Lang_Yuan1;~Ranjay_Krishna1", "gender": "M;F;M;M;F;M", "homepage": "http://sebastinsanty.com/;;;https://www.alexalbert.me;;http://ranjaykrishna.com", "dblp": "212/5368;;;;;167/3785", "google_scholar": "HsyMg08AAAAJ;;;;;IcqahyAAAAAJ", "orcid": "0000-0003-1162-0865;;;;;0000-0001-8784-2531", "linkedin": "sebastinsanty/;ayana-bharadwaj-1921a71b0;sahith-dambekodi-31270b104/;alex-Albert;cathy-lang-yuan/;ranjay-krishna-1a344444/", "or_profile": "~Sebastin_Santy2;~Ayana_Bharadwaj1;~Sahith_Dambekodi1;~Alex_Albert1;~Cathy_Lang_Yuan1;~Ranjay_Krishna1", "aff": "University of Washington;Department of Computer Science;;;;University of Washington", "aff_domain": "cs.washington.edu;cs.washington.edu;;;;cs.washington.edu", "position": "PhD student;Undergrad student;;;;Assistant Professor", "bibtex": "@misc{\nsanty2024leetprompt,\ntitle={LeetPrompt: Leveraging Collective Human Intelligence to Study {LLM}s},\nauthor={Sebastin Santy and Ayana Bharadwaj and Sahith Dambekodi and Alex Albert and Cathy Lang Yuan and Ranjay Krishna},\nyear={2024},\nurl={https://openreview.net/forum?id=1SEXzogsET}\n}", "github": "", "project": "", "reviewers": "o2mR;68mT;RmkX;EkgE", "site": "https://openreview.net/forum?id=1SEXzogsET", "pdf_size": 2088332, "rating": "3;5;5;5", "confidence": "3;3;4;3", "soundness": "2;2;3;3", "contribution": "2;1;2;2", "presentation": "3;3;3;3", "wc_summary": "16;70;85;82", "wc_strengths": "58;32;53;50", "wc_weaknesses": "194;318;233;181", "wc_questions": "54;3;122;90", "wc_review": "322;423;493;403", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.5, 0.8660254037844386 ], 
"confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 63.25, 27.851166941440713 ], "wc_strengths_avg": [ 48.25, 9.807522622966516 ], "wc_weaknesses_avg": [ 231.5, 53.48130514488217 ], "wc_questions_avg": [ 67.25, 44.21184795956849 ], "wc_review_avg": [ 410.25, 60.931826658980114 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:55gPqy9jnR0J:scholar.google.com/&scioq=LeetPrompt:+Leveraging+Collective+Human+Intelligence+to+Study+LLMs&hl=en&as_sdt=0,5", "gs_version_total": 2, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Washington;Unknown Institution", "aff_unique_dep": ";Department of Computer Science", "aff_unique_url": "https://www.washington.edu;", "aff_unique_abbr": "UW;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States;" }, { "title": "Spike-driven Transformer V2: Meta Spiking Neural Network Architecture Inspiring the Design of Next-generation Neuromorphic Chips", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19587", "id": "1SIBN5Xyw7", "author_site": "Man Yao, Jiakui Hu, Tianxiang Hu, Yifan Xu, Zhaokun Zhou, Yonghong Tian, Bo XU, Guoqi Li", "tldr": "", "abstract": "Neuromorphic computing, which exploits Spiking Neural Networks (SNNs) on neuromorphic chips, is a promising energy-efficient alternative to traditional AI. CNN-based SNNs are the current mainstream of neuromorphic computing. By contrast, no neuromorphic chips are designed especially for Transformer-based SNNs, which have just emerged, and their performance is only on par with CNN-based SNNs, offering no distinct advantage. In this work, we propose a general Transformer-based SNN architecture, termed as ``Meta-SpikeFormer\", whose goals are: (1) *Lower-power*, supports the spike-driven paradigm that there is only sparse addition in the network; (2) *Versatility*, handles various vision tasks; (3) *High-performance*, shows overwhelming performance advantages over CNN-based SNNs; (4) *Meta-architecture*, provides inspiration for future next-generation Transformer-based neuromorphic chip designs. Specifically, we extend the Spike-driven Transformer in \\citet{yao2023spike} into a meta architecture, and explore the impact of structure, spike-driven self-attention, and skip connection on its performance. On ImageNet-1K, Meta-SpikeFormer achieves 80.0\\% top-1 accuracy (55M), surpassing the current state-of-the-art (SOTA) SNN baselines (66M) by 3.7\\%. This is the first direct training SNN backbone that can simultaneously supports classification, detection, and segmentation, obtaining SOTA results in SNNs. 
Finally, we discuss the inspiration of the meta SNN architecture for neuromorphic chip design.", "keywords": "Spiking neural networks;Spiking transformer;Transformer-based SNNs;Neuromorphic chips;Spike-driven self-attention", "primary_area": "applications to neuroscience & cognitive science", "supplementary_material": "/attachment/bcb68506e3c8b968b7c11cb052a9bc1399a06b86.pdf", "author": "Man Yao;JiaKui Hu;Tianxiang Hu;Yifan Xu;Zhaokun Zhou;Yonghong Tian;Bo XU;Guoqi Li", "authorids": "~Man_Yao1;~JiaKui_Hu1;~Tianxiang_Hu3;~Yifan_Xu9;~Zhaokun_Zhou1;~Yonghong_Tian1;~Bo_XU10;~Guoqi_Li1", "gender": "M;M;M;M;M;M;M;M", "homepage": ";https://github.com/jkhu29;https://github.com/TiminHu;https://yifanxu74.github.io;;http://www.pkuml.org;;https://scholar.google.com/citations?hl=en&user=qCfE--MAAAAJ", "dblp": "21/5932;327/3225;;;;86/5857;;", "google_scholar": "eE4vvp0AAAAJ;VagFt-sAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;;https://scholar.google.com/citations?hl=en;;https://scholar.google.com/citations?hl=en", "orcid": ";;;;0000-0003-4454-6630;0000-0002-2978-5935;;", "linkedin": ";;;;;;%E6%B3%A2-%E5%BE%90-74210b115/?midToken=AQH1EMB1ZoboJA&midSig=2Q5MzMXmNEH9M1&trk=eml-email_pymk_02-header-22-profile&trkEmail=eml-email_pymk_02-header-22-profile-null-7ydrhe~kpggjoav~k9-null-neptune/profile~vanity.view;", "or_profile": "~Man_Yao1;~JiaKui_Hu1;~Tianxiang_Hu3;~Yifan_Xu9;~Zhaokun_Zhou1;~Yonghong_Tian1;~Bo_XU10;~Guoqi_Li1", "aff": "Institute of automation, Chinese academy of sciences;Peking University;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Peking University;Peking University;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "ia.ac.cn;pku.edu.cn;ia.ac.cn;ia.ac.cn;pku.edu.cn;pku.edu.cn;ia.ac.cn;ia.ac.cn", "position": "Assistant Professor;PhD student;PhD student;PhD student;PhD student;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nyao2024spikedriven,\ntitle={Spike-driven Transformer V2: Meta Spiking Neural Network Architecture Inspiring the Design of Next-generation Neuromorphic Chips},\nauthor={Man Yao and JiaKui Hu and Tianxiang Hu and Yifan Xu and Zhaokun Zhou and Yonghong Tian and Bo XU and Guoqi Li},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=1SIBN5Xyw7}\n}", "github": "", "project": "", "reviewers": "1cdx;JmyY;Aepg", "pdf_size": 576149, "rating": "5;6;6", "confidence": "4;4;3", "soundness": "2;4;3", "contribution": "2;3;2", "presentation": "3;4;3", "wc_summary": "60;33;90", "wc_strengths": "23;58;96", "wc_weaknesses": "27;48;23", "wc_questions": "68;33;149", "wc_review": "178;172;358", "wc_reply_reviewers": "0;11;6", "wc_reply_authors": "2080;618;1280", "reply_reviewers": "0;1;1", "reply_authors": "3;2;2", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 61.0, 23.280893453645632 ], "wc_strengths_avg": [ 59.0, 29.81051268708183 ], "wc_weaknesses_avg": [ 32.666666666666664, 10.96458946893235 ], "wc_questions_avg": [ 83.33333333333333, 48.58211833815218 ], "wc_review_avg": [ 236.0, 86.30179604156567 ], "wc_reply_reviewers_avg": [ 5.666666666666667, 4.4969125210773475 ], 
"wc_reply_authors_avg": [ 1326.0, 597.7446500527351 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 77, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17884632612771453622&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=1SIBN5Xyw7", "pdf": "https://openreview.net/pdf?id=1SIBN5Xyw7", "email": "ia.ac.cn;pku.edu.cn;ia.ac.cn;ia.ac.cn;pku.edu.cn;pku.edu.cn;ia.ac.cn;ia.ac.cn", "author_num": 8, "aff_unique_index": "0;1;0;0;1;1;0;0", "aff_unique_norm": "Chinese Academy of Sciences;Peking University", "aff_unique_dep": "Institute of Automation;", "aff_unique_url": "http://www.ia.cas.cn;http://www.pku.edu.cn", "aff_unique_abbr": "CAS;Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "1SO93f7sVf", "title": "Training Neural Networks from Scratch with Parallel Low-Rank Adapters", "track": "main", "status": "Reject", "tldr": "", "abstract": "The scalability of deep learning applications is fundamentally constrained by compute, memory, and communication. While low-rank adaptation (LoRA) has reduced these costs for model fine-tuning, its application to model pre-training remain largely unexplored. This paper examines the extension of LoRA to model pre-training, identifying the constraints and limitations inherent to standard LoRA in the context of pre-training. We introduce LoRA-the-Explorer (LTE), a novel bi-level optimization algorithm, to facilitate parallel training of multiple low-rank heads across compute nodes, minimizing the necessity for frequent synchronization. Our methodology involves rigorous experimentation on vision transformers using ImageNet100, demonstrating that LTE is competitive with standard distributed training methodologies. 
Initial scalability tests on ImageNet1k show that LTE can match standard training performance by leveraging more training iterations.", "keywords": "Low-rank adapters", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Minyoung Huh;Brian Cheung;Jeremy Bernstein;Phillip Isola;Pulkit Agrawal", "authorids": "~Minyoung_Huh1;~Brian_Cheung1;~Jeremy_Bernstein1;~Phillip_Isola1;~Pulkit_Agrawal1", "gender": "M;M;M;M;M", "homepage": "https://people.csail.mit.edu/minhuh/;https://briancheung.github.io/;https://jeremybernste.in;http://web.mit.edu/phillipi/;https://people.eecs.berkeley.edu/~pulkitag/", "dblp": "220/3360;;215/3638;36/9988;149/2672", "google_scholar": "2k18_1IAAAAJ;7N-ethYAAAAJ;;ROILf3EAAAAJ;UpZmJI0AAAAJ", "orcid": ";;;0000-0002-1411-6704;", "linkedin": ";;;phillip-isola-a9955b20/;", "or_profile": "~Minyoung_Huh1;~Brian_Cheung1;~Jeremy_Bernstein1;~Phillip_Isola1;~Pulkit_Agrawal1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu;mit.edu;mit.edu", "position": "PhD student;Research Fellow;Postdoc;Associate Professor;Assistant Professor", "bibtex": "@misc{\nhuh2024training,\ntitle={Training Neural Networks from Scratch with Parallel Low-Rank Adapters},\nauthor={Minyoung Huh and Brian Cheung and Jeremy Bernstein and Phillip Isola and Pulkit Agrawal},\nyear={2024},\nurl={https://openreview.net/forum?id=1SO93f7sVf}\n}", "github": "", "project": "", "reviewers": "q4qF;Sixn;i1ro;aA3h", "site": "https://openreview.net/forum?id=1SO93f7sVf", "pdf_size": 2726951, "rating": "3;3;5;6", "confidence": "4;4;4;3", "soundness": "2;1;2;2", "contribution": "2;2;2;3", "presentation": "3;1;2;3", "wc_summary": "89;117;107;49", "wc_strengths": "54;103;101;67", "wc_weaknesses": "400;223;277;50", "wc_questions": "246;170;6;66", "wc_review": "789;613;491;232", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "2215;1068;547;460", "reply_reviewers": "0;0;0;0", "reply_authors": "5;2;1;1", "rating_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 1.75, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 90.5, 25.975950415721076 ], "wc_strengths_avg": [ 81.25, 21.26470079733077 ], "wc_weaknesses_avg": [ 237.5, 125.8302427876542 ], "wc_questions_avg": [ 122.0, 92.56349172324907 ], "wc_review_avg": [ 531.25, 202.66274324601451 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1072.5, 699.3984915625712 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.25, 1.6393596310755 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.7777777777777777, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8115461409047773032&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Be Careful What You Smooth For: Label Smoothing Can Be a Privacy Shield but Also a Catalyst for Model Inversion Attacks", "status": "Poster", "track": "main", 
"site": "https://iclr.cc/virtual/2024/poster/19586", "id": "1SbkubNdbW", "author_site": "Lukas Struppek, Dominik Hintersdorf, Kristian Kersting", "tldr": "", "abstract": "Label smoothing \u2013 using softened labels instead of hard ones \u2013 is a widely adopted regularization method for deep learning, showing diverse benefits such as enhanced generalization and calibration. Its implications for preserving model privacy, however, have remained unexplored. To fill this gap, we investigate the impact of label smoothing on model inversion attacks (MIAs), which aim to generate class-representative samples by exploiting the knowledge encoded in a classifier, thereby inferring sensitive information about its training data. Through extensive analyses, we uncover that traditional label smoothing fosters MIAs, thereby increasing a model's privacy leakage. Even more, we reveal that smoothing with negative factors counters this trend, impeding the extraction of class-related information and leading to privacy preservation, beating state-of-the-art defenses. This establishes a practical and powerful novel way for enhancing model resilience against MIAs.", "keywords": "model inversion attacks;privacy;label smoothing;defense", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/f571eba3be2633fb8ea8e19a64374cead84fb9b3.zip", "author": "Lukas Struppek;Dominik Hintersdorf;Kristian Kersting", "authorids": "~Lukas_Struppek1;~Dominik_Hintersdorf1;~Kristian_Kersting1", "gender": "M;M;M", "homepage": "https://lukasstruppek.github.io/;https://d0mih.github.io/;http://www.ml.informatik.tu-darmstadt.de/", "dblp": "306/1485;306/1325;40/3793", "google_scholar": "tU8K5qsAAAAJ;DKITUfsAAAAJ;QY-earAAAAAJ", "orcid": "0000-0003-0626-3672;0000-0003-4976-6894;0000-0002-2873-9152", "linkedin": "lukas-struppek/;;", "or_profile": "~Lukas_Struppek1;~Dominik_Hintersdorf1;~Kristian_Kersting1", "aff": "Technische Universit\u00e4t Darmstadt;CS Department, TU Darmstadt, Technische Universit\u00e4t Darmstadt;TU Darmstadt", "aff_domain": "tu-darmstadt.de;cs.tu-darmstadt.de;tu-darmstadt.de", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nstruppek2024be,\ntitle={Be Careful What You Smooth For: Label Smoothing Can Be a Privacy Shield but Also a Catalyst for Model Inversion Attacks},\nauthor={Lukas Struppek and Dominik Hintersdorf and Kristian Kersting},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=1SbkubNdbW}\n}", "github": "", "project": "", "reviewers": "EAeB;wJ7G;dNRe;22v2;uLpV", "pdf_size": 7637387, "rating": "5;6;6;6;8", "confidence": "4;4;5;2;4", "soundness": "3;3;2;2;2", "contribution": "2;3;2;3;3", "presentation": "3;3;3;4;3", "wc_summary": "37;77;65;28;57", "wc_strengths": "125;63;34;32;155", "wc_weaknesses": "206;87;848;187;178", "wc_questions": "73;17;11;59;1", "wc_review": "441;244;958;306;391", "wc_reply_reviewers": "0;43;506;0;23", "wc_reply_authors": "773;1369;4076;894;811", "reply_reviewers": "0;1;1;0;1", "reply_authors": "3;4;8;2;2", "rating_avg": [ 6.2, 0.9797958971132712 ], "confidence_avg": [ 3.8, 0.9797958971132712 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "contribution_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 52.8, 17.98221343439122 ], "wc_strengths_avg": [ 81.8, 49.68460526158983 ], "wc_weaknesses_avg": [ 301.2, 276.46149822353203 ], "wc_questions_avg": [ 32.2, 
28.414081016284864 ], "wc_review_avg": [ 468.0, 254.23532406021002 ], "wc_reply_reviewers_avg": [ 114.4, 196.4572218066824 ], "wc_reply_authors_avg": [ 1584.6, 1263.9333210260738 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 3.8, 2.2271057451320084 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.04166666666666666, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15861784773542527773&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=1SbkubNdbW", "pdf": "https://openreview.net/pdf?id=1SbkubNdbW", "email": "tu-darmstadt.de;cs.tu-darmstadt.de;tu-darmstadt.de", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Technische Universit\u00e4t Darmstadt", "aff_unique_dep": "", "aff_unique_url": "https://www.tu-darmstadt.de", "aff_unique_abbr": "TUD", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Darmstadt", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "id": "1V1QQYARmd", "title": "Nearest neighbor-based out-of-distribution detection via label smoothing", "track": "main", "status": "Reject", "tldr": "", "abstract": "Detecting out-of-distribution (OOD) examples is critical in many applications. We propose an unsupervised method to detect OOD samples using a $k$-NN density estimate with respect to a classification model's intermediate activations on in-distribution samples. We leverage a recent insight about label smoothing, which we call the {\\it Label Smoothed Embedding Hypothesis}, and show that one of the implications is that the $k$-NN density estimator performs better as an OOD detection method both theoretically and empirically when the model is trained with label smoothing. 
Finally, we show that our proposal outperforms many OOD baselines and we also provide new finite-sample high-probability statistical results for $k$-NN density estimation's ability to detect OOD examples.", "keywords": "out-of-distribution detection;nearest neighbors;label smoothing;k-NN", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Dara Bahri;Heinrich Jiang;Yi Tay;Donald Metzler", "authorids": "~Dara_Bahri1;~Heinrich_Jiang1;~Yi_Tay1;~Donald_Metzler1", "gender": "M;M;M;M", "homepage": "http://www.dara.run;;http://yitay.net;https://research.google/people/DonaldMetzler/", "dblp": "231/7656;182/2472;;95/2272", "google_scholar": "j5PpTOwAAAAJ;;VBclY_cAAAAJ;bmXpOd8AAAAJ", "orcid": ";;;0000-0003-4276-6269", "linkedin": ";;;donmetzler/", "or_profile": "~Dara_Bahri1;~Heinrich_Jiang1;~Yi_Tay1;~Donald_Metzler1", "aff": "Google Research;Google;Google;Google", "aff_domain": "google.com;google.com;google.com;google.com", "position": "Research Scientist;Research scientist;Research Scientist;Research Scientist", "bibtex": "@misc{\nbahri2024nearest,\ntitle={Nearest neighbor-based out-of-distribution detection via label smoothing},\nauthor={Dara Bahri and Heinrich Jiang and Yi Tay and Donald Metzler},\nyear={2024},\nurl={https://openreview.net/forum?id=1V1QQYARmd}\n}", "github": "", "project": "", "reviewers": "dvW7;rYbM;rsM8;qE8z", "site": "https://openreview.net/forum?id=1V1QQYARmd", "pdf_size": 4927567, "rating": "3;5;6;8", "confidence": "3;3;3;3", "soundness": "2;2;2;4", "contribution": "2;2;2;4", "presentation": "1;2;3;4", "wc_summary": "30;40;64;47", "wc_strengths": "36;22;49;65", "wc_weaknesses": "95;103;128;29", "wc_questions": "72;125;135;42", "wc_review": "233;290;376;183", "wc_reply_reviewers": "87;0;37;0", "wc_reply_authors": "427;495;506;117", "reply_reviewers": "1;0;1;0", "reply_authors": "2;1;2;1", "rating_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "contribution_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 45.25, 12.397076268217438 ], "wc_strengths_avg": [ 43.0, 15.890248582070704 ], "wc_weaknesses_avg": [ 88.75, 36.5812451947716 ], "wc_questions_avg": [ 93.5, 38.17394399325278 ], "wc_review_avg": [ 270.5, 71.71645557332013 ], "wc_reply_reviewers_avg": [ 31.0, 35.68613176011096 ], "wc_reply_authors_avg": [ 386.25, 158.36883373947035 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:QbU3QgMwBQUJ:scholar.google.com/&scioq=Nearest+neighbor-based+out-of-distribution+detection+via+label+smoothing&hl=en&as_sdt=0,14", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google Research", "aff_unique_url": "https://research.google", "aff_unique_abbr": "Google Research", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "1VcKvdYbUM", "title": "APBench: A Unified Benchmark for Availability Poisoning Attacks and Defenses", "track": "main", "status": "Reject", "tldr": "", "abstract": "The efficacy of availability poisoning, a method of poisoning data by injecting imperceptible perturbations to prevent its use in model training, 
has been a hot subject of investigation. Previous research suggested that it was difficult to effectively counteract such poisoning attacks. However, the introduction of various defense methods has challenged this notion. Due to the rapid progress in this field, the performance of different novel methods cannot be accurately validated due to variations in experimental setups. To further evaluate the attack and defense capabilities of these poisoning methods, we have developed a benchmark --- APBench for assessing the efficacy of adversarial poisoning. APBench consists of 9 state-of-the-art availability poisoning attacks, 8 defense algorithms, and 4 conventional data augmentation techniques. We also have set up experiments with varying different poisoning ratios, and evaluated the attacks on multiple datasets and their transferability across model architectures. We further conducted a comprehensive evaluation of 2 additional attacks specifically targeting unsupervised models. Our results reveal the glaring inadequacy of existing attacks in safeguarding individual privacy. APBench is open source and available to the deep learning community.", "keywords": "Availability Attacks;Data Protection;Privacy", "primary_area": "datasets and benchmarks", "supplementary_material": "/attachment/444e8cd8a0da355de7ca04beb34c5fa8766b1ab4.zip", "author": "Tianrui Qin;Xitong Gao;Juanjuan Zhao;Kejiang Ye;Cheng-zhong Xu", "authorids": "~Tianrui_Qin1;~Xitong_Gao1;~Juanjuan_Zhao1;~Kejiang_Ye1;~Cheng-zhong_Xu1", "gender": "M;M;F;;", "homepage": "https://github.com/Tianyue818;https://github.com/admk;https://people.ucas.ac.cn/~zhaojuanjuan?language=cn;;", "dblp": "294/4992;140/2071;;;", "google_scholar": "_bOMXMkAAAAJ;-YIUCL8AAAAJ;;;", "orcid": "0009-0002-8386-2003;0000-0002-2063-2051;;;", "linkedin": ";;;;", "or_profile": "~Tianrui_Qin1;~Xitong_Gao1;~Juanjuan_Zhao1;~Kejiang_Ye1;~Cheng-zhong_Xu1", "aff": "Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Chinese Academy of Sciences;Shenzhen Institute of Advanced Technology, Chinese Academy of Sciences;Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Chinese Academy of Sciences;;", "aff_domain": "siat.ac.cn;siat.ac.cn;siat.ac.cn;;", "position": "MS student;Researcher;Associate Professor;;", "bibtex": "@misc{\nqin2024apbench,\ntitle={{APB}ench: A Unified Benchmark for Availability Poisoning Attacks and Defenses},\nauthor={Tianrui Qin and Xitong Gao and Juanjuan Zhao and Kejiang Ye and Cheng-zhong Xu},\nyear={2024},\nurl={https://openreview.net/forum?id=1VcKvdYbUM}\n}", "github": "", "project": "", "reviewers": "gtEa;nmU1;WVAC;8Vbt", "site": "https://openreview.net/forum?id=1VcKvdYbUM", "pdf_size": 6134649, "rating": "3;3;5;6", "confidence": "4;5;4;4", "soundness": "2;1;3;3", "contribution": "2;2;2;3", "presentation": "1;3;2;3", "wc_summary": "22;72;123;104", "wc_strengths": "8;11;30;87", "wc_weaknesses": "16;29;21;190", "wc_questions": "244;554;641;55", "wc_review": "290;666;815;436", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "455;463;658;170", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 80.25, 38.25163395202877 ], "wc_strengths_avg": [ 34.0, 31.741140496207755 ], "wc_weaknesses_avg": [ 64.0, 72.89375830618147 ], "wc_questions_avg": [ 373.5, 
235.76948487876882 ], "wc_review_avg": [ 551.75, 202.6479397872083 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 436.5, 174.0179588433332 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5555555555555555, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4711376913267131339&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Chinese Academy of Sciences;Shenzhen Institute of Advanced Technology", "aff_unique_dep": "Shenzhen Institutes of Advanced Technology;", "aff_unique_url": "http://www.cas.cn;http://www.siat.cas.cn", "aff_unique_abbr": "CAS;SIAT", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Shenzhen", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Beyond Stationarity: Convergence Analysis of Stochastic Softmax Policy Gradient Methods", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19585", "id": "1VeQ6VBbev", "author_site": "Sara Klein, Simon Weissmann, Leif D\u00f6ring", "tldr": "", "abstract": "Markov Decision Processes (MDPs) are a formal framework for modeling and solving sequential decision-making problems. In finite time horizons such problems are relevant for instance for optimal stopping or specific supply chain problems, but also in the training of large language models. In contrast to infinite horizon MDPs optimal policies are not stationary, policies must be learned for every single epoch. In practice all parameters are often trained simultaneously, ignoring the inherent structure suggested by dynamic programming. This paper introduces a combination of dynamic programming and policy gradient called dynamical policy gradient, where the parameters are trained backwards in time. \n \n For the tabular softmax parametrisation we carry out the convergence analysis for simultaneous and dynamic policy gradient towards global optima, both in the exact and sampled gradient settings without regularisation. 
It turns out that the use of dynamic policy gradient training much better exploits the structure of finite-time problems which is reflected in improved convergence bounds.", "keywords": "reinforcement learning;policy gradient;stochastic approximation;finite-time MDP", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/2c81ff9fb426afbf43cc32ae4539fee865a0d40b.pdf", "author": "Sara Klein;Simon Weissmann;Leif D\u00f6ring", "authorids": "~Sara_Klein1;~Simon_Weissmann1;~Leif_D\u00f6ring1", "gender": "F;M;M", "homepage": ";https://www.wim.uni-mannheim.de/doering/team/prof-dr-simon-weissmann/;https://www.wim.uni-mannheim.de/doering/", "dblp": "359/1702;246/5069;346/0412.html", "google_scholar": ";Pfxc_RgAAAAJ;", "orcid": ";0000-0002-5111-6658;0000-0002-4569-5083", "linkedin": "sara-klein-wima/;;leif-d\u00f6ring-0b002496/?originalSubdomain=de", "or_profile": "~Sara_Klein1;~Simon_Weissmann1;~Leif_D\u00f6ring1", "aff": "Universit\u00e4t Mannheim;University of Mannheim;Universit\u00e4t Mannheim", "aff_domain": "uni-mannheim.de;uni-mannheim.de;uni-mannheim.de", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nklein2024beyond,\ntitle={Beyond Stationarity: Convergence Analysis of Stochastic Softmax Policy Gradient Methods},\nauthor={Sara Klein and Simon Weissmann and Leif D{\\\"o}ring},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=1VeQ6VBbev}\n}", "github": "", "project": "", "reviewers": "Y9J6;iprP;EGiX", "pdf_size": 735199, "rating": "6;8;8", "confidence": "3;3;3", "soundness": "3;4;3", "contribution": "3;3;3", "presentation": "2;4;3", "wc_summary": "79;160;37", "wc_strengths": "102;109;36", "wc_weaknesses": "128;20;49", "wc_questions": "58;44;269", "wc_review": "367;333;391", "wc_reply_reviewers": "0;60;0", "wc_reply_authors": "541;234;795", "reply_reviewers": "0;1;0", "reply_authors": "1;1;1", "rating_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 92.0, 51.048996072400875 ], "wc_strengths_avg": [ 82.33333333333333, 32.8870119584549 ], "wc_weaknesses_avg": [ 65.66666666666667, 45.63867755411948 ], "wc_questions_avg": [ 123.66666666666667, 102.92499966264539 ], "wc_review_avg": [ 363.6666666666667, 23.79542439676633 ], "wc_reply_reviewers_avg": [ 20.0, 28.284271247461902 ], "wc_reply_authors_avg": [ 523.3333333333334, 229.3677299786427 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7573781758097719815&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=1VeQ6VBbev", "pdf": "https://openreview.net/pdf?id=1VeQ6VBbev", "email": "uni-mannheim.de;uni-mannheim.de;uni-mannheim.de", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Mannheim", "aff_unique_dep": "", "aff_unique_url": "https://www.uni-mannheim.de", "aff_unique_abbr": "UM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "id": "1WSd408I9M", "title": "Generative AI in healthcare: A trustworthy approach", "track": "main", 
"status": "Withdraw", "tldr": "", "abstract": "Generative AI in healthcare: A trustworthy approach\n\nAbstract: The recent advancements in self-supervised algorithms like Transformer Architecture and Diffusion models have expanded the means of applying AI in healthcare and life sciences. To achieve real world adoption, it is important to measure and audit the trustworthiness of the AI system as per the legal and compliance requirements for privacy, security, fairness, and safety. In this paper, we focus on the method to achieve trustworthiness in an LLM (Large Language Model) based decision support system for physicians. The stakeholders for this decision support system are patients, physicians, regulators, and external auditors. We focus on the limitations of large or foundation models and the method to overcome these limitations, with the aim of accelerating the adoption of this far-reaching technology in the healthcare sector. It also explores possible guardrails for safety and the methods for aligning AI systems to guardrails.\n\nOur Solution Approach:\nWe explore an approach to an AI system which can enhance decision capabilities by using the data and EHRs (Electronic Health Record) collected over many years for a vast volume of patients. The longitudinal data consists of biomarkers, disease progression indicators, treatment administered, and patient outcome. The goal of the system is to assist physicians in identifying the best treatment option for a given patient context. The LLM-based system will be able to predict optimal options based on hundreds of similar cases on which it was trained. The paper addresses the transparency, data integrity, model development, and performance validation of the system. In the sections below, we explore the various stages of development and deployment of such a system, the challenges, and the methods to overcome the challenges.", "keywords": "Generative AI;healthcare;trustworthy;Transformer Architecture;guardrails", "primary_area": "generative models", "supplementary_material": "", "author": "parul berry;Vishwesh Milind Bharadiya;Sree Kumar", "authorids": "~parul_berry1;~Vishwesh_Milind_Bharadiya1;~Sree_Kumar1", "gender": ";;M", "homepage": ";;https://www.linkedin.com/in/kumarsree/", "dblp": ";;", "google_scholar": ";;", "orcid": ";;", "linkedin": ";;", "or_profile": "~parul_berry1;~Vishwesh_Milind_Bharadiya1;~Sree_Kumar1", "aff": ";;Wipro Technologies", "aff_domain": ";;wipro.com", "position": ";;Researcher", "bibtex": "@misc{\nanonymous2024generative,\ntitle={Generative {AI} in healthcare: A trustworthy approach},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=1WSd408I9M}\n}", "github": "", "project": "", "reviewers": "8VP9;mU33;tPic", "site": "https://openreview.net/forum?id=1WSd408I9M", "pdf_size": 197634, "rating": "1;1;1", "confidence": "5;3;5", "soundness": "1;1;1", "contribution": "1;1;1", "presentation": "1;1;2", "wc_summary": "86;38;33", "wc_strengths": "30;41;23", "wc_weaknesses": "86;101;89", "wc_questions": "23;34;4", "wc_review": "225;214;149", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 1.0, 0.0 ], "confidence_avg": [ 4.333333333333333, 0.9428090415820634 ], "soundness_avg": [ 1.0, 0.0 ], "contribution_avg": [ 1.0, 0.0 ], "presentation_avg": [ 1.3333333333333333, 0.4714045207910317 ], "wc_summary_avg": [ 52.333333333333336, 23.893281249943232 ], "wc_strengths_avg": [ 31.333333333333332, 7.408703590297623 ], 
"wc_weaknesses_avg": [ 92.0, 6.48074069840786 ], "wc_questions_avg": [ 20.333333333333332, 12.39175353029407 ], "wc_review_avg": [ 196.0, 33.53605025441527 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:g-dr3QN8t_MJ:scholar.google.com/&scioq=Generative+AI+in+healthcare:+A+trustworthy+approach&hl=en&as_sdt=0,10", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Wipro", "aff_unique_dep": "", "aff_unique_url": "https://www.wipro.com", "aff_unique_abbr": "Wipro", "aff_country_unique_index": "0", "aff_country_unique": "India" }, { "id": "1WceuzWff5", "title": "Understanding the Transfer of High-Level Reinforcement Learning Skills Across Diverse Environments", "track": "main", "status": "Reject", "tldr": "", "abstract": "A large number of reinforcement learning (RL) environments are available to the research community. However, due to differences across these environments, it is difficult to transfer skills learnt by a RL agent from one environment to another. For this transfer learning problem, a multitask RL perspective is considered in this paper, the goal being to transfer the skills from one environment to another using a single policy. To achieve such goal, we design an environment agnostic policy that enables the sharing of skills. Our experimental results demonstrate that: (a) by training on both desired environments using standard RL algorithms, the skills can be transferred from one environment to another; (b) by changing the amount of data that the RL algorithm uses to optimize the policy and value functions, we\nshow empirically that the transfer of knowledge between different environments is possible, and results in learning tasks with up to 84% fewer gradient update steps. 
This study takes an important step towards enabling more effective transfer of skills by learning in multitask RL scenarios across diverse environments by designing skill-sharing, sample-efficient RL training protocols.", "keywords": "reinforcement learning;representation learning;multi-task reinforcement learning;transfer learning", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Reginald McLean;Kai Yuan;Isaac Woungang;Nariman Farsad", "authorids": "~Reginald_McLean1;~Kai_Yuan1;~Isaac_Woungang2;~Nariman_Farsad1", "gender": "M;M;M;M", "homepage": "https://www.reggiemclean.ca/;https://www.linkedin.com/in/kai-yuan/;https://cs.torontomu.ca/~iwoungan/;http://narimanfarsad.com/", "dblp": ";;67/176.html;", "google_scholar": "gBBveasAAAAJ;8eLlbhMAAAAJ;https://scholar.google.ca/citations?user=sVTW2tMAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;", "linkedin": "https://www.linkedin.com/mwlite/in/reginaldmclean;;;", "or_profile": "~Reginald_McLean1;~Kai_Yuan1;~Isaac_Woungang2;~Nariman_Farsad1", "aff": "Toronto Metropolitan University;Intel;Toronto Metropolitan University;Toronto Metropolitan University", "aff_domain": "torontomu.ca;intel.com;torontomu.ca;torontomu.ca", "position": "PhD student;Researcher;Full Professor;Assistant Professor", "bibtex": "@misc{\nmclean2024understanding,\ntitle={Understanding the Transfer of High-Level Reinforcement Learning Skills Across Diverse Environments},\nauthor={Reginald McLean and Kai Yuan and Isaac Woungang and Nariman Farsad},\nyear={2024},\nurl={https://openreview.net/forum?id=1WceuzWff5}\n}", "github": "", "project": "", "reviewers": "Um91;awjg;NkEv;8Y8K", "site": "https://openreview.net/forum?id=1WceuzWff5", "pdf_size": 1796034, "rating": "3;3;3;5", "confidence": "4;5;5;3", "soundness": "1;2;2;2", "contribution": "1;2;1;3", "presentation": "2;2;1;3", "wc_summary": "79;78;158;83", "wc_strengths": "28;244;43;140", "wc_weaknesses": "211;255;133;413", "wc_questions": "2;281;49;375", "wc_review": "320;858;383;1011", "wc_reply_reviewers": "325;0;170;505", "wc_reply_authors": "399;510;903;1697", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;2;3", "rating_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 1.75, 0.4330127018922193 ], "contribution_avg": [ 1.75, 0.82915619758885 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 99.5, 33.826764551165695 ], "wc_strengths_avg": [ 113.75, 86.62094146336669 ], "wc_weaknesses_avg": [ 253.0, 102.18610473053565 ], "wc_questions_avg": [ 176.75, 155.7471909859051 ], "wc_review_avg": [ 643.0, 297.3121255515826 ], "wc_reply_reviewers_avg": [ 250.0, 186.78195844352848 ], "wc_reply_authors_avg": [ 877.25, 508.98151980204545 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:HK4IaEpE5oQJ:scholar.google.com/&scioq=Understanding+the+Transfer+of+High-Level+Reinforcement+Learning+Skills+Across+Diverse+Environments&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Toronto Metropolitan University;Intel", "aff_unique_dep": ";Intel Corporation", "aff_unique_url": "https://www.tmu.ca/;https://www.intel.com", "aff_unique_abbr": "TMU;Intel", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", 
"aff_country_unique": "Canada;United States" }, { "title": "Beyond IID weights: sparse and low-rank deep Neural Networks are also Gaussian Processes", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19584", "id": "1Wi0Ys33Nm", "author_site": "Thiziri Nait Saada, Alireza Naderi, Jared Tanner", "tldr": "", "abstract": "The infinitely wide neural network has been proven a useful and manageable mathematical model that enables the understanding of many phenomena appearing in deep learning. One example is the convergence of random deep networks to Gaussian processes that enables a rigorous analysis of the way the choice of activation function and network weights impacts the training dynamics. In this paper, we extend the seminal proof of Matthews et al., 2018 to a larger class of initial weight distributions (which we call pseudo-iid), including the established cases of iid and orthogonal weights, as well as the emerging low-rank and structured sparse settings celebrated for their computational speed-up benefits. We show that fully-connected and convolutional networks initialised with pseudo-iid distributions are all effectively equivalent up to their variance. Using our results, one can identify the Edge of Chaos for a broader class of neural networks and tune them at criticality in order to enhance their training. Moreover, they enable the posterior distribution of Bayesian Neural Networks to be tractable across these various initialization schemes.", "keywords": "Deep Neural Networks;Gaussian processes;Neural Networks initialisation;Edge of chaos;Large width limit;Mean-Field", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Thiziri Nait Saada;Alireza Naderi;Jared Tanner", "authorids": "~Thiziri_Nait_Saada1;~Alireza_Naderi1;~Jared_Tanner1", "gender": "F;M;", "homepage": ";;http://people.maths.ox.ac.uk/tanner/publications.shtml", "dblp": ";;85/1256", "google_scholar": ";n9_Av0AAAAAJ;https://scholar.google.co.uk/citations?user=J7248tkAAAAJ", "orcid": ";;", "linkedin": "thiziri-nait-saada-4332021b2;;", "or_profile": "~Thiziri_Nait_Saada1;~Alireza_Naderi1;~Jared_Tanner1", "aff": "University of Oxford;University of Oxford;University of Oxford", "aff_domain": "ox.ac.uk;ox.ac.uk;ox.ac.uk", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nsaada2024beyond,\ntitle={Beyond {IID} weights: sparse and low-rank deep Neural Networks are also Gaussian Processes},\nauthor={Thiziri Nait Saada and Alireza Naderi and Jared Tanner},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=1Wi0Ys33Nm}\n}", "github": "", "project": "", "reviewers": "rGFF;SWNp;uyEL;EeDC", "pdf_size": 8344303, "rating": "5;6;6;8", "confidence": "3;3;3;2", "soundness": "3;2;3;3", "contribution": "2;2;3;3", "presentation": "1;3;3;4", "wc_summary": "28;49;92;128", "wc_strengths": "27;62;57;111", "wc_weaknesses": "114;16;110;39", "wc_questions": "118;32;160;21", "wc_review": "287;159;419;299", "wc_reply_reviewers": "0;137;0;0", "wc_reply_authors": "738;372;616;431", "reply_reviewers": "0;1;0;0", "reply_authors": "1;2;1;1", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 74.25, 38.66765444140619 ], "wc_strengths_avg": [ 64.25, 30.127852562039664 ], 
"wc_weaknesses_avg": [ 69.75, 43.04866432306582 ], "wc_questions_avg": [ 82.75, 58.30683922148413 ], "wc_review_avg": [ 291.0, 92.04346799203081 ], "wc_reply_reviewers_avg": [ 34.25, 59.322740159234044 ], "wc_reply_authors_avg": [ 539.25, 145.8447376493235 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.9271726499455306, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11557098523946512237&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "openreview": "https://openreview.net/forum?id=1Wi0Ys33Nm", "pdf": "https://openreview.net/pdf?id=1Wi0Ys33Nm", "email": "ox.ac.uk;ox.ac.uk;ox.ac.uk", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Oxford", "aff_unique_dep": "", "aff_unique_url": "https://www.ox.ac.uk", "aff_unique_abbr": "Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "1X99YOwQfI", "title": "Controllable Pareto Trade-off between Fairness and Accuracy", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "The fairness-accuracy trade-off is a fundamental challenge in machine learning.While simply combining the two objectives can result in mediocre or extreme solutions, multi-objective optimization (MOO) could potentially provide diverse trade-offs by visiting different regions of the Pareto front. However, MOO methods usually lack precise control of the trade-offs. They rely on the full gradient per objective and inner products between these gradients to determine the update direction, which can be prone to large data sizes and the curse of dimensionality when training millions of parameters for neural networks. Moreover, the trade-off is usually sensitive to naive stochastic gradients due to the imbalance of groups in each batch and the existence of various trivial directions to improve fairness. To address these challenges, we propose \u201cControllable Pareto Trade-off (CPT)\u201d that can effectively train models performing different trade-offs defined by reference vectors. CPT begins with a correction stage that solely approaches the reference vector and then includes the discrepancy between the reference and the two objectives as the third objective in the rest training. To overcome the issues caused by\nhigh-dimensional stochastic gradients, CPT (1) uses a moving average of stochastic gradients to determine the update direction; and (2) prunes the gradients by only comparing different objectives\u2019 gradients on the critical parameters. Experiments show that CPT can achieve a higher-quality set of diverse models on the Pareto front performing different yet better trade-offs between fairness and accuracy than existing MOO approaches. 
It also exhibits better controllability and can precisely follow the human-defined reference vectors.", "keywords": "Multi-objective optimization;Fairness-accuracy trade-off", "primary_area": "optimization", "supplementary_material": "", "author": "Yongkang Du;Jieyu Zhao;Yijun Yang;Tianyi Zhou", "authorids": "~Yongkang_Du1;~Jieyu_Zhao1;~Yijun_Yang3;~Tianyi_Zhou1", "gender": "M;F;M;M", "homepage": "https://yongkdu.github.io/;http://jyzhao.net/;https://stevenyangyj.github.io/;https://tianyizhou.github.io/", "dblp": "321/7311;59/2379-1;;88/8205-1", "google_scholar": "LYu-0z0AAAAJ;9VaGBCQAAAAJ;X0quXnsAAAAJ;OKvgizMAAAAJ", "orcid": ";;;0000-0001-5348-0632", "linkedin": "yongkang-du/;;;tianyizhou", "or_profile": "~Yongkang_Du1;~Jieyu_Zhao1;~Yijun_Yang3;~Tianyi_Zhou1", "aff": "Pennsylvania State University;University of Southern California;University of Technology Sydney;University of Maryland, College Park", "aff_domain": "psu.edu;usc.edu;uts.edu.au;umd.edu", "position": "PhD student;Assistant Professor;PhD student;Assistant Professor", "bibtex": "@misc{\ndu2024controllable,\ntitle={Controllable Pareto Trade-off between Fairness and Accuracy},\nauthor={Yongkang Du and Jieyu Zhao and Yijun Yang and Tianyi Zhou},\nyear={2024},\nurl={https://openreview.net/forum?id=1X99YOwQfI}\n}", "github": "", "project": "", "reviewers": "MTak;8pma;whui", "site": "https://openreview.net/forum?id=1X99YOwQfI", "pdf_size": 1099856, "rating": "3;3;3", "confidence": "3;3;4", "soundness": "3;1;2", "contribution": "2;2;2", "presentation": "3;2;1", "wc_summary": "132;112;122", "wc_strengths": "64;23;35", "wc_weaknesses": "241;254;407", "wc_questions": "73;113;7", "wc_review": "510;502;571", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 3.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.0, 0.816496580927726 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.0, 0.816496580927726 ], "wc_summary_avg": [ 122.0, 8.16496580927726 ], "wc_strengths_avg": [ 40.666666666666664, 17.21110752456745 ], "wc_weaknesses_avg": [ 300.6666666666667, 75.37609405876346 ], "wc_questions_avg": [ 64.33333333333333, 43.70608907489003 ], "wc_review_avg": [ 527.6666666666666, 30.814859330452176 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 3, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:KWYG2L1ZRn4J:scholar.google.com/&scioq=Controllable+Pareto+Trade-off+between+Fairness+and+Accuracy&hl=en&as_sdt=0,5", "gs_version_total": 2, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Pennsylvania State University;University of Southern California;University of Technology Sydney;University of Maryland", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.psu.edu;https://www.usc.edu;https://www.uts.edu.au;https://www/umd.edu", "aff_unique_abbr": "PSU;USC;UTS;UMD", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Los Angeles;College Park", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;Australia" }, { "id": "1XDG1Z5Nhk", "title": "Sparse Backpropagation for MoE Training", "track": "main", "status": "Reject", "tldr": "", "abstract": "One defining characteristic of Mixture-of-Expert (MoE) models is their capacity for conducting sparse computation via expert routing, leading to 
remarkable scalability. However, backpropagation, the cornerstone of deep learning, requires dense computation, thereby posting challenges in MoE gradient computations. Here, we introduce SparseMixer, a scalable gradient estimator that bridges the gap between backpropagation and sparse expert routing. Unlike typical MoE training which strategically neglects certain gradient terms for the sake of sparse computation and scalability, SparseMixer provides scalable gradient approximations for these terms, enabling reliable gradient estimation in MoE training. Grounded in a numerical ODE framework, SparseMixer harnesses the mid-point method, a second-order ODE solver, to deliver precise gradient approximations with negligible computational overhead. Applying SparseMixer to Switch Transformer on both pre-training and machine translation tasks, SparseMixer showcases considerable performance gain, accelerating training convergence by up to 2 times.", "keywords": "Mixture-of-Expert;MoE;Sparsity;Backpropagation;Straight Through", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/e8b408b03d5274efe07a8fc6981911cbdc38ff9f.zip", "author": "Liyuan Liu;Jianfeng Gao;Weizhu Chen", "authorids": "~Liyuan_Liu3;~Jianfeng_Gao1;~Weizhu_Chen1", "gender": "M;M;M", "homepage": "https://www.microsoft.com/en-us/research/people/jfgao/;https://www.microsoft.com/en-us/research/people/wzchen/;https://liyuanlucasliu.github.io/", "dblp": "92/5339;79/2536;06/1624", "google_scholar": "https://scholar.google.com/citations?hl=en;LG_E-4EAAAAJ;RmvbkzYAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Jianfeng_Gao1;~Weizhu_Chen1;~Liyuan_Liu1", "aff": "Microsoft Research;Microsoft GenAI;University of Illinois, Urbana Champaign", "aff_domain": "microsoft.com;microsoft.com;illinois.edu", "position": "Principal Researcher;Vice President;PhD student", "bibtex": "@misc{\nliu2024sparse,\ntitle={Sparse Backpropagation for MoE Training},\nauthor={Liyuan Liu and Jianfeng Gao and Weizhu Chen},\nyear={2024},\nurl={https://openreview.net/forum?id=1XDG1Z5Nhk}\n}", "github": "", "project": "", "reviewers": "yUXn;D3YK;4GtZ;tyBo", "site": "https://openreview.net/forum?id=1XDG1Z5Nhk", "pdf_size": 452975, "rating": "3;5;5;6", "confidence": "3;3;4;3", "soundness": "3;3;3;3", "contribution": "2;3;2;2", "presentation": "3;3;3;2", "wc_summary": "56;75;138;52", "wc_strengths": "51;44;70;47", "wc_weaknesses": "307;59;144;208", "wc_questions": "45;3;5;34", "wc_review": "459;181;357;341", "wc_reply_reviewers": "247;0;0;0", "wc_reply_authors": "631;456;444;550", "reply_reviewers": "1;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 80.25, 34.455587355318734 ], "wc_strengths_avg": [ 53.0, 10.124228365658293 ], "wc_weaknesses_avg": [ 179.5, 90.62146544831418 ], "wc_questions_avg": [ 21.75, 18.18481509391833 ], "wc_review_avg": [ 334.5, 99.51256202108355 ], "wc_reply_reviewers_avg": [ 61.75, 106.95413736737817 ], "wc_reply_authors_avg": [ 520.25, 75.98149445753222 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.13245323570650439, "gs_citation": 7, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=15288756663620775966&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;1", "aff_unique_norm": "Microsoft;University of Illinois Urbana-Champaign", "aff_unique_dep": "Microsoft Research;", "aff_unique_url": "https://www.microsoft.com/en-us/research;https://illinois.edu", "aff_unique_abbr": "MSR;UIUC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "1XHzHMQfcK", "title": "Significance of Fairly Distributed Instances and Optimal Ratio for Validation Set in Machine Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Machine learning plays a crucial role in various research areas and industries. The effectiveness of machine learning models relies heavily on the quality and quantity of training data. To evaluate model performance on unseen data, it is important to divide the data into training and testing data sets. A three-way split into train-validation-test data-sets is also commonly used to create robust and generalized models. Validation set helps in tuning hyper-parameters to mitigate the problem of overfitting. It is of utmost importance to achieve precise and true portrayal of data across all three categories of data-sets: training, testing, and validation. Previous research has explored various statistical techniques such as 'SPlit' aimed to ensure proper membership of the complete data in the test set. Despite the utilization of these techniques, Insufficient evidence exists regarding the equitable treatment of the validation set. Although cross-validation is widely used for validation, randomly selecting the validation part may not be the complete representative of overall data, hindering the creation of a generalized model suitable for the test data. This work focuses on extracting validation sets using the Support Points method in 'SPlit' to obtain accurate data membership. 
Results demonstrate significant accuracy improvement when both test and validation sets are selected using the Support Points method.", "keywords": "Data Split;Support Points;SPlit;validation Set;Optimal Ratio;Significance of validation set", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Hina Nasir;Dr Archana Pandita;Chaudhary Nauman bin Nasir", "authorids": "hinanasirch@gmail.com;~Dr_Archana_Pandita1;~Chaudhary_Nauman_bin_Nasir1", "gender": ";;M", "homepage": ";;", "dblp": ";;", "google_scholar": ";;", "orcid": ";;", "linkedin": ";;chnaumannasir/", "or_profile": "hinanasirch@gmail.com;~Dr_Archana_Pandita1;~Chaudhary_Nauman_bin_Nasir1", "aff": ";;", "aff_domain": ";;", "position": ";;", "bibtex": "@misc{\nnasir2024significance,\ntitle={Significance of Fairly Distributed Instances and Optimal Ratio for Validation Set in Machine Learning},\nauthor={Hina Nasir and Dr Archana Pandita and Chaudhary Nauman bin Nasir},\nyear={2024},\nurl={https://openreview.net/forum?id=1XHzHMQfcK}\n}", "github": "", "project": "", "reviewers": "", "site": "https://openreview.net/forum?id=1XHzHMQfcK", "pdf_size": 0, "rating": "", "confidence": "", "soundness": "", "contribution": "", "presentation": "", "wc_summary": "", "wc_strengths": "", "wc_weaknesses": "", "wc_questions": "", "wc_review": "", "wc_reply_reviewers": "", "wc_reply_authors": "", "reply_reviewers": "", "reply_authors": "", "rating_avg": [ 0, 0 ], "confidence_avg": [ 0, 0 ], "soundness_avg": [ 0, 0 ], "contribution_avg": [ 0, 0 ], "presentation_avg": [ 0, 0 ], "wc_summary_avg": [ 0, 0 ], "wc_strengths_avg": [ 0, 0 ], "wc_weaknesses_avg": [ 0, 0 ], "wc_questions_avg": [ 0, 0 ], "wc_review_avg": [ 0, 0 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 0, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ye5NfBelT3gJ:scholar.google.com/&scioq=Significance+of+Fairly+Distributed+Instances+and+Optimal+Ratio+for+Validation+Set+in+Machine+Learning&hl=en&as_sdt=0,33", "gs_version_total": 0 }, { "id": "1XReHUSUp9", "title": "Monsters in the Dark: Sanitizing Hidden Threats with Diffusion Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "Steganography is the art of hiding information in plain sight. This form of covert communication can be used by bad actors to propagate malware, exfiltrate victim data, and communicate with other bad actors. Current image steganography defenses rely upon steganalysis, or the detection of hidden messages. These methods, however, are non-blind as they require information about known steganography techniques and are easily bypassed. Recent work has instead focused on a defense mechanism known as sanitization, which eliminates hidden information from images. In this work, we introduce a novel blind deep learning steganography sanitization method that utilizes a diffusion model framework to sanitize universal and dependent steganography (DM-SUDS), which both sanitizes and preserves image quality. We evaluate this approach against state-of-the-art deep learning sanitization frameworks and provide further detailed analysis through an ablation study. DM-SUDS outperforms previous sanitization methods and improves image preservation MSE by 71.32\\%, PSNR by 22.43\\% and SSIM by 17.30\\%. 
This is the first blind deep learning image sanitization framework to meet these image quality results.", "keywords": "representation learning;security;computer vision;steganography", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/90b0f2d5d7ea4dac24695f7924948a9da1ba8c9f.pdf", "author": "Preston K Robinette;Daniel Moyer;Taylor T Johnson", "authorids": "~Preston_K_Robinette1;~Daniel_Moyer3;~Taylor_T_Johnson1", "gender": "F;M;M", "homepage": ";https://dcmoyer.github.io;http://www.TaylorTJohnson.com/", "dblp": "260/0809.html;187/6201;96/11505", "google_scholar": ";sKmoxSMAAAAJ;https://scholar.google.com.tw/citations?user=MdTkXNYAAAAJ", "orcid": "0000-0002-4906-2179;;0000-0001-8021-9923", "linkedin": "prestonrobinette/;;taylortjohnson/", "or_profile": "~Preston_K_Robinette1;~Daniel_Moyer2;~Taylor_Johnson1", "aff": "Vanderbilt University;Vanderbilt University;Vanderbilt University", "aff_domain": "vanderbilt.edu;vanderbilt.edu;vanderbilt.edu", "position": "PhD student;Assistant Professor;Associate Professor", "bibtex": "@misc{\nrobinette2024monsters,\ntitle={Monsters in the Dark: Sanitizing Hidden Threats with Diffusion Models},\nauthor={Preston K Robinette and Daniel Moyer and Taylor T Johnson},\nyear={2024},\nurl={https://openreview.net/forum?id=1XReHUSUp9}\n}", "github": "", "project": "", "reviewers": "zZQW;158U;sHuH;Ha3d", "site": "https://openreview.net/forum?id=1XReHUSUp9", "pdf_size": 1045040, "rating": "3;5;6;8", "confidence": "5;5;3;5", "soundness": "2;3;3;3", "contribution": "1;2;3;3", "presentation": "2;3;3;4", "wc_summary": "305;58;29;85", "wc_strengths": "13;21;15;52", "wc_weaknesses": "83;515;62;215", "wc_questions": "39;2;6;99", "wc_review": "440;596;112;451", "wc_reply_reviewers": "372;0;0;0", "wc_reply_authors": "1684;750;204;546", "reply_reviewers": "1;0;0;0", "reply_authors": "3;1;1;1", "rating_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 4.5, 0.8660254037844386 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 119.25, 109.05589163360226 ], "wc_strengths_avg": [ 25.25, 15.722197683530124 ], "wc_weaknesses_avg": [ 218.75, 180.8153408867732 ], "wc_questions_avg": [ 36.5, 38.836194458262774 ], "wc_review_avg": [ 399.75, 177.17276173272234 ], "wc_reply_reviewers_avg": [ 93.0, 161.0807251039056 ], "wc_reply_authors_avg": [ 796.0, 548.5489950770123 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.16012815380508713, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9133272584659701835&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Vanderbilt University", "aff_unique_dep": "", "aff_unique_url": "https://www.vanderbilt.edu", "aff_unique_abbr": "Vanderbilt", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "1XarNmzbgG", "title": "Understanding of Server-Assisted Federated Learning with Incomplete Client Participation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Existing works in federated learning (FL) often assumes an ideal system with either full client or uniformly distributed client participation. 
\nHowever, in practice, it has been observed that some clients may never participate in FL training (aka incomplete client participation) due to a myriad of system heterogeneity factors. To mitigate impacts of incomplete client participation, a popular approach is the server-assisted federated learning (SA-FL) framework, where the server is equipped with an auxiliary dataset. However, despite the fact that SA-FL has been empirically shown to be effective in addressing the incomplete client participation problem, there remains a lack of theoretical understanding for SA-FL. Meanwhile, the ramifications of incomplete client participation in conventional FL is also poorly understood. These theoretical gaps motivate us to rigorously investigate SA-FL. Toward this end, to fully understand the impact of incomplete client participation on conventional FL, we first show that conventional FL is {\\em not} PAC-learnable under incomplete client participation in the worst case. Then, we show that the PAC-learnability of FL with incomplete client participation can indeed be revived by SA-FL, which theoretically justifies the use of SA-FL for the first time. Lastly, to provide practical guidance for SA-FL training under {\\em incomplete client participation}, we propose the SAFARI (server-assisted federated averaging) algorithm that enjoys the same linear convergence speedup guarantees as classic FL with ideal client participation assumptions, offering the first SA-FL algorithm with convergence guarantee. Extensive experiments on different datasets show SAFARI significantly improve the performance under incomplete client participation.", "keywords": "federated learning;client participation;probably approximately correct;statistical learning", "primary_area": "learning theory", "supplementary_material": "/attachment/8e2188b61ace254aa10b49d4f0fec35cdbd93f9d.pdf", "author": "Haibo Yang;Peiwen Qiu;Prashant Khanduri;Minghong Fang;Jia Liu", "authorids": "~Haibo_Yang1;~Peiwen_Qiu1;~Prashant_Khanduri1;~Minghong_Fang1;~Jia_Liu1", "gender": "M;F;M;M;M", "homepage": "https://haibo-yang-osu.github.io/homepage/;;https://sites.google.com/view/khanduri-prashant/home?authuser=0;https://minghongfang.com/;https://kevinliu-osu.github.io/index.html", "dblp": "43/7829-1;287/6757;158/4888;157/0863;", "google_scholar": "eyy22VoAAAAJ;LzaQe5sAAAAJ;;L6vkkC8AAAAJ;Ofx3dScAAAAJ", "orcid": "0000-0002-3245-2728;;;0000-0002-1365-3911;", "linkedin": ";peiwen-qiu/;prashant-khanduri-0497894b/;;", "or_profile": "~Haibo_Yang1;~Peiwen_Qiu1;~Prashant_Khanduri1;~Minghong_Fang1;~Jia_Liu1", "aff": "Rochester Institute of Technology;Ohio State University, Columbus;Wayne State University;Duke University;The Ohio State University", "aff_domain": "rit.edu;osu.edu;wayne.edu;duke.edu;osu.edu", "position": "Assistant Professor;PhD student;Assistant Professor;Postdoc;Assistant Professor", "bibtex": "@misc{\nyang2024understanding,\ntitle={Understanding of Server-Assisted Federated Learning with Incomplete Client Participation},\nauthor={Haibo Yang and Peiwen Qiu and Prashant Khanduri and Minghong Fang and Jia Liu},\nyear={2024},\nurl={https://openreview.net/forum?id=1XarNmzbgG}\n}", "github": "", "project": "", "reviewers": "65h6;JHPN;SJXE;Yuno", "site": "https://openreview.net/forum?id=1XarNmzbgG", "pdf_size": 473051, "rating": "3;3;5;8", "confidence": "4;4;3;3", "soundness": "2;3;2;3", "contribution": "2;2;2;3", "presentation": "3;2;2;4", "wc_summary": "117;50;137;155", "wc_strengths": "134;52;128;17", "wc_weaknesses": "2321;50;468;117", 
"wc_questions": "40;322;42;15", "wc_review": "2612;474;775;304", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.75, 2.0463381929681126 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 114.75, 39.72640809335775 ], "wc_strengths_avg": [ 82.75, 49.85666956386076 ], "wc_weaknesses_avg": [ 739.0, 927.061216964662 ], "wc_questions_avg": [ 104.75, 125.87965482952359 ], "wc_review_avg": [ 1041.25, 922.4227271159357 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8551861104941366, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:-pEe_yhagpoJ:scholar.google.com/&scioq=Understanding+of+Server-Assisted+Federated+Learning+with+Incomplete+Client+Participation&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;1", "aff_unique_norm": "Rochester Institute of Technology;Ohio State University;Wayne State University;Duke University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.rit.edu;https://www.osu.edu;https://wayne.edu;https://www.duke.edu", "aff_unique_abbr": "RIT;OSU;WSU;Duke", "aff_campus_unique_index": "1", "aff_campus_unique": ";Columbus", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "1Xcqp27Unx", "title": "Knowledge Crosswords: Geometric Reasoning over Structured Knowledge with Large Language Models", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Large language models (LLMs) are widely adopted in knowledge-intensive tasks and have achieved impressive performance thanks to their knowledge abilities. While LLMs have demonstrated outstanding performance on atomic or linear (multi-hop) QA tasks, whether they can reason in knowledge-rich scenarios with interweaving constraints remains an underexplored problem. In this work, we propose geometric reasoning over structured knowledge, where pieces of knowledge are connected in a graph structure and models need to fill in the missing information of this graph. Such geometric knowledge reasoning would require the ability to handle structured knowledge, reason with uncertainty, verify facts, and backtrack when an error occurs. We specifically propose Knowledge Crosswords, a multi-blank QA dataset where each problem consists of a natural language question representing the geometric constraints of an incomplete entity network, where LLMs are tasked with working out the missing entities while meeting all factual constraints. Knowledge Crosswords contains 2,101 individual problems, covering a wide array of knowledge domains and further divided into three difficulty levels. We conduct extensive experiments to evaluate existing LLM prompting approaches on the Knowledge Crosswords benchmark. We additionally propose two new approaches, Staged Prompting and Verify-All, to augment LLMs' ability to backtrack and verify structured constraints. Our results demonstrate that while baseline approaches perform well on easier problems but struggle with questions on the hard side, our proposed Verify-All outperforms other methods by a large margin and is more robust with hard problems. 
Further analysis reveals that LLMs' ability of geometric reasoning over structured knowledge is still far from robust or perfect, susceptible to confounders such as the order of options, certain structural patterns, assumption of existence of correct answer, and more.", "keywords": "knowledge crosswords;large language models;structured knowledge;geometric reasoning", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/f587d2b679b6c426005c33071c62f80ffc216f60.zip", "author": "Wenxuan Ding;Shangbin Feng;Yuhan Liu;Zhaoxuan Tan;Vidhisha Balachandran;Tianxing He;Yulia Tsvetkov", "authorids": "~Wenxuan_Ding1;~Shangbin_Feng1;~Yuhan_Liu9;~Zhaoxuan_Tan1;~Vidhisha_Balachandran1;~Tianxing_He1;~Yulia_Tsvetkov1", "gender": "F;M;F;M;F;M;F", "homepage": "https://wenwen-d.github.io/;https://bunsenfeng.github.io/;https://www.yhliu-nlp.info/;https://tamsiuhin.github.io/;https://vidhishanair.github.io/;https://cloudygoose.github.io/;https://homes.cs.washington.edu/~yuliats/", "dblp": "36/1339-1;295/9571;;301/7706;234/4867;149/0111;75/8157", "google_scholar": "GyHBjwQAAAAJ;Y3rLP9UAAAAJ;or-2JE8AAAAJ;0KE2CZsAAAAJ;LgitgaIAAAAJ;egmfjjwAAAAJ;SEDPkrsAAAAJ", "orcid": ";0000-0002-4133-1987;;0000-0001-8230-6238;;;0000-0002-4634-7128", "linkedin": "wenxuan-ding-0b299923b/;;;zhaoxuan-tan-927132213/;;;", "or_profile": "~Wenxuan_Ding1;~Shangbin_Feng1;~Yuhan_Liu9;~Zhaoxuan_Tan1;~Vidhisha_Balachandran1;~Tianxing_He1;~Yulia_Tsvetkov1", "aff": "Hong Kong University of Science and Technology;University of Washington;Xi'an Jiaotong University;University of Notre Dame;Carnegie Mellon University;University of Washington;Department of Computer Science, University of Washington", "aff_domain": "ust.hk;cs.washington.edu;xjtu.edu.cn;nd.edu;cmu.edu;cs.washington.edu;cs.washington.edu", "position": "Undergrad student;PhD student;Undergrad student;PhD student;PhD student;Postdoc;Associate Professor", "bibtex": "@misc{\nding2024knowledge,\ntitle={Knowledge Crosswords: Geometric Reasoning over Structured Knowledge with Large Language Models},\nauthor={Wenxuan Ding and Shangbin Feng and Yuhan Liu and Zhaoxuan Tan and Vidhisha Balachandran and Tianxing He and Yulia Tsvetkov},\nyear={2024},\nurl={https://openreview.net/forum?id=1Xcqp27Unx}\n}", "github": "", "project": "", "reviewers": "3S11;DY7D;iJEx;mbzm", "site": "https://openreview.net/forum?id=1Xcqp27Unx", "pdf_size": 780044, "rating": "3;3;5;6", "confidence": "4;4;3;4", "soundness": "2;3;3;4", "contribution": "2;1;2;2", "presentation": "3;3;3;4", "wc_summary": "59;161;62;120", "wc_strengths": "59;186;30;128", "wc_weaknesses": "101;659;43;156", "wc_questions": "109;278;27;58", "wc_review": "328;1284;162;462", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 100.5, 42.55878287733332 ], "wc_strengths_avg": [ 100.75, 60.74279792699707 ], "wc_weaknesses_avg": [ 239.75, 245.32975257803525 ], "wc_questions_avg": [ 118.0, 96.90459225444376 ], "wc_review_avg": [ 559.0, 431.8576154243433 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 7, 0 ], 
"corr_rating_confidence": -0.3333333333333333, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3981051923048782917&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;4;1;1", "aff_unique_norm": "Hong Kong University of Science and Technology;University of Washington;Xi'an Jiao Tong University;University of Notre Dame;Carnegie Mellon University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.ust.hk;https://www.washington.edu;https://www.xjtu.edu.cn;https://www.nd.edu;https://www.cmu.edu", "aff_unique_abbr": "HKUST;UW;XJTU;Notre Dame;CMU", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Hong Kong SAR;;Seattle", "aff_country_unique_index": "0;1;0;1;1;1;1", "aff_country_unique": "China;United States" }, { "title": "A Variational Perspective on Solving Inverse Problems with Diffusion Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19583", "id": "1YO4EE3SPB", "author_site": "Morteza Mardani, Jiaming Song, Jan Kautz, Arash Vahdat", "tldr": "", "abstract": "Diffusion models have emerged as a key pillar of foundation models in visual domains. One of their critical applications is to universally solve different downstream inverse tasks via a single diffusion prior without re-training for each task. Most inverse tasks can be formulated as inferring a posterior distribution over data (e.g., a full image) given a measurement (e.g., a masked image). This is however challenging in diffusion models since the nonlinear and iterative nature of the diffusion process renders the posterior intractable. To cope with this challenge, we propose a variational approach that by design seeks to approximate the true posterior distribution. We show that our approach naturally leads to regularization by denoising diffusion process (RED-diff) where denoisers at different timesteps concurrently impose different structural constraints over the image. To gauge the contribution of denoisers from different timesteps, we propose a weighting mechanism based on signal-to-noise-ratio (SNR). Our approach provides a new variational perspective for solving inverse problems with diffusion models, allowing us to formulate sampling as stochastic optimization, where one can simply apply off-the-shelf solvers with lightweight iterates. Our experiments for various linear and nonlinear image restoration tasks demonstrate the strengths of our method compared with state-of-the-art sampling-based diffusion models. 
The code is available online \\footnote{\\url{https://github.com/NVlabs/RED-diff}}.", "keywords": "diffusion models;score matching;variational approximation;regularization by denoising;inverse problems", "primary_area": "generative models", "supplementary_material": "", "author": "Morteza Mardani;Jiaming Song;Jan Kautz;Arash Vahdat", "authorids": "~Morteza_Mardani1;~Jiaming_Song1;~Jan_Kautz1;~Arash_Vahdat3", "gender": "M;M;;M", "homepage": "http://web.stanford.edu/~morteza/;http://tsong.me;http://jankautz.com;http://latentspace.cc/", "dblp": "74/258;173/5104;48/6214;92/8108", "google_scholar": "H7edsyEAAAAJ;;P9FclNEAAAAJ;https://scholar.google.ca/citations?user=p9-nlRIAAAAJ", "orcid": ";;;", "linkedin": ";jiamings/;;", "or_profile": "~Morteza_Mardani1;~Jiaming_Song1;~Jan_Kautz1;~Arash_Vahdat3", "aff": ";Luma AI;NVIDIA;NVIDIA", "aff_domain": ";lumalabs.ai;nvidia.com;nvidia.com", "position": ";Chief Scientist;VP Research;Research Scientist", "bibtex": "@inproceedings{\nmardani2024a,\ntitle={A Variational Perspective on Solving Inverse Problems with Diffusion Models},\nauthor={Morteza Mardani and Jiaming Song and Jan Kautz and Arash Vahdat},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=1YO4EE3SPB}\n}", "github": "", "project": "", "reviewers": "5Auf;RLDf;Qren;xtWM", "pdf_size": 19747124, "rating": "5;5;6;6", "confidence": "4;4;3;5", "soundness": "2;2;3;3", "contribution": "3;3;3;3", "presentation": "2;3;3;3", "wc_summary": "87;64;45;79", "wc_strengths": "76;76;58;37", "wc_weaknesses": "142;321;88;400", "wc_questions": "791;36;135;24", "wc_review": "1096;497;326;540", "wc_reply_reviewers": "0;222;0;0", "wc_reply_authors": "797;699;499;723", "reply_reviewers": "0;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 68.75, 16.005858302509115 ], "wc_strengths_avg": [ 61.75, 16.068213964221414 ], "wc_weaknesses_avg": [ 237.75, 127.3270886339588 ], "wc_questions_avg": [ 246.5, 317.3046643212167 ], "wc_review_avg": [ 614.75, 289.1499394777734 ], "wc_reply_reviewers_avg": [ 55.5, 96.12881982007269 ], "wc_reply_authors_avg": [ 679.5, 110.29392549002868 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 116, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8814413069022914272&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=1YO4EE3SPB", "pdf": "https://openreview.net/pdf?id=1YO4EE3SPB", "email": ";lumalabs.ai;nvidia.com;nvidia.com", "author_num": 4, "aff_unique_index": "0;1;1", "aff_unique_norm": "Luma AI;NVIDIA", "aff_unique_dep": ";NVIDIA Corporation", "aff_unique_url": "https://www.luma.ai;https://www.nvidia.com", "aff_unique_abbr": "Luma AI;NVIDIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Defining Expertise: Applications to Treatment Effect Estimation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19582", "id": "1YPfmglNRU", "author_site": "Alihan H\u00fcy\u00fck, Qiyao Wei, Alicia Curth, Mihaela van der Schaar", "tldr": "", "abstract": "Decision-makers are often experts of their 
domain and take actions based on their domain knowledge. Doctors, for instance, may prescribe treatments by predicting the likely outcome of each available treatment. Actions of an expert thus naturally encode part of their domain knowledge, and can help make inferences within the same domain: Knowing doctors try to prescribe the best treatment for their patients, we can tell treatments prescribed more frequently are likely to be more effective. Yet in machine learning, the fact that most decision-makers are experts is often overlooked, and \u201cexpertise\u201d is seldom leveraged as an inductive bias. This is especially true for the literature on treatment effect estimation, where often the only assumption made about actions is that of overlap. In this paper, we argue that expertise\u2014particularly the type of expertise the decision-makers of a domain are likely to have\u2014can be informative in designing and selecting methods for treatment effect estimation. We formally define two types of expertise, predictive and prognostic, and demonstrate empirically that: (i) the prominent type of expertise in a domain significantly influences the performance of different methods in treatment effect estimation, and (ii) it is possible to predict the type of expertise present in a dataset, which can provide a quantitative basis for model selection.", "keywords": "expertise;model selection;balancing representations;treatment effect estimation", "primary_area": "causal reasoning", "supplementary_material": "", "author": "Alihan H\u00fcy\u00fck;Qiyao Wei;Alicia Curth;Mihaela van der Schaar", "authorids": "~Alihan_H\u00fcy\u00fck1;~Qiyao_Wei1;~Alicia_Curth1;~Mihaela_van_der_Schaar2", "gender": ";M;F;F", "homepage": ";https://qiyaowei.github.io;;https://www.vanderschaar-lab.com", "dblp": "227/2296;327/3121;261/8064;", "google_scholar": "EMq6KwMAAAAJ;;eWRBqsYAAAAJ;DZ3S--MAAAAJ", "orcid": ";;;", "linkedin": ";qiyaowei;;", "or_profile": "~Alihan_H\u00fcy\u00fck1;~Qiyao_Wei1;~Alicia_Curth1;~Mihaela_van_der_Schaar2", "aff": "University of Cambridge;University of Cambridge;University of Cambridge;University of California, Los Angeles", "aff_domain": "cam.ac.uk;cam.ac.uk;cam.ac.uk;ucla.edu", "position": "PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nh{\\\"u}y{\\\"u}k2024defining,\ntitle={Defining Expertise: Applications to Treatment Effect Estimation},\nauthor={Alihan H{\\\"u}y{\\\"u}k and Qiyao Wei and Alicia Curth and Mihaela van der Schaar},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=1YPfmglNRU}\n}", "github": "", "project": "", "reviewers": "ScD8;7Wu5;uYNJ;obyv", "pdf_size": 425361, "rating": "5;6;8;8", "confidence": "2;2;4;3", "soundness": "4;2;4;2", "contribution": "1;3;3;3", "presentation": "2;3;3;4", "wc_summary": "211;143;161;227", "wc_strengths": "65;80;71;71", "wc_weaknesses": "218;8;190;118", "wc_questions": "106;259;130;153", "wc_review": "600;490;552;569", "wc_reply_reviewers": "0;0;35;36", "wc_reply_authors": "585;724;964;899", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;3;3", "rating_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 1.0 ], "contribution_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 185.5, 34.565155865408734 ], "wc_strengths_avg": [ 71.75, 5.356071321407137 ], "wc_weaknesses_avg": [ 133.5, 81.12182197164952 ], "wc_questions_avg": [ 
162.0, 58.41660722773961 ], "wc_review_avg": [ 552.75, 40.10844674130376 ], "wc_reply_reviewers_avg": [ 17.75, 17.75352077758099 ], "wc_reply_authors_avg": [ 793.0, 148.74642852855325 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 1.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8703882797784892, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1782251837120903391&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=1YPfmglNRU", "pdf": "https://openreview.net/pdf?id=1YPfmglNRU", "email": "cam.ac.uk;cam.ac.uk;cam.ac.uk;ucla.edu", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of Cambridge;University of California, Los Angeles", "aff_unique_dep": ";", "aff_unique_url": "https://www.cam.ac.uk;https://www.ucla.edu", "aff_unique_abbr": "Cambridge;UCLA", "aff_campus_unique_index": "0;0;0;1", "aff_campus_unique": "Cambridge;Los Angeles", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United Kingdom;United States" }, { "id": "1YSJW69CFQ", "title": "Enhancing Machine Learning System Reliability in Healthcare through Uncertainty Estimation and Multi-Modal Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "It is crucial to ensure the dependability of machine learning (ML) systems, especially in areas where safety is a top priority, like healthcare. A tried-and-true method for highlighting the reliability of ML systems during deployment is uncertainty estimation. By successfully using integrated feature sets, sequential and parallel ensemble algorithms have both shown improved ML system performance in multi-modal contexts. We provide Uncertainty-Receptive fusing (URF), a cutting-edge technique that uses uncertainty estimations to improve the fusing of predictions from several base learners. URF, which successively modifies the weighting of the loss function during training in contrast to conventional boosting techniques, is especially successful for multi-modal learning tasks. In order to understand how noise and spatial transformations affect image-based activities, we then offer an image acquisition model that takes these aspects into consideration. We can make predictions with greater accuracy utilizing latent variables thanks to this approach. To quantify uncertainty at the pixel and structure/lesion levels, we use entropy-based uncertainty assessment (EUA). EUA measures the variety within prediction distributions and provides insightful information about the model's confidence. 
We also present Gnostic Uncertainty Estimation (GUE), which quantifies the model's lack of knowledge regarding the result and helps to comprehend the accuracy of the prediction.", "keywords": "Gnostic Uncertainty Estimation; Machine Learning Reliability; Uncertainty Estimation; Healthcare", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Saeed Iqbal Khattak;Adnan Nabeel Qureshi;khalid javeed;Khursheed Aurangzeb", "authorids": "~Saeed_Iqbal_Khattak1;~Adnan_Nabeel_Qureshi1;~khalid_javeed1;~Khursheed_Aurangzeb1", "gender": "M;M;;M", "homepage": ";;;https://faculty.ksu.edu.sa/en/kaurangzeb", "dblp": ";;;214/8964.html", "google_scholar": "https://scholar.google.com.pk/citations?user=Cj7u9FkAAAAJ;https://scholar.google.co.uk/citations?hl=en;https://scholar.google.com.pk/citations?user=GPJb34wAAAAJ;GBkDDr0AAAAJ", "orcid": ";;;0000-0003-3647-8578", "linkedin": "saeed-iqbal-32748b36;;;", "or_profile": "~Saeed_Iqbal_Khattak1;~Adnan_Nabeel_Qureshi1;~khalid_javeed1;~Khursheed_Aurangzeb1", "aff": "University of Central Punjab, Lahore;Birmingham Newman University, UK;University of Sharjah;", "aff_domain": "ucp.edu.pk;newman.ac.uk;sharja.ac.ae;", "position": "PhD student;Associate Professor;Assistant Professor;", "bibtex": "@misc{\nkhattak2024enhancing,\ntitle={Enhancing Machine Learning System Reliability in Healthcare through Uncertainty Estimation and Multi-Modal Learning},\nauthor={Saeed Iqbal Khattak and Adnan Nabeel Qureshi and khalid javeed and Khursheed Aurangzeb},\nyear={2024},\nurl={https://openreview.net/forum?id=1YSJW69CFQ}\n}", "github": "", "project": "", "reviewers": "fEUv;fQud;8rRf", "site": "https://openreview.net/forum?id=1YSJW69CFQ", "pdf_size": 2495881, "rating": "1;1;3", "confidence": "4;4;3", "soundness": "1;1;2", "contribution": "1;1;2", "presentation": "1;1;2", "wc_summary": "42;97;76", "wc_strengths": "20;12;85", "wc_weaknesses": "446;1071;128", "wc_questions": "46;78;8", "wc_review": "554;1258;297", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 1.6666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 1.3333333333333333, 0.4714045207910317 ], "contribution_avg": [ 1.3333333333333333, 0.4714045207910317 ], "presentation_avg": [ 1.3333333333333333, 0.4714045207910317 ], "wc_summary_avg": [ 71.66666666666667, 22.661764175711376 ], "wc_strengths_avg": [ 39.0, 32.69046751985457 ], "wc_weaknesses_avg": [ 548.3333333333334, 391.71957089507566 ], "wc_questions_avg": [ 44.0, 28.61235164516658 ], "wc_review_avg": [ 703.0, 406.22735834341177 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9999999999999997, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:QI2IB2ndglsJ:scholar.google.com/&scioq=Enhancing+Machine+Learning+System+Reliability+in+Healthcare+through+Uncertainty+Estimation+and+Multi-Modal+Learning&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Central Punjab;Birmingham Newman University;University of Sharjah", "aff_unique_dep": ";;", "aff_unique_url": "https://ucp.edu.pk;https://www.birminghamnewman.ac.uk;https://www.sharjah.ac.ae", "aff_unique_abbr": "UCP;;UOS", "aff_campus_unique_index": "0", "aff_campus_unique": 
"Lahore;", "aff_country_unique_index": "0;1;2", "aff_country_unique": "Pakistan;United Kingdom;United Arab Emirates" }, { "id": "1Yq7zIOfj0", "title": "On the Similarity between Attention and SVM on the Token Separation and Selection Behavior", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "The attention mechanism underpinning the transformer architecture is effective in learning the token interaction within a sequence via softmax similarity. However, the current theoretical understanding on optimization dynamics of the softmax attention is insufficient in characterizing how attention performs intrinsic token separation and selection, which is crucial to sequence-level understanding tasks. On the other hand, support vector machines have been well-studied of its max-margin separation behaviour. In this paper, we will formulate the softmax attention convergence dynamics as hard-margin SVM optimization problem. We adopt a tensor trick to formulate the matrix-based attention optimization problem and relax the strong assumptions on the derivative of the loss function from the prior works. As a result, we demonstrate that gradient descent converges to the optimal solution for SVM. In addition, we show softmax is more stable than other linear attention through analysis on their lipschitz. Our theoretical insights are validated through numerical experiments, shedding insights on the convergence dynamics of softmax attention as the foundational stones on the success of the large language models.", "keywords": "Transformer;SVM;Convergence Dynamics;Optimization", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/7a95412f2c91452c08645bde982129c1aa02e9ef.pdf", "author": "Beidi Chen;Wentao Guo;Zhihang Li;Zhao Song;Tianyi Zhou", "authorids": "~Beidi_Chen1;~Wentao_Guo1;~Zhihang_Li2;~Zhao_Song3;~Tianyi_Zhou4", "gender": "F;M;M;M;", "homepage": "https://www.andrew.cmu.edu/user/beidic/;http://wentaoguo.me/;https://dblp.org/pid/122/5633.html;https://www.youtube.com/@zhaosong2031;", "dblp": "192/1339;;122/5633;76/4051-2;", "google_scholar": ";7uHQMsYAAAAJ;Y9Rafe8AAAAJ;yDZct7UAAAAJ;", "orcid": ";;;;", "linkedin": ";wentao-guo-11b03217b/;zhihang-li-90a888332/;;", "or_profile": "~Beidi_Chen1;~Wentao_Guo1;~Zhihang_Li2;~Zhao_Song3;~Tianyi_Zhou4", "aff": "Meta Facebook;Department of Computer Science, Princeton University;Huazhong Agricultural University;Adobe;", "aff_domain": "fb.com;cs.princeton.edu;hzau.edu.cn;adobe.com;", "position": "Researcher;PhD student;Undergrad student;Researcher;", "bibtex": "@misc{\nchen2024on,\ntitle={On the Similarity between Attention and {SVM} on the Token Separation and Selection Behavior},\nauthor={Beidi Chen and Wentao Guo and Zhihang Li and Zhao Song and Tianyi Zhou},\nyear={2024},\nurl={https://openreview.net/forum?id=1Yq7zIOfj0}\n}", "github": "", "project": "", "reviewers": "", "site": "https://openreview.net/forum?id=1Yq7zIOfj0", "pdf_size": 1150423, "rating": "", "confidence": "", "soundness": "", "contribution": "", "presentation": "", "wc_summary": "", "wc_strengths": "", "wc_weaknesses": "", "wc_questions": "", "wc_review": "", "wc_reply_reviewers": "", "wc_reply_authors": "", "reply_reviewers": "", "reply_authors": "", "rating_avg": [ 0, 0 ], "confidence_avg": [ 0, 0 ], "soundness_avg": [ 0, 0 ], "contribution_avg": [ 0, 0 ], "presentation_avg": [ 0, 0 ], "wc_summary_avg": [ 0, 0 ], "wc_strengths_avg": [ 0, 0 ], "wc_weaknesses_avg": [ 0, 0 ], "wc_questions_avg": 
[ 0, 0 ], "wc_review_avg": [ 0, 0 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 0, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:FJGDc5dQKwYJ:scholar.google.com/&scioq=On+the+Similarity+between+Attention+and+SVM+on+the+Token+Separation+and+Selection+Behavior&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Meta;Princeton University;Huazhong Agricultural University;Adobe", "aff_unique_dep": "Meta Platforms, Inc.;Department of Computer Science;;Adobe Inc.", "aff_unique_url": "https://meta.com;https://www.princeton.edu;http://www.hzau.edu.cn/;https://www.adobe.com", "aff_unique_abbr": "Meta;Princeton;HAU;Adobe", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;China" }, { "id": "1armpjgh8L", "title": "Adaptive Hierarchical Certification for Semantic Segmentation using Randomized Smoothing", "track": "main", "status": "Reject", "tldr": "", "abstract": "Common certification methods operate on a flat pre-defined set of fine-grained classes. In this paper, however, we propose a novel, more general, and practical setting, namely adaptive hierarchical certification for image semantic segmentation. In this setting, the certification can be within a multi-level hierarchical label space composed of fine to coarse levels. Unlike classic methods where the certification would abstain for unstable components, our approach adaptively relaxes the certification to a coarser level within the hierarchy. This relaxation lowers the abstain rate whilst providing more certified semantically meaningful information. We mathematically formulate the problem setup and introduce, for the first time, an adaptive hierarchical certification algorithm for image semantic segmentation, that certifies image pixels within a hierarchy and prove the correctness of its guarantees. Since certified accuracy does not take the loss of information into account when traversing into a coarser hierarchy level, we introduce a novel evaluation paradigm for adaptive hierarchical certification, namely the certified information gain metric, which is proportional to the class granularity level. 
Our evaluation experiments on real-world challenging datasets such as Cityscapes and ACDC demonstrate that our adaptive algorithm achieves a higher certified information gain and a lower abstain rate compared to the current state-of-the-art certification method, as well as other non-adaptive versions of it.", "keywords": "certification for segmentation;image semantic segmentation;hierarchical certification;certification for machine learning;certified robustness", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "", "author": "Alaa Anani;Tobias Lorenz;Mario Fritz;Bernt Schiele", "authorids": "~Alaa_Anani1;~Tobias_Lorenz1;~Mario_Fritz1;~Bernt_Schiele1", "gender": "F;M;M;M", "homepage": "https://www.mpi-inf.mpg.de/departments/computer-vision-and-machine-learning/people/alaa-anani;https://www.t-lorenz.com/;https://cispa.saarland/group/fritz/;http://www.mpi-inf.mpg.de/~schiele", "dblp": "368/7744;25/6006-2;;s/BerntSchiele", "google_scholar": "eJgMcksAAAAJ;gf-aMd0AAAAJ;https://scholar.google.de/citations?user=4V1nNm4AAAAJ;https://scholar.google.de/citations?user=z76PBfYAAAAJ", "orcid": ";0000-0003-4369-2644;;0000-0001-9683-5237", "linkedin": "aaanani/;;;", "or_profile": "~Alaa_Anani1;~Tobias_Lorenz1;~Mario_Fritz1;~Bernt_Schiele1", "aff": "Saarland Informatics Campus, Max-Planck Institute;CISPA Helmholtz Center for Information Security;Saarland University;Max Planck Institute for Informatics, Saarland Informatics Campus", "aff_domain": "mpi-inf.mpg.de;cispa.de;uni-saarland.de;mpi-inf.mpg.de", "position": "MS student;PhD student;Full Professor;Full Professor", "bibtex": "@misc{\nanani2024adaptive,\ntitle={Adaptive Hierarchical Certification for Semantic Segmentation using Randomized Smoothing},\nauthor={Alaa Anani and Tobias Lorenz and Mario Fritz and Bernt Schiele},\nyear={2024},\nurl={https://openreview.net/forum?id=1armpjgh8L}\n}", "github": "", "project": "", "reviewers": "riuJ;2yaR;dRWJ;bW5R", "site": "https://openreview.net/forum?id=1armpjgh8L", "pdf_size": 44746158, "rating": "3;3;3;6", "confidence": "4;3;3;1", "soundness": "2;2;2;2", "contribution": "3;1;2;2", "presentation": "3;2;2;2", "wc_summary": "72;46;57;51", "wc_strengths": "71;13;34;22", "wc_weaknesses": "223;35;269;47", "wc_questions": "86;12;5;54", "wc_review": "452;106;365;174", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "463;226;777;64", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 2.75, 1.0897247358851685 ], "soundness_avg": [ 2.0, 0.0 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 56.5, 9.759610647971568 ], "wc_strengths_avg": [ 35.0, 22.079402165819616 ], "wc_weaknesses_avg": [ 143.5, 103.86890776358439 ], "wc_questions_avg": [ 39.25, 32.85859857023729 ], "wc_review_avg": [ 274.25, 139.81125669988094 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 382.5, 268.34911961845523 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9271726499455307, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:6aYZVWoVhQIJ:scholar.google.com/&scioq=Adaptive+Hierarchical+Certification+for+Semantic+Segmentation+using+Randomized+Smoothing&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Max-Planck Institute;CISPA Helmholtz Center for 
Information Security;Saarland University;Max Planck Institute for Informatics", "aff_unique_dep": "Informatics;;;", "aff_unique_url": "https://www.mpi-sws.org;https://www.cispa.de/;https://www.uni-saarland.de;https://mpi-inf.mpg.de", "aff_unique_abbr": "MPI-SWS;CISPA;UdS;MPII", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Saarland;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "title": "DreamTime: An Improved Optimization Strategy for Diffusion-Guided 3D Generation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19581", "id": "1bAUywYJTU", "author_site": "Yukun Huang, Jianan Wang, Yukai Shi, Boshi Tang, Xianbiao Qi, Lei Zhang", "tldr": "", "abstract": "Text-to-image diffusion models pre-trained on billions of image-text pairs have recently enabled 3D content creation by optimizing a randomly initialized differentiable 3D representation with score distillation. However, the optimization process suffers from slow convergence and the resultant 3D models often exhibit two limitations: (a) quality concerns such as missing attributes and distorted shape and texture; (b) extremely low diversity compared to text-guided image synthesis. In this paper, we show that the conflict between the 3D optimization process and uniform timestep sampling in score distillation is the main reason for these limitations. To resolve this conflict, we propose to prioritize timestep sampling with monotonically non-increasing functions, which aligns the 3D optimization process with the sampling process of the diffusion model. Extensive experiments show that our simple redesign significantly improves 3D content creation with faster convergence, better quality and diversity.", "keywords": "Score Distillation;3D Content Creation;Diffusion Model", "primary_area": "generative models", "supplementary_material": "", "author": "Yukun Huang;Jianan Wang;Yukai Shi;Boshi Tang;Xianbiao Qi;Lei Zhang", "authorids": "~Yukun_Huang1;~Jianan_Wang2;~Yukai_Shi3;~Boshi_Tang1;~Xianbiao_Qi2;~Lei_Zhang23", "gender": "M;F;;M;M;M", "homepage": ";https://scholar.google.com/citations?user=mt5mvZ8AAAAJ&hl=en;https://shiyukai26.github.io/info/;https://github.com/TangYucopper;https://www.linkedin.com/in/xianbiao-qi-39617727/;https://www.leizhang.org/", "dblp": "186/1316;49/6053,;;;118/3741;z/LeiZhang", "google_scholar": "lHb5gzoAAAAJ;mt5mvZ8AAAAJ;oQXfkSQAAAAJ;;odjSydQAAAAJ;fIlGZToAAAAJ", "orcid": "0000-0002-5322-2884;;;;;", "linkedin": ";;;;;", "or_profile": "~Yukun_Huang1;~Jianan_Wang2;~Yukai_Shi3;~Boshi_Tang1;~Xianbiao_Qi2;~Lei_Zhang1", "aff": "University of Hong Kong;International Digital Economy Academy (IDEA);Tsinghua University;Tsinghua University;International Digital Economy Academy;International Digital Economy Academy", "aff_domain": "hku.hk;idea.edu.cn;mail.tsinghua.edu.cn;mails.tsinghua.edu.cn;idea.edu.cn;idea.edu.cn", "position": "Postdoc;Researcher;PhD student;MS student;Researcher;Chief Scientist", "bibtex": "@inproceedings{\nhuang2024dreamtime,\ntitle={DreamTime: An Improved Optimization Strategy for Diffusion-Guided 3D Generation},\nauthor={Yukun Huang and Jianan Wang and Yukai Shi and Boshi Tang and Xianbiao Qi and Lei Zhang},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=1bAUywYJTU}\n}", "github": "", "project": "", "reviewers": "zkfR;5Rpq;DnQ9;iLxc", "pdf_size": 9084391, "rating": "3;6;6;8", "confidence": "4;5;4;4", "soundness": "3;3;4;3", "contribution": "2;3;3;3",
"presentation": "3;3;4;4", "wc_summary": "112;131;61;67", "wc_strengths": "24;74;115;110", "wc_weaknesses": "152;85;99;104", "wc_questions": "26;216;37;26", "wc_review": "314;506;312;307", "wc_reply_reviewers": "0;0;0;16", "wc_reply_authors": "549;626;212;226", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 92.75, 29.600464523382062 ], "wc_strengths_avg": [ 80.75, 36.38251640554842 ], "wc_weaknesses_avg": [ 110.0, 25.228951623085727 ], "wc_questions_avg": [ 76.25, 80.80957554646602 ], "wc_review_avg": [ 359.75, 84.47595811827173 ], "wc_reply_reviewers_avg": [ 4.0, 6.928203230275509 ], "wc_reply_authors_avg": [ 403.25, 186.31609565466962 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.08084520834544431, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5988579552726499121&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=1bAUywYJTU", "pdf": "https://openreview.net/pdf?id=1bAUywYJTU", "email": "hku.hk;idea.edu.cn;mail.tsinghua.edu.cn;mails.tsinghua.edu.cn;idea.edu.cn;idea.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2;2;1;1", "aff_unique_norm": "University of Hong Kong;International Digital Economy Academy;Tsinghua University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.hku.hk;;https://www.tsinghua.edu.cn", "aff_unique_abbr": "HKU;IDEA;THU", "aff_campus_unique_index": "0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China;" }, { "title": "I-PHYRE: Interactive Physical Reasoning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19580", "id": "1bbPQShCT2", "author_site": "Shiqian Li, Kewen Wu, Chi Zhang, Yixin Zhu", "tldr": "", "abstract": "Current evaluation protocols predominantly assess physical reasoning in stationary scenes, creating a gap in evaluating agents' abilities to interact with dynamic events. While contemporary methods allow agents to modify initial scene configurations and observe consequences, they lack the capability to interact with events in real time. To address this, we introduce I-PHYRE, a framework that challenges agents to simultaneously exhibit intuitive physical reasoning, multi-step planning, and in-situ intervention. Here, intuitive physical reasoning refers to a quick, approximate understanding of physics to address complex problems; multi-step denotes the need for extensive sequence planning in I-PHYRE, considering each intervention can significantly alter subsequent choices; and in-situ implies the necessity for timely object manipulation within a scene, where minor timing deviations can result in task failure. We formulate four game splits to scrutinize agents' learning and generalization of essential principles of interactive physical reasoning, fostering learning through interaction with representative scenarios. Our exploration involves three planning strategies and examines several supervised and reinforcement agents' zero-shot generalization proficiency on I-PHYRE. 
The outcomes highlight a notable gap between existing learning algorithms and human performance, emphasizing the imperative for more research in enhancing agents with interactive physical reasoning capabilities. The environment and baselines will be made publicly available.", "keywords": "Intuitive physics;physical reasoning", "primary_area": "datasets and benchmarks", "supplementary_material": "/attachment/bf4202ce3dfc8cb3a1fdecda0c659cf93a36497d.zip", "author": "Shiqian Li;Kewen Wu;Chi Zhang;Yixin Zhu", "authorids": "~Shiqian_Li1;~Kewen_Wu2;~Chi_Zhang12;~Yixin_Zhu1", "gender": ";F;;M", "homepage": ";https://github.com/k101w;;https://yzhu.io/", "dblp": ";20/9169-4;;91/1103-1.html", "google_scholar": ";9zPBJE4AAAAJ;;qG9l6JEAAAAJ", "orcid": ";;;0000-0001-7024-1545", "linkedin": ";;;", "or_profile": "~Shiqian_Li1;~Kewen_Wu2;~Chi_Zhang12;~Yixin_Zhu1", "aff": ";Carnegie Mellon University;;Peking University", "aff_domain": ";andrew.cmu.edu;;pku.edu.cn", "position": ";MS student;;Assistant Professor", "bibtex": "@inproceedings{\nli2024iphyre,\ntitle={I-{PHYRE}: Interactive Physical Reasoning},\nauthor={Shiqian Li and Kewen Wu and Chi Zhang and Yixin Zhu},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=1bbPQShCT2}\n}", "github": "", "project": "", "reviewers": "PPjJ;p9BE;BYPY;yzoQ", "pdf_size": 1092524, "rating": "6;6;6;8", "confidence": "4;3;4;4", "soundness": "3;3;3;3", "contribution": "3;2;3;3", "presentation": "3;3;2;3", "wc_summary": "142;45;199;106", "wc_strengths": "96;48;256;126", "wc_weaknesses": "338;109;680;143", "wc_questions": "151;3;98;95", "wc_review": "727;205;1233;470", "wc_reply_reviewers": "97;17;0;24", "wc_reply_authors": "784;149;404;230", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 123.0, 55.924055646921744 ], "wc_strengths_avg": [ 131.5, 77.07626093681505 ], "wc_weaknesses_avg": [ 317.5, 226.79781744981585 ], "wc_questions_avg": [ 86.75, 53.237087636346146 ], "wc_review_avg": [ 658.75, 379.4524838500863 ], "wc_reply_reviewers_avg": [ 34.5, 37.12478956169314 ], "wc_reply_authors_avg": [ 391.75, 244.48964702007322 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9770961054998957492&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 6, "openreview": "https://openreview.net/forum?id=1bbPQShCT2", "pdf": "https://openreview.net/pdf?id=1bbPQShCT2", "email": ";andrew.cmu.edu;;pku.edu.cn", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Carnegie Mellon University;Peking University", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;http://www.pku.edu.cn", "aff_unique_abbr": "CMU;Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;China" }, { "title": "ReSimAD: Zero-Shot 3D Domain Transfer for Autonomous Driving with Source Reconstruction and Target Simulation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19579", "id": "1d2cLKeNgY", "author_site": "Bo Zhang, Xinyu Cai, Jiakang Yuan, 
Donglin Yang, Jianfei Guo, Xiangchao Yan, Renqiu Xia, Botian Shi, Min Dou, Tao Chen, Si Liu, Junchi Yan, Yu Qiao", "tldr": "", "abstract": "Domain shifts such as sensor type changes and geographical situation variations are prevalent in Autonomous Driving (AD), which poses a challenge since AD model relying on the previous domain knowledge can be hardly directly deployed to a new domain without additional costs. In this paper, we provide a new perspective and approach of alleviating the domain shifts, by proposing a Reconstruction-Simulation-Perception (ReSimAD) scheme. Specifically, the implicit reconstruction process is based on the knowledge from the previous old domain, aiming to convert the domain-related knowledge into domain-invariant representations, e.g., 3D scene-level meshes. Besides, the point clouds simulation process of multiple new domains is conditioned on the above reconstructed 3D meshes, where the target-domain-like simulation samples can be obtained, thus reducing the cost of collecting and annotating new-domain data for the subsequent perception process. For experiments, we consider different cross-domain situations such as Waymo-to-KITTI, Waymo-to-nuScenes, etc, to verify the zero-shot target-domain perception using ReSimAD. Results demonstrate that our method is beneficial to boost the domain generalization ability, even promising for 3D pre-training. Code and simulated points are available at: https://github.com/PJLab-ADG/3DTrans", "keywords": "Autonomous Driving;3D Domain Transfer;Zero-shot 3D Detection", "primary_area": "applications to robotics, autonomy, planning", "supplementary_material": "/attachment/84f21850a271ea5b6dcfea958210801e97bfa51a.zip", "author": "Bo Zhang;Xinyu Cai;Jiakang Yuan;Donglin Yang;Jianfei Guo;Xiangchao Yan;Renqiu Xia;Botian Shi;Min Dou;Tao Chen;Si Liu;Junchi Yan;Yu Qiao", "authorids": "~Bo_Zhang17;~Xinyu_Cai2;~Jiakang_Yuan1;~Donglin_Yang3;~Jianfei_Guo1;~Xiangchao_Yan1;~Renqiu_Xia2;~Botian_Shi1;~Min_Dou1;~Tao_Chen6;~Si_Liu5;~Junchi_Yan2;~Yu_Qiao1", "gender": "M;;M;M;M;;;M;M;M;F;;", "homepage": "https://bobrown.github.io/boZhang.github.io/;;https://jiakangyuan.github.io/;https://github.com/puffyyy;https://ventusff.github.io;https://github.com/sky-fly97;;;;https://eetchen.github.io/;https://colalab.net;;", "dblp": "36/2259-69;;323/7363;;305/7388.html;314/2496.html;;245/8742;;69/510-3;60/7642;;", "google_scholar": "https://scholar.google.com/citations?hl=en;;https://scholar.google.com/citations?hl=zh-CN;;MJb2_wYAAAAJ;0mMk6PMAAAAJ;;K0PpvLkAAAAJ;;https://scholar.google.com.sg/citations?user=w3OoFL0AAAAJ;https://scholar.google.com/citations?hl=zh-CN;;", "orcid": "0000-0001-8052-782X;0000-0001-8500-9300;;;0000-0002-5838-679X;;;0000-0003-3677-7252;;;0000-0002-9180-2935;;", "linkedin": ";;;;;;;friskit/;%E6%B0%91-%E7%AA%A6-a34b17a3;;;;", "or_profile": "~Bo_Zhang17;~Xinyu_Cai2;~Jiakang_Yuan1;~Donglin_Yang3;~Jianfei_Guo1;~Xiangchao_Yan1;~Renqiu_Xia2;~Botian_Shi1;~Min_Dou1;~Tao_Chen6;~Si_Liu5;~Junchi_Yan2;~Yu_Qiao1", "aff": "Shanghai Artificial Intelligence Laboratory;Shanghai Artificial Intelligence Laboratory;Shanghai AI Laboratory;Shanghai Artificial Intelligence Laboratory;Shanghai Artificial Intelligence Laboratory;Shanghai AI Laboratory;;Shanghai AI Lab;Shanghai AI Laboratory;Fudan University;Beihang University;;", "aff_domain": "pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;;pjlab.org.cn;pjlab.org.cn;fudan.edu.cn;buaa.edu.cn;;", "position": 
"Researcher;Researcher;Intern;Intern;Researcher;Researcher;;Researcher;Researcher;Full Professor;Full Professor;;", "bibtex": "@inproceedings{\nzhang2024resimad,\ntitle={ReSim{AD}: Zero-Shot 3D Domain Transfer for Autonomous Driving with Source Reconstruction and Target Simulation},\nauthor={Bo Zhang and Xinyu Cai and Jiakang Yuan and Donglin Yang and Jianfei Guo and Xiangchao Yan and Renqiu Xia and Botian Shi and Min Dou and Tao Chen and Si Liu and Junchi Yan and Yu Qiao},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=1d2cLKeNgY}\n}", "github": "", "project": "", "reviewers": "bhEL;Fz1j;kWr7;7ZCS", "pdf_size": 12391706, "rating": "5;6;6;6", "confidence": "4;4;3;3", "soundness": "2;3;2;3", "contribution": "2;3;2;3", "presentation": "1;2;1;3", "wc_summary": "90;68;122;84", "wc_strengths": "17;51;17;43", "wc_weaknesses": "167;27;276;266", "wc_questions": "4;141;2;6", "wc_review": "278;287;417;399", "wc_reply_reviewers": "86;0;120;118", "wc_reply_authors": "1816;897;778;1259", "reply_reviewers": "1;0;2;1", "reply_authors": "4;2;2;3", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 1.75, 0.82915619758885 ], "wc_summary_avg": [ 91.0, 19.621416870348583 ], "wc_strengths_avg": [ 32.0, 15.264337522473747 ], "wc_weaknesses_avg": [ 184.0, 100.15737616371547 ], "wc_questions_avg": [ 38.25, 59.33959470707565 ], "wc_review_avg": [ 345.25, 63.15209814408386 ], "wc_reply_reviewers_avg": [ 81.0, 48.67237409455183 ], "wc_reply_authors_avg": [ 1187.5, 403.7960500054452 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1073026491614599340&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=1d2cLKeNgY", "pdf": "https://openreview.net/pdf?id=1d2cLKeNgY", "email": "pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;;pjlab.org.cn;pjlab.org.cn;fudan.edu.cn;buaa.edu.cn;;", "author_num": 13, "aff_unique_index": "0;0;1;0;0;1;2;1;3;4", "aff_unique_norm": "Shanghai Artificial Intelligence Laboratory;Shanghai AI Laboratory;Shanghai AI Lab;Fudan University;Beihang University", "aff_unique_dep": ";;;;", "aff_unique_url": "http://www.shailab.org/;https://www.shanghai-ai-lab.com;https://www.shanghaiailab.com;https://www.fudan.edu.cn;http://www.buaa.edu.cn/", "aff_unique_abbr": "Shanghai AI Lab;SAIL;SAIL;Fudan;BUAA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "1dY11GyZdp", "title": "Signed-Binarization: Unlocking Efficiency Through Repetition-Sparsity Trade-Off", "track": "main", "status": "Reject", "tldr": "", "abstract": "Efficient inference of Deep Neural Networks (DNNs) on resource-constrained edge devices is essential. Quantization and sparsity are key algorithmic techniques that translate to repetition and sparsity within tensors at the hardware-software interface. This paper introduces the concept of repetition-sparsity trade-off that helps explain computational efficiency during inference. 
We propose Signed Binarization, a unified co-design framework that synergistically integrates hardware-software systems, quantization functions, and representation learning techniques to address this trade-off. Our results demonstrate that Signed Binarization is more accurate than binary models with the same number of non-zero weights. Detailed analysis indicates that signed binarization generates a smaller distribution of effectual (non-zero) parameters nested within a larger distribution of total parameters, both of the same type, for a DNN block. Finally, our approach achieves a 26\\% speedup on real hardware, doubles energy efficiency, and reduces density by 2.8x compared to binary methods for ResNet 18, presenting an alternative solution for deploying efficient models in resource-limited environments.", "keywords": "Representation Learning;Quantization;DNN Inference", "primary_area": "infrastructure, software libraries, hardware, etc.", "supplementary_material": "", "author": "Sachit Kuhar;Yash Jain;Alexey Tumanov;Sujan Kumar Gonugondla", "authorids": "~Sachit_Kuhar1;~Yash_Jain1;~Alexey_Tumanov1;~Sujan_Kumar_Gonugondla1", "gender": "M;M;;", "homepage": "https://sachitkuhar.github.io/;https://yash-jain.com;;https://gsujankumar.github.io", "dblp": "240/0924;255/2617;;166/6408.html", "google_scholar": "X8slYZEAAAAJ;Fr6QHDsAAAAJ;;F_ud9E4AAAAJ", "orcid": "0000-0002-5739-013X;0000-0002-5175-1352;;0000-0003-4743-6461", "linkedin": ";jinga-lala/;;sujan-kumar-gonugondla-ab6787142/", "or_profile": "~Sachit_Kuhar1;~Yash_Jain1;~Alexey_Tumanov1;~Sujan_Kumar_Gonugondla1", "aff": "Amazon;Microsoft;;Amazon", "aff_domain": "amazon.com;microsoft.com;;amazon.com", "position": "Researcher;Researcher;;Researcher", "bibtex": "@misc{\nkuhar2024signedbinarization,\ntitle={Signed-Binarization: Unlocking Efficiency Through Repetition-Sparsity Trade-Off},\nauthor={Sachit Kuhar and Yash Jain and Alexey Tumanov and Sujan Kumar Gonugondla},\nyear={2024},\nurl={https://openreview.net/forum?id=1dY11GyZdp}\n}", "github": "", "project": "", "reviewers": "ZM5v;pHUD;Ffqa", "site": "https://openreview.net/forum?id=1dY11GyZdp", "pdf_size": 1242248, "rating": "3;5;6", "confidence": "4;3;3", "soundness": "2;3;3", "contribution": "2;2;3", "presentation": "2;2;2", "wc_summary": "53;57;118", "wc_strengths": "10;23;138", "wc_weaknesses": "300;101;296", "wc_questions": "8;85;30", "wc_review": "371;266;582", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1545;883;598", "reply_reviewers": "0;0;0", "reply_authors": "3;2;2", "rating_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 76.0, 29.743346594938952 ], "wc_strengths_avg": [ 57.0, 57.52101065407897 ], "wc_weaknesses_avg": [ 232.33333333333334, 92.88104698424156 ], "wc_questions_avg": [ 41.0, 32.38312317653544 ], "wc_review_avg": [ 406.3333333333333, 131.40353453727525 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1008.6666666666666, 396.6915958552969 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9449111825230683, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:msxiS4RewesJ:scholar.google.com/&scioq=Signed-Binarization:+Unlocking+Efficiency+Through+Repetition-Sparsity+Trade-Off&hl=en&as_sdt=0,5", "gs_version_total": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Amazon;Microsoft", "aff_unique_dep": "Amazon.com, Inc.;Microsoft Corporation", "aff_unique_url": "https://www.amazon.com;https://www.microsoft.com", "aff_unique_abbr": "Amazon;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "1djnGJnaiy", "title": "Unsupervised Representation Learning of Brain Activity via Bridging Voxel Activity and Functional Connectivity", "track": "main", "status": "Reject", "tldr": "", "abstract": "Effective brain representation learning is a key step toward revealing the understanding of cognitive processes and unlocking detecting and potential therapeutic interventions for neurological diseases/disorders. Existing studies have focused on either (1) voxel-level activity, where only a single beta weight for each voxel (i.e., aggregation of voxel activity over a time window) is considered, missing their temporal dynamics, or (2) functional connectivity of the brain in the level of region of interests, missing voxel-level activities. In this paper, we bridge this gap and design BrainMixer, an unsupervised learning framework that effectively utilizes both functional connectivity and associated time series of voxels to learn voxel-level representation in an unsupervised manner. BrainMixer employs two simple yet effective MLP-based encoders to simultaneously learn the dynamics of voxel-level signals and their functional correlations. To encode voxel activity, BrainMixer fuses information across both time and voxel dimensions via a dynamic self-attention mechanism. To learn the structure of the functional connectivity graph, BrainMixer presents a temporal graph patching and encodes each patch by combining its nodes' features via a new adaptive temporal pooling. 
Our experiments show that BrainMixer attains outstanding performance and outperforms 13 baselines in different downstream tasks and experimental setups.", "keywords": "Functional Connectivity;Graph Representation Learning;Anomaly Detection;Brain Representation Learning", "primary_area": "applications to neuroscience & cognitive science", "supplementary_material": "", "author": "Ali Behrouz;Parsa Delavari;Farnoosh Hashemi", "authorids": "~Ali_Behrouz1;~Parsa_Delavari1;~Farnoosh_Hashemi1", "gender": "M;M;F", "homepage": "https://Abehrouz.github.io;;https://farnooshha.github.io/", "dblp": "220/4163;;318/9574", "google_scholar": "UbwVuqIAAAAJ;clef-H4AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;", "linkedin": "ali-behrouz-506aa2127;;farnoosh-hashemi-a48328123/", "or_profile": "~Ali_Behrouz1;~Parsa_Delavari1;~Farnoosh_Hashemi1", "aff": "Cornell University;University of British Columbia;Cornell University", "aff_domain": "cornell.edu;ubc.ca;cornell.edu", "position": "PhD student;PhD student;PhD student", "bibtex": "@misc{\nbehrouz2024unsupervised,\ntitle={Unsupervised Representation Learning of Brain Activity via Bridging Voxel Activity and Functional Connectivity},\nauthor={Ali Behrouz and Parsa Delavari and Farnoosh Hashemi},\nyear={2024},\nurl={https://openreview.net/forum?id=1djnGJnaiy}\n}", "github": "", "project": "", "reviewers": "JxFN;v3MT;WgDG", "site": "https://openreview.net/forum?id=1djnGJnaiy", "pdf_size": 5259198, "rating": "3;6;6", "confidence": "4;3;4", "soundness": "2;3;2", "contribution": "2;3;3", "presentation": "1;3;2", "wc_summary": "103;81;101", "wc_strengths": "30;34;21", "wc_weaknesses": "630;579;541", "wc_questions": "2;157;11", "wc_review": "765;851;674", "wc_reply_reviewers": "0;33;745", "wc_reply_authors": "2775;1815;3077", "reply_reviewers": "0;1;3", "reply_authors": "5;3;5", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.0, 0.816496580927726 ], "wc_summary_avg": [ 95.0, 9.93310961716756 ], "wc_strengths_avg": [ 28.333333333333332, 5.436502143433363 ], "wc_weaknesses_avg": [ 583.3333333333334, 36.46307112073194 ], "wc_questions_avg": [ 56.666666666666664, 71.04145894397784 ], "wc_review_avg": [ 763.3333333333334, 72.26955713776645 ], "wc_reply_reviewers_avg": [ 259.3333333333333, 343.6823468779791 ], "wc_reply_authors_avg": [ 2555.6666666666665, 538.0466729032178 ], "reply_reviewers_avg": [ 1.3333333333333333, 1.247219128924647 ], "reply_authors_avg": [ 4.333333333333333, 0.9428090415820634 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6364363259821404592&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;0", "aff_unique_norm": "Cornell University;University of British Columbia", "aff_unique_dep": ";", "aff_unique_url": "https://www.cornell.edu;https://www.ubc.ca", "aff_unique_abbr": "Cornell;UBC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;Canada" }, { "id": "1g77zRaJq0", "title": "Text2NKG: Fine-Grained N-ary Relation Extraction for N-ary relational Knowledge Graph Construction", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Beyond traditional binary relational facts, n-ary relational knowledge 
graphs (NKGs) are composed of n-ary relational facts containing more than two entities, which are closer to real-world facts with broader applications. However, the construction of NKGs still significantly relies on manual labor, and n-ary relation extraction still remains at a coarse-grained level, which typically assumes a single schema and a fixed arity of entities. To address these restrictions, we propose Text2NKG, a novel fine-grained n-ary relation extraction framework for n-ary relational knowledge graph construction. We introduce a span-tuple classification approach with hetero-ordered merging to accomplish fine-grained n-ary relation extraction with different arities. Furthermore, Text2NKG supports four typical NKG schemas: hyper-relational schema, event-based schema, role-based schema, and hypergraph-based schema, with high flexibility and practicality. Experimental results demonstrate that Text2NKG outperforms the previous state-of-the-art model by nearly 20 percentage points in the $F_1$ scores on the fine-grained n-ary relation extraction benchmark in the hyper-relational schema. Our code and datasets are publicly available.", "keywords": "N-ary Relation Extraction;N-ary relational Knowledge Graph;Knowledge Graph Construction", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/a2434b97e1785d88eaefa40b23a48dac5cd1aecd.zip", "author": "Haoran Luo;Haihong E;Yuhao Yang;Tianyu Yao;Yikai Guo;Zichen Tang;Wentai Zhang;Kaiyang Wan;Shiyao Peng;Meina Song;Wei Lin", "authorids": "~Haoran_Luo1;~Haihong_E1;~Yuhao_Yang1;~Tianyu_Yao1;~Yikai_Guo2;~Zichen_Tang1;~Wentai_Zhang2;~Kaiyang_Wan1;~Shiyao_Peng2;~Meina_Song1;~Wei_Lin13", "gender": "M;F;M;M;M;M;M;M;F;M;", "homepage": "https://lhrlab.github.io/;https://teacher.bupt.edu.cn/ehaihong/zh_CN/index.htm;;https://github.com/yao12315;https://github.com/GYK-CASIC;https://github.com/StarLight24;;https://github.com/coverdpsy;http://teacher.bupt.edu.cn/songmeina/;http://www.inspur.com;", "dblp": "227/5902-1.html;43/10222.html;;324/5213;334/4154;264/0465;324/4644;;95/4440;https://dblp.uni-trier.de/pid/99/2649;", "google_scholar": "https://scholar.google.com.hk/citations?user=Q9Nv9mcAAAAJ;https://scholar.google.com.hk/citations?user=J4akh64AAAAJ;T0arNqgAAAAJ;M3wrJAwAAAAJ;https://scholar.google.com.hk/citations?user=4LxmyZAAAAAJ;https://scholar.google.com/citations?hl=zh-CN;v_faxAsAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;;https://scholar.google.com/citations?hl=en", "orcid": "0000-0003-2727-0361;;;;0000-0003-0345-1686;0000-0002-0244-4970;;;0000-0001-6626-9932;;", "linkedin": "haoran-luo-88a96b255/;;;;https://www.linkedin.cn/incareer/in/ACoAAD5htzQBKWZKy68SvRuuztB4LJDhIKMdM1o;;;;;;", "or_profile": "~Haoran_Luo1;~Haihong_E1;~Yuhao_Yang1;~Tianyu_Yao1;~Yikai_Guo2;~Zichen_Tang1;~Kaiyang_Wan1;~Shiyao_Peng2;~Meina_Song1;~Wei_Lin13;~wentai_zhang1", "aff": "Nanyang Technological University;Beijing University of Post and Telecommunication;Beihang University;Beijing University of Posts and Telecommunications;Beijing Institute of Computer Technology and Application;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;China University of Petroleum-Beijing at Karamay;Beijing University of Posts and Telecommunications;;", "aff_domain": "ntu.edu.sg;bupt.edu.cn;buaa.edu.cn;bupt.edu.cn;casic.com.cn;bupt.edu.cn;bupt.edu.cn;cupk.edu.cn;bupt.edu.cn;;", "position": "Intern;Full Professor;MS student;MS student;PhD student;MS student;Undergrad
student;Undergrad student;Full Professor;;", "bibtex": "@misc{\nluo2024textnkg,\ntitle={Text2{NKG}: Fine-Grained N-ary Relation Extraction for N-ary relational Knowledge Graph Construction},\nauthor={Haoran Luo and Haihong E and Yuhao Yang and Tianyu Yao and Yikai Guo and Zichen Tang and Wentai Zhang and Kaiyang Wan and Shiyao Peng and Meina Song and Wei Lin},\nyear={2024},\nurl={https://openreview.net/forum?id=1g77zRaJq0}\n}", "github": "", "project": "", "reviewers": "ZHPg;vB44;JaVm;z8Bh", "site": "https://openreview.net/forum?id=1g77zRaJq0", "pdf_size": 2177425, "rating": "3;3;5;5", "confidence": "4;3;3;4", "soundness": "2;3;3;3", "contribution": "2;2;3;2", "presentation": "2;1;2;2", "wc_summary": "98;184;155;40", "wc_strengths": "49;20;180;84", "wc_weaknesses": "63;88;607;146", "wc_questions": "5;136;265;37", "wc_review": "215;428;1207;307", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_avg": [ 119.25, 55.23302906775981 ], "wc_strengths_avg": [ 83.25, 60.280075481040996 ], "wc_weaknesses_avg": [ 226.0, 222.02139536540167 ], "wc_questions_avg": [ 110.75, 101.30739114200898 ], "wc_review_avg": [ 539.25, 392.85644642795415 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13416463536580625484&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2;1;3;1;1;4;1", "aff_unique_norm": "Nanyang Technological University;Beijing University of Posts and Telecommunications;Beihang University;Beijing Institute of Computer Technology and Application;China University of Petroleum", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.ntu.edu.sg;http://www.bupt.edu.cn/;http://www.buaa.edu.cn/;;http://www.cup.edu.cn", "aff_unique_abbr": "NTU;BUPT;BUAA;;CUP", "aff_campus_unique_index": "1;1;1;1;1;1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;1;1;1;1;1;1;1;1", "aff_country_unique": "Singapore;China" }, { "id": "1gkePTsAWf", "title": "Self-Taught Optimizer (STOP): Recursively Self-Improving Code Generation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Several recent advances in AI systems (e.g., Tree-of-Thoughts and Program-Aided Language Models) solve problems by providing a \"scaffolding\" program that structures multiple calls to language models to generate better outputs. A scaffolding program is written in a programming language such as Python. In this work, we use a language-model-infused scaffolding program to improve itself. We start with a seed \"improver\" that improves an input program according to a given utility function by querying a language model several times and returning the best solution. We then run this seed improver to improve itself. Across a small set of downstream tasks, the resulting improved improver generates programs with significantly better performance than its seed improver. Afterward, we analyze the variety of self-improvement strategies proposed by the language model, including beam search, genetic algorithms, and simulated annealing. 
Since the language models themselves are not altered, this is not full recursive self-improvement. Nonetheless, it demonstrates that a modern language model, GPT-4 in our proof-of-concept experiments, is capable of writing code that can call itself to improve itself. We critically consider concerns around the development of self-improving technologies and evaluate the frequency with which the generated code bypasses a sandbox.", "keywords": "reasoning;language models;self-improvement;code generation", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Eric Zelikman;Eliana Lorch;Lester Mackey;Adam Tauman Kalai", "authorids": "~Eric_Zelikman1;eliana@lorien.ai;~Lester_Mackey1;~Adam_Tauman_Kalai1", "gender": "M;;M;", "homepage": "https://zelikman.me;;https://stanford.edu/~lmackey;", "dblp": "217/2378;;05/2961;", "google_scholar": "V5B8dSUAAAAJ;;erv7TP0AAAAJ;", "orcid": ";;0000-0002-1102-0387;", "linkedin": "ericzelikman/;;lester-mackey-5902909;", "or_profile": "~Eric_Zelikman1;eliana@lorien.ai;~Lester_Mackey1;~Adam_Tauman_Kalai1", "aff": "Stanford University;;Microsoft Research New England;", "aff_domain": "stanford.edu;;microsoft.com;", "position": "PhD student;;Principal Researcher;", "bibtex": "@misc{\nzelikman2024selftaught,\ntitle={Self-Taught Optimizer ({STOP}): Recursively Self-Improving Code Generation},\nauthor={Eric Zelikman and Eliana Lorch and Lester Mackey and Adam Tauman Kalai},\nyear={2024},\nurl={https://openreview.net/forum?id=1gkePTsAWf}\n}", "github": "", "project": "", "reviewers": "UnQ9;VZPP;X5AY;443q;yFD6", "site": "https://openreview.net/forum?id=1gkePTsAWf", "pdf_size": 600577, "rating": "5;6;6;6;8", "confidence": "4;2;4;4;4", "soundness": "2;2;3;3;3", "contribution": "3;2;3;3;4", "presentation": "4;2;3;2;4", "wc_summary": "107;105;78;83;111", "wc_strengths": "53;23;97;67;189", "wc_weaknesses": "224;300;66;255;12", "wc_questions": "38;59;51;84;26", "wc_review": "422;487;292;489;338", "wc_reply_reviewers": "0;91;0;108;7", "wc_reply_authors": "776;1412;654;670;242", "reply_reviewers": "0;1;0;1;1", "reply_authors": "1;2;1;1;1", "rating_avg": [ 6.2, 0.9797958971132712 ], "confidence_avg": [ 3.6, 0.8000000000000002 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "contribution_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.0, 0.8944271909999159 ], "wc_summary_avg": [ 96.8, 13.541048703848606 ], "wc_strengths_avg": [ 85.8, 56.83097746827869 ], "wc_weaknesses_avg": [ 171.4, 112.08139899198261 ], "wc_questions_avg": [ 51.6, 19.724096937502615 ], "wc_review_avg": [ 405.6, 79.15200565999575 ], "wc_reply_reviewers_avg": [ 41.2, 47.97249211787938 ], "wc_reply_authors_avg": [ 750.8, 377.517363839069 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.10206207261596571, "gs_citation": 44, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7905804306816397467&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1", "aff_unique_norm": "Stanford University;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "https://www.stanford.edu;https://www.microsoft.com/en-us/research/group/microsoft-research-new-england", "aff_unique_abbr": "Stanford;MSR NE", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Stanford;New England", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "1hLFLNu4uy", "title": 
"Split and Merge: Aligning Position Biases in Large Language Model based Evaluators", "track": "main", "status": "Reject", "tldr": "", "abstract": "Large language models (LLMs) have shown promise as automated evaluators for assessing the quality of answers generated by AI systems. However, these LLM-based evaluators exhibit position bias, or inconsistency, when used to evaluate candidate answers in pairwise comparisons, favoring either the first or second answer regardless of content. To address this limitation, we propose PORTIA, an\nalignment-based system designed to mimic human comparison strategies to calibrate position bias in a lightweight yet effective manner. Specifically, PORTIA splits the answers into multiple segments, aligns similar content across candidate answers, and then merges them back into a single prompt for evaluation by LLMs. We conducted extensive experiments with six diverse LLMs to evaluate 11,520 answer pairs. Our results show that PORTIA markedly enhances the consistency rates for all the models and comparison forms tested, achieving an average relative improvement of 47.46%. Remarkably, PORTIA enables less advanced GPT models to achieve 88% agreement with the state-of-the-art GPT-4 model at just 10% of the cost. Furthermore, it rectifies around 80% of the position bias instances within the\nGPT-4 model, elevating its consistency rate up to 98%. Subsequent human evaluations indicate that the PORTIA-enhanced GPT-3.5 model can even surpass the standalone GPT-4 in terms of alignment with human evaluators. These findings highlight PORTIA\u2019s ability to correct position bias, improve LLM consistency, and boost performance while keeping cost-efficiency. This represents a valuable step\ntoward a more reliable and scalable use of LLMs for automated evaluations across diverse applications.", "keywords": "Large language models;Alignment;Consistency", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/6ff851bfb65da08eef4cc8051cf1210a71b91aad.zip", "author": "Zongjie Li;Chaozheng Wang;Pingchuan Ma;Daoyuan Wu;Tianxiang Li;Shuai Wang;Cuiyun Gao;Yang Liu", "authorids": "~Zongjie_Li1;~Chaozheng_Wang1;~Pingchuan_Ma4;~Daoyuan_Wu1;~Tianxiang_Li2;~Shuai_Wang7;~Cuiyun_Gao1;~Yang_Liu36", "gender": "Not Specified;M;;;M;M;F;M", "homepage": ";;;https://daoyuan14.github.io/;http://tianxiang.net;https://home.cse.ust.hk/~shuaiw/;https://cuiyungao.github.io/;https://personal.ntu.edu.sg/yangliu/", "dblp": ";217/1869;;https://dblp.uni-trier.de/pid/144/7556.html;;42/1503-11;;51/3710-3", "google_scholar": "https://scholar.google.com.hk/citations?user=Sd8mmE0AAAAJ;gx5zRYIAAAAJ;;WtO-bN8AAAAJ;;;9I2hTmQAAAAJ;https://scholar.google.com.sg/citations?hl=en", "orcid": ";;;;;;;0000-0001-7300-9215", "linkedin": ";;;;;;;", "or_profile": "~Zongjie_Li1;~Chaozheng_Wang1;~Pingchuan_Ma4;~Daoyuan_Wu1;~Tianxiang_Li2;~Shuai_Wang7;~Cuiyun_Gao1;~Yang_Liu36", "aff": "Department of Computer Science and Engineering, Hong Kong University of Science and Technology;Department of Computer Science and Engineering, The Chinese University of Hong Kong;;Nanyang Technological University;;Hong Kong University of Science and Technology;;Nanyang Technological University", "aff_domain": "cse.ust.hk;cse.cuhk.edu.hk;;ntu.edu.sg;;hkust.edu;;ntu.edu.sg", "position": "PhD student;PhD student;;Senior Research Fellow;;Associate Professor;;Full Professor", "bibtex": "@misc{\nli2024split,\ntitle={Split and Merge: Aligning Position Biases in Large Language Model based 
Evaluators},\nauthor={Zongjie Li and Chaozheng Wang and Pingchuan Ma and Daoyuan Wu and Tianxiang Li and Shuai Wang and Cuiyun Gao and Yang Liu},\nyear={2024},\nurl={https://openreview.net/forum?id=1hLFLNu4uy}\n}", "github": "", "project": "", "reviewers": "SWLn;cPgm;YB2D;RHzc", "site": "https://openreview.net/forum?id=1hLFLNu4uy", "pdf_size": 642987, "rating": "3;5;6;6", "confidence": "5;4;4;3", "soundness": "1;3;3;2", "contribution": "2;3;3;2", "presentation": "2;3;3;3", "wc_summary": "160;123;56;86", "wc_strengths": "33;80;20;36", "wc_weaknesses": "312;133;80;288", "wc_questions": "41;100;3;6", "wc_review": "546;436;159;416", "wc_reply_reviewers": "272;67;0;308", "wc_reply_authors": "1900;1253;454;683", "reply_reviewers": "2;1;0;1", "reply_authors": "3;2;1;1", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 106.25, 39.06644980030819 ], "wc_strengths_avg": [ 42.25, 22.609455986378798 ], "wc_weaknesses_avg": [ 203.25, 98.9125244850216 ], "wc_questions_avg": [ 37.5, 39.05444917035702 ], "wc_review_avg": [ 389.25, 141.8509340822259 ], "wc_reply_reviewers_avg": [ 161.75, 131.03887781876034 ], "wc_reply_authors_avg": [ 1072.5, 559.3721927303859 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.8660254037844386, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=320404609852215416&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2;0;2", "aff_unique_norm": "Hong Kong University of Science and Technology;Chinese University of Hong Kong;Nanyang Technological University", "aff_unique_dep": "Department of Computer Science and Engineering;Department of Computer Science and Engineering;", "aff_unique_url": "https://www.ust.hk;https://www.cuhk.edu.hk;https://www.ntu.edu.sg", "aff_unique_abbr": "HKUST;CUHK;NTU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;1;0;1", "aff_country_unique": "China;Singapore" }, { "id": "1hhja8ZxcP", "title": "Turbulent Flow Simulation using Autoregressive Conditional Diffusion Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "Simulating turbulent flows is crucial for a wide range of applications, and machine learning-based solvers are gaining increasing relevance. However, achieving stability when generalizing to longer rollout horizons remains a persistent challenge for learned PDE solvers. We address this challenge by introducing a fully data-driven fluid solver that utilizes an autoregressive rollout based on conditional diffusion models. We show that this approach offers clear advantages in terms of rollout stability compared to other learned baselines. Remarkably, these improvements in stability are achieved without compromising the quality of generated samples, and our model successfully generalizes to flow parameters beyond the training regime. Additionally, the probabilistic nature of the diffusion approach allows for inferring predictions that align with the statistics of the underlying physics. 
We quantitatively and qualitatively evaluate the performance of our method on a range of challenging scenarios, including incompressible and transonic flows, as well as isotropic turbulence.", "keywords": "turbulent flow;PDEs;numerical simulation;diffusion models;autoregressive models", "primary_area": "applications to physical sciences (physics, chemistry, biology, etc.)", "supplementary_material": "/attachment/00bf9b95aea315c609939fe20bbf16083220ea6c.zip", "author": "Georg Kohl;Liwei Chen;Nils Thuerey", "authorids": "~Georg_Kohl1;~Liwei_Chen2;~Nils_Thuerey1", "gender": "M;M;M", "homepage": "https://ge.in.tum.de/about/georg-kohl/;;https://ge.in.tum.de", "dblp": "259/1567;;42/478", "google_scholar": "https://scholar.google.de/citations?user=9gVgWocAAAAJ;;https://scholar.google.com.tw/citations?user=GEehwv8AAAAJ", "orcid": "0000-0002-9661-575X;0000-0002-0309-2284;", "linkedin": ";liwei-chen-46557017/;", "or_profile": "~Georg_Kohl1;~Liwei_Chen2;~Nils_Thuerey1", "aff": "Technische Universit\u00e4t M\u00fcnchen;Technical University Munich;Technical University Munich", "aff_domain": "tum.de;tum.de;tum.de", "position": "PhD student;Postdoc;Associate Professor", "bibtex": "@misc{\nkohl2024turbulent,\ntitle={Turbulent Flow Simulation using Autoregressive Conditional Diffusion Models},\nauthor={Georg Kohl and Liwei Chen and Nils Thuerey},\nyear={2024},\nurl={https://openreview.net/forum?id=1hhja8ZxcP}\n}", "github": "", "project": "", "reviewers": "1NQ6;ySpT;wsiC;JT77", "site": "https://openreview.net/forum?id=1hhja8ZxcP", "pdf_size": 16314951, "rating": "5;5;5;5", "confidence": "5;4;5;3", "soundness": "2;2;2;3", "contribution": "2;2;2;1", "presentation": "3;2;3;3", "wc_summary": "48;57;75;49", "wc_strengths": "33;72;247;61", "wc_weaknesses": "55;284;772;348", "wc_questions": "209;120;221;120", "wc_review": "345;533;1315;578", "wc_reply_reviewers": "0;69;0;0", "wc_reply_authors": "1070;716;1585;904", "reply_reviewers": "0;1;0;0", "reply_authors": "2;1;3;2", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 57.25, 10.825317547305483 ], "wc_strengths_avg": [ 103.25, 84.20325112488234 ], "wc_weaknesses_avg": [ 364.75, 259.1325674244748 ], "wc_questions_avg": [ 167.5, 47.68909728648677 ], "wc_review_avg": [ 692.75, 369.73394150388737 ], "wc_reply_reviewers_avg": [ 17.25, 29.877876430563134 ], "wc_reply_authors_avg": [ 1068.75, 323.29968682323215 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2514720713749392884&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;1", "aff_unique_norm": "Technische Universit\u00e4t M\u00fcnchen;Technical University of Munich", "aff_unique_dep": ";", "aff_unique_url": "https://www.tum.de;https://www.tum.de", "aff_unique_abbr": "TUM;TUM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "title": "Sample-Efficient Learning of POMDPs with Multiple Observations In Hindsight", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19578", "id": "1hsVvgW0rU", "author_site": "Jiacheng Guo, Minshuo Chen, Huan Wang, Caiming Xiong, Mengdi 
Wang, Yu Bai", "tldr": "", "abstract": "This paper studies the sample-efficiency of learning in Partially Observable Markov Decision Processes (POMDPs), a challenging problem in reinforcement learning that is known to be exponentially hard in the worst-case. Motivated by real-world settings such as loading in game playing, we propose an enhanced feedback model called ``multiple observations in hindsight'', where after each episode of interaction with the POMDP, the learner may collect multiple additional observations emitted from the encountered latent states, but may not observe the latent states themselves. We show that sample-efficient learning under this feedback model is possible for two new subclasses of POMDPs: \\emph{multi-observation revealing POMDPs} and \\emph{distinguishable POMDPs}. Both subclasses generalize and substantially relax \\emph{revealing POMDPs}---a widely studied subclass for which sample-efficient learning is possible under standard trajectory feedback. Notably, distinguishable POMDPs only require the emission distributions from different latent states to be \\emph{different} instead of \\emph{linearly independent} as required in revealing POMDPs.", "keywords": "reinforcement learning theory;POMDPs;partially observable reinforcement learning", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Jiacheng Guo;Minshuo Chen;Huan Wang;Caiming Xiong;Mengdi Wang;Yu Bai", "authorids": "~Jiacheng_Guo1;~Minshuo_Chen1;~Huan_Wang1;~Caiming_Xiong1;~Mengdi_Wang1;~Yu_Bai1", "gender": ";M;M;M;F;", "homepage": "http://;https://minshuochen.github.io;http://www.cs.yale.edu/homes/wang-huan/;http://cmxiong.com/;http://mwang.princeton.edu;https://yubai.org", "dblp": ";217/1509;70/6155-16.html;80/7282;;03/6325-17.html", "google_scholar": ";qU9WvTgAAAAJ;7NpTttkAAAAJ;vaSdahkAAAAJ;;owqhKD8AAAAJ", "orcid": ";;;;;", "linkedin": ";;huanwangyale/;caiming-xiong-150a1417;;", "or_profile": "~Jiacheng_Guo1;~Minshuo_Chen1;~Huan_Wang1;~Caiming_Xiong1;~Mengdi_Wang1;~Yu_Bai1", "aff": "Princeton University;Princeton University;Salesforce.com;Salesforce Research;Princeton University;Salesforce Research", "aff_domain": "princeton.edu;princeton.edu;salesforce.com;salesforce.com;princeton.edu;salesforce.com", "position": "PhD student;Postdoc;Researcher;Research Scientist;Full Professor;Research Scientist", "bibtex": "@inproceedings{\nguo2024sampleefficient,\ntitle={Sample-Efficient Learning of {POMDP}s with Multiple Observations In Hindsight},\nauthor={Jiacheng Guo and Minshuo Chen and Huan Wang and Caiming Xiong and Mengdi Wang and Yu Bai},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=1hsVvgW0rU}\n}", "github": "", "project": "", "reviewers": "jqb5;q33U;se9m;FWjL", "pdf_size": 481945, "rating": "6;6;6;6", "confidence": "4;3;3;4", "soundness": "3;4;2;4", "contribution": "2;3;3;3", "presentation": "3;4;2;3", "wc_summary": "170;35;89;164", "wc_strengths": "111;78;34;54", "wc_weaknesses": "70;102;24;233", "wc_questions": "96;60;61;150", "wc_review": "447;275;208;601", "wc_reply_reviewers": "0;0;56;0", "wc_reply_authors": "282;458;433;1224", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;2;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 114.5, 55.90393546075267 ], "wc_strengths_avg": [ 69.25, 28.699956445959984 
], "wc_weaknesses_avg": [ 107.25, 77.71542639656556 ], "wc_questions_avg": [ 91.75, 36.62222685747004 ], "wc_review_avg": [ 382.75, 153.22267293060776 ], "wc_reply_reviewers_avg": [ 14.0, 24.24871130596428 ], "wc_reply_authors_avg": [ 599.25, 366.9300853023638 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15402953603063387946&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "openreview": "https://openreview.net/forum?id=1hsVvgW0rU", "pdf": "https://openreview.net/pdf?id=1hsVvgW0rU", "email": "princeton.edu;princeton.edu;salesforce.com;salesforce.com;princeton.edu;salesforce.com", "author_num": 6, "aff_unique_index": "0;0;1;1;0;1", "aff_unique_norm": "Princeton University;Salesforce", "aff_unique_dep": ";", "aff_unique_url": "https://www.princeton.edu;https://www.salesforce.com", "aff_unique_abbr": "Princeton;Salesforce", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "1iKydVG6pL", "title": "Discovering Mathematical Formulas from Data via LSTM-guided Monte Carlo Tree Search", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Finding a concise and interpretable mathematical formula that accurately describes the relationship between each variable and the predicted value in the data is a crucial task in scientific research, as well as a significant challenge in artificial intelligence. This problem is commonly referred to as symbolic regression, which poses an NP-hard combinatorial optimization problem. Traditional symbolic regression algorithms typically rely on genetic algorithms; however, these approaches are sensitive to hyperparameters and often struggle to fully recover the target expression. To address these limitations, a novel symbolic regression algorithm based on Monte Carlo Tree Search (MCTS) was proposed this year. While this algorithm has shown considerable improvement in recovering target expressions compared to previous methods, it still faces challenges when dealing with complex expressions due to the vast search space involved. Moreover, the lack of guidance during the MCTS expansion process severely hampers its search efficiency. In order to overcome these issues, we propose AlphaSymbol - a new symbolic regression algorithm that combines MCTS with a Long Short-Term Memory network (LSTM). By leveraging LSTM's ability to guide the MCTS expansion process effectively, we enhance the overall search efficiency of MCTS significantly. Next, we utilize the MCTS results to further refine the LSTM network, enhancing its capabilities and providing more accurate guidance for the MCTS process. MCTS and LSTM hand in hand advance together, win-win cooperation until the target expression is successfully determined. We conducted extensive evaluations of AlphaSymbol using 222 expressions sourced from over 10 different symbolic regression datasets. 
The experimental results demonstrate that AlphaSymbol outperforms existing state-of-the-art algorithms in accurately recovering symbolic expressions both with and without added noise.", "keywords": "Symbolic Regression;Long Short-Term Memory network;Monte Carlo Tree Search;Reinforcement learning.", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/d1f08fa871ba306251908079d750099dbef4b664.pdf", "author": "Yanjie Li;Weijun Li;Lina Yu;Min Wu;Linjun Sun;Jingyi Liu;Wenqiang Li;Meilan Hao", "authorids": "~Yanjie_Li4;~Weijun_Li1;~Lina_Yu1;~Min_Wu5;~Linjun_Sun1;~Jingyi_Liu2;~Wenqiang_Li2;~Meilan_Hao1", "gender": ";M;F;M;M;F;M;", "homepage": ";;;http://lab.semi.ac.cn/ailab/;;;https://github.com/AILWQ;", "dblp": ";;;16/0;;;;", "google_scholar": ";HrzfypUAAAAJ;I8Uc918AAAAJ;wvvyr8UAAAAJ;50WznDAAAAAJ;;b-MGt8gAAAAJ;", "orcid": ";0000-0001-9668-2883;;0000-0001-9475-3975;0000-0002-9287-9467;0000-0002-9710-5006;0000-0003-3286-7445;", "linkedin": ";;;;;;;", "or_profile": "~Yanjie_Li4;~Weijun_Li1;~Lina_Yu1;~Min_Wu5;~Linjun_Sun1;~Jingyi_Liu2;~Wenqiang_Li2;~Meilan_Hao1", "aff": ";Institute of Semiconductors Chinese Academy of Sciences;Institute of Semiconductors, Chinese Academy of Sciences;Institute of Semiconductors, Chinese Academy of Sciences;Institute of Semiconductors, Chinese Academy of Sciences;University of Chinese Academy of Sciences;University of Chinese Academy of Sciences;", "aff_domain": ";ucac.ac.cn;semi.ac.cn;semi.ac.cn;semi.ac.cn;ucas.edu;ucas.ac.cn;", "position": ";Full Professor;Associate Professor;Assistant Professor;Postdoc;PhD student;MS student;", "bibtex": "@misc{\nli2024discovering,\ntitle={Discovering Mathematical Formulas from Data via {LSTM}-guided Monte Carlo Tree Search},\nauthor={Yanjie Li and Weijun Li and Lina Yu and Min Wu and Linjun Sun and Jingyi Liu and Wenqiang Li and Meilan Hao},\nyear={2024},\nurl={https://openreview.net/forum?id=1iKydVG6pL}\n}", "github": "", "project": "", "reviewers": "SBg1;YgjN;27mA;Fmza", "site": "https://openreview.net/forum?id=1iKydVG6pL", "pdf_size": 1317617, "rating": "3;3;5;6", "confidence": "4;4;3;5", "soundness": "1;2;3;3", "contribution": "1;2;3;2", "presentation": "1;1;2;3", "wc_summary": "20;18;77;43", "wc_strengths": "16;21;57;18", "wc_weaknesses": "105;321;342;21", "wc_questions": "263;40;4;51", "wc_review": "404;400;480;133", "wc_reply_reviewers": "287;26;56;22", "wc_reply_authors": "2163;658;735;243", "reply_reviewers": "2;1;1;1", "reply_authors": "4;3;2;2", "rating_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 1.75, 0.82915619758885 ], "wc_summary_avg": [ 39.5, 23.77498685593748 ], "wc_strengths_avg": [ 28.0, 16.837458240482736 ], "wc_weaknesses_avg": [ 197.25, 137.69599667383218 ], "wc_questions_avg": [ 89.5, 101.66734972448135 ], "wc_review_avg": [ 354.25, 131.65556387787035 ], "wc_reply_reviewers_avg": [ 97.75, 110.05084052382335 ], "wc_reply_authors_avg": [ 949.75, 725.0356456754384 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.2721655269759087, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15681812717127003335&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;0;1;1", "aff_unique_norm": "Chinese Academy of Sciences;University of Chinese Academy of 
Sciences", "aff_unique_dep": "Institute of Semiconductors;", "aff_unique_url": "http://www.semi.ac.cn;http://www.ucas.ac.cn", "aff_unique_abbr": "CAS;UCAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Simple Minimax Optimal Byzantine Robust Algorithm for Nonconvex Objectives with Uniform Gradient Heterogeneity", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19577", "id": "1ii8idH4tH", "author_site": "Tomoya Murata, Kenta Niwa, Takumi Fukami, Iifan Tyou", "tldr": "", "abstract": "In this study, we consider nonconvex federated learning problems with the existence of Byzantine workers. We propose a new simple Byzantine robust algorithm called Momentum Screening. The algorithm is adaptive to the Byzantine fraction, i.e., all its hyperparameters do not depend on the number of Byzantine workers. We show that our method achieves the best optimization error of $O(\\delta^2\\zeta_\\mathrm{max}^2)$ for nonconvex smooth local objectives satisfying $\\zeta_\\mathrm{max}$-uniform gradient heterogeneity condition under $\\delta$-Byzantine fraction, which can be better than the best known error rate of $O(\\delta\\zeta_\\mathrm{mean}^2)$ for local objectives satisfying $\\zeta_\\mathrm{mean}$-mean heterogeneity condition when $\\delta \\leq (\\zeta_\\mathrm{max}/\\zeta_\\mathrm{mean})^2$. Furthermore, we derive an algorithm independent lower bound for local objectives satisfying $\\zeta_\\mathrm{max}$-uniform gradient heterogeneity condition and show the minimax optimality of our proposed method on this class. In numerical experiments, we validate the superiority of our method over the existing robust aggregation algorithms and verify our theoretical results.", "keywords": "Byzantine Robustness;Nonconvex Optimization;Federated Learning", "primary_area": "optimization", "supplementary_material": "", "author": "Tomoya Murata;Kenta Niwa;Takumi Fukami;Iifan Tyou", "authorids": "~Tomoya_Murata1;~Kenta_Niwa1;~Takumi_Fukami1;~Iifan_Tyou1", "gender": "M;M;;M", "homepage": ";http://www.kecl.ntt.co.jp/icl/ls/members/niwa/index.html;;", "dblp": "151/5035;64/1008.html;;234/8871.html", "google_scholar": "hH5pbMIAAAAJ;Btla06EAAAAJ;;", "orcid": ";0000-0002-6911-0238;;", "linkedin": ";;http://www.linkedin.com/in/takumi-fukami-8a4a40238;tyou-iifan-354b132b/", "or_profile": "~Tomoya_Murata1;~Kenta_Niwa1;~Takumi_Fukami1;~Iifan_Tyou1", "aff": "The University of Tokyo;NTT Corporation;NTT Corporation;NTT", "aff_domain": "tokyo.ac.jp;ntt.co.jp;ntt.co.jp;ntt.co.jp", "position": "PhD student;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nmurata2024simple,\ntitle={Simple Minimax Optimal Byzantine Robust Algorithm for Nonconvex Objectives with Uniform Gradient Heterogeneity},\nauthor={Tomoya Murata and Kenta Niwa and Takumi Fukami and Iifan Tyou},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=1ii8idH4tH}\n}", "github": "", "project": "", "reviewers": "p51w;uTUe;BeSt", "pdf_size": 5418301, "rating": "3;6;6", "confidence": "2;3;3", "soundness": "3;3;3", "contribution": "2;3;3", "presentation": "2;2;3", "wc_summary": "65;123;108", "wc_strengths": "43;107;69", "wc_weaknesses": "213;255;47", "wc_questions": "3;18;26", "wc_review": "324;503;250", "wc_reply_reviewers": "0;23;16", "wc_reply_authors": "430;551;310", "reply_reviewers": "0;1;1", "reply_authors": "1;1;1", "rating_avg": [ 5.0, 
1.4142135623730951 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 98.66666666666667, 24.580932086115496 ], "wc_strengths_avg": [ 73.0, 26.280537792569366 ], "wc_weaknesses_avg": [ 171.66666666666666, 89.80472642845079 ], "wc_questions_avg": [ 15.666666666666666, 9.533566430716728 ], "wc_review_avg": [ 359.0, 106.21048284734735 ], "wc_reply_reviewers_avg": [ 13.0, 9.626352718795768 ], "wc_reply_authors_avg": [ 430.3333333333333, 98.38812033077073 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11224376210352366179&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "openreview": "https://openreview.net/forum?id=1ii8idH4tH", "pdf": "https://openreview.net/pdf?id=1ii8idH4tH", "email": "tokyo.ac.jp;ntt.co.jp;ntt.co.jp;ntt.co.jp", "author_num": 4, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "University of Tokyo;NTT Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.u-tokyo.ac.jp;https://www.ntt.co.jp", "aff_unique_abbr": "UTokyo;NTT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Japan" }, { "title": "Towards Few-Shot Adaptation of Foundation Models via Multitask Finetuning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19576", "id": "1jbh2e0b2K", "author_site": "Zhuoyan Xu, Zhenmei Shi, Junyi Wei, Fangzhou Mu, Yin Li, Yingyu Liang", "tldr": "", "abstract": "Foundation models have emerged as a powerful tool for many AI problems. Despite the tremendous success of foundation models, effective adaptation to new tasks, particularly those with limited labels, remains an open question and lacks theoretical understanding. \n An emerging solution with recent success in vision and NLP involves finetuning a foundation model on a selection of relevant tasks, before its adaptation to a target task with limited labeled samples. In this paper, we study the theoretical justification of this multitask finetuning approach. \nOur theoretical analysis reveals that with a diverse set of related tasks, this multitask finetuning leads to reduced error in the target task, in comparison to directly adapting the same pretrained model. 
We quantify the relationship between finetuning tasks and target tasks by diversity and consistency metrics, and further propose a practical task selection algorithm.\n We substantiate our theoretical claims with extensive empirical evidence.\nFurther, we present results affirming our task selection algorithm adeptly chooses related finetuning tasks, providing advantages to the model performance on target tasks.\n We believe our study shed new light on the effective adaptation of foundation models to new tasks that lack abundant labels.\n Our code is available at https://github.com/OliverXUZY/Foudation-Model_Multitask.", "keywords": "Foundation model;Multitask finetuning;Few-Shot learning", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "/attachment/4f14dfd2bb204fa615bc528f7d20255cb50796b4.zip", "author": "Zhuoyan Xu;Zhenmei Shi;Junyi Wei;Fangzhou Mu;Yin Li;Yingyu Liang", "authorids": "~Zhuoyan_Xu1;~Zhenmei_Shi1;~Junyi_Wei1;~Fangzhou_Mu1;~Yin_Li3;~Yingyu_Liang1", "gender": "M;M;F;M;M;", "homepage": "https://pages.cs.wisc.edu/~zxu444/;http://zhmeishi.github.io/;;https://fmu2.github.io/;https://www.biostat.wisc.edu/~yli/;", "dblp": "126/2019;246/5216;166/6146;262/6282;49/5981-3;", "google_scholar": "uufndFAAAAAJ;0oeNnzMAAAAJ;Kb1GL40AAAAJ;OOymFJsAAAAJ;_y-8nrcAAAAJ;", "orcid": ";;;0000-0001-5580-2404;;", "linkedin": "zhuoyan-xu-0702301a2/;zhenmei-shi-56408a113/;Junyi-Jenny-Wei-04ba979b/;;;", "or_profile": "~Zhuoyan_Xu1;~Zhenmei_Shi1;~Junyi_Wei1;~Fangzhou_Mu1;~Yin_Li3;~Yingyu_Liang1", "aff": "University of Wisconsin - Madison;University of Wisconsin - Madison;University of Wisconsin, Madison;NVIDIA;University of Wisconsin, Madison;", "aff_domain": "wisc.edu;wisc.edu;wisc.edu;nvidia.com;wisc.edu;", "position": "PhD student;PhD student;PhD student;Researcher;Assistant Professor;", "bibtex": "@inproceedings{\nxu2024towards,\ntitle={Towards Few-Shot Adaptation of Foundation Models via Multitask Finetuning},\nauthor={Zhuoyan Xu and Zhenmei Shi and Junyi Wei and Fangzhou Mu and Yin Li and Yingyu Liang},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=1jbh2e0b2K}\n}", "github": "", "project": "", "reviewers": "1yNr;72L9;AgCa;mX1E", "pdf_size": 922639, "rating": "5;5;6;8", "confidence": "4;5;3;3", "soundness": "3;3;3;3", "contribution": "2;3;2;3", "presentation": "2;2;3;3", "wc_summary": "135;44;44;210", "wc_strengths": "34;54;74;150", "wc_weaknesses": "157;97;78;41", "wc_questions": "72;97;2;91", "wc_review": "398;292;198;492", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "851;475;626;338", "reply_reviewers": "0;0;0;0", "reply_authors": "2;2;3;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 108.25, 69.50674427708437 ], "wc_strengths_avg": [ 78.0, 43.9089968002003 ], "wc_weaknesses_avg": [ 93.25, 41.95458854523543 ], "wc_questions_avg": [ 65.5, 37.80542289143186 ], "wc_review_avg": [ 345.0, 110.4943437466371 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 572.5, 190.34245453918052 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.7385489458759963, "gs_citation": 32, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=1704557995012034903&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=1jbh2e0b2K", "pdf": "https://openreview.net/pdf?id=1jbh2e0b2K", "email": "wisc.edu;wisc.edu;wisc.edu;nvidia.com;wisc.edu;", "author_num": 6, "aff_unique_index": "0;0;1;2;1", "aff_unique_norm": "University of Wisconsin-Madison;University of Wisconsin;NVIDIA", "aff_unique_dep": ";;NVIDIA Corporation", "aff_unique_url": "https://www.wisc.edu;https://www.wisc.edu;https://www.nvidia.com", "aff_unique_abbr": "UW-Madison;UW;NVIDIA", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Madison;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "InstaFlow: One Step is Enough for High-Quality Diffusion-Based Text-to-Image Generation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19575", "id": "1k4yZbbDqX", "author_site": "Xingchao Liu, Xiwen Zhang, Jianzhu Ma, Jian Peng, Qiang Liu", "tldr": "", "abstract": "Diffusion models have revolutionized text-to-image generation with its exceptional quality and creativity. However, its multi-step sampling process is known to be slow, often requiring tens of inference steps to obtain satisfactory results. Previous attempts to improve its sampling speed and reduce computational costs through distillation have been unsuccessful in achieving a functional one-step model.\nIn this paper, we explore a recent method called Rectified Flow, which, thus far, has only been applied to small datasets. The core of Rectified Flow lies in its \\emph{reflow} procedure, which straightens the trajectories of probability flows, refines the coupling between noises and images, and facilitates the distillation process with student models. We propose a novel text-conditioned pipeline to turn Stable Diffusion (SD) into an ultra-fast one-step model, in which we find reflow plays a critical role in improving the assignment between noise and images. Leveraging our new pipeline, we create, to the best of our knowledge, the first one-step diffusion-based text-to-image generator with SD-level image quality, achieving an FID (Fr\u00e9chet Inception Distance) of $23.3$ on MS COCO 2017-5k, surpassing the previous state-of-the-art technique, progressive distillation, by a significant margin ($37.2$ $\\rightarrow$ $23.3$ in FID). By utilizing an expanded network with 1.7B parameters, we further improve the FID to $22.4$. We call our one-step models \\emph{InstaFlow}. On MS COCO 2014-30k, InstaFlow yields an FID of $13.1$ in just $0.09$ second, the best in $\\leq 0.1$ second regime, outperforming the recent StyleGAN-T ($13.9$ in $0.1$ second). Notably, the training of InstaFlow only costs 199 A100 GPU days. 
Codes and pre-trained models are available at \\url{github.com/gnobitab/InstaFlow}.", "keywords": "Diffusion Models;Generative Models;Acceleration", "primary_area": "generative models", "supplementary_material": "/attachment/26c516ba81ea09b09c94825008b1320432bf0439.pdf", "author": "Xingchao Liu;Xiwen Zhang;Jianzhu Ma;Jian Peng;qiang liu", "authorids": "~Xingchao_Liu1;~Xiwen_Zhang2;~Jianzhu_Ma2;~Jian_Peng1;~qiang_liu4", "gender": "M;M;M;M;M", "homepage": ";https://xiwen1995.github.io/;https://majianzhu.com/;http://jianpeng.web.engr.illinois.edu/;https://www.cs.utexas.edu/~lqiang/", "dblp": "228/7309;;24/9080.html;29/4181-1;61/3234-1", "google_scholar": "VOTVE0UAAAAJ;9ArsuzwAAAAJ;;https://scholar.google.com.tw/citations?user=4wcAVXAAAAAJ;https://scholar.google.com.tw/citations?user=2qDh4WUAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Xingchao_Liu1;~Xiwen_Zhang2;~Jianzhu_Ma2;~Jian_Peng1;~Qiang_Liu1", "aff": "University of Texas, Austin;Helixon Research;Tsinghua University;University of Illinois, Urbana Champaign;University of Texas, Austin", "aff_domain": "utexas.edu;helixon.com;tsinghua.edu.cn;illinois.edu;utexas.edu", "position": "PhD student;Researcher;Associate Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nliu2024instaflow,\ntitle={InstaFlow: One Step is Enough for High-Quality Diffusion-Based Text-to-Image Generation},\nauthor={Xingchao Liu and Xiwen Zhang and Jianzhu Ma and Jian Peng and qiang liu},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=1k4yZbbDqX}\n}", "github": "", "project": "", "reviewers": "ZDe9;VVpt;mwbp;W54n", "pdf_size": 50159880, "rating": "6;6;8;8", "confidence": "5;4;4;3", "soundness": "3;4;3;3", "contribution": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "158;47;61;62", "wc_strengths": "92;53;43;27", "wc_weaknesses": "168;85;167;74", "wc_questions": "85;54;33;45", "wc_review": "503;239;304;208", "wc_reply_reviewers": "127;257;93;58", "wc_reply_authors": "802;734;661;128", "reply_reviewers": "2;2;1;1", "reply_authors": "2;3;2;2", "rating_avg": [ 7.0, 1.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 82.0, 44.27753380666091 ], "wc_strengths_avg": [ 53.75, 23.951774464536026 ], "wc_weaknesses_avg": [ 123.5, 44.17295552710957 ], "wc_questions_avg": [ 54.25, 19.253246479490155 ], "wc_review_avg": [ 313.5, 114.7617096421973 ], "wc_reply_reviewers_avg": [ 133.75, 75.22424808530823 ], "wc_reply_authors_avg": [ 581.25, 266.3919809228498 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.7071067811865475, "gs_citation": 217, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=40660508850029294&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=1k4yZbbDqX", "pdf": "https://openreview.net/pdf?id=1k4yZbbDqX", "email": "utexas.edu;helixon.com;tsinghua.edu.cn;illinois.edu;utexas.edu", "author_num": 5, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "University of Texas at Austin;Helixon Research;Tsinghua University;University of Illinois Urbana-Champaign", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.utexas.edu;;https://www.tsinghua.edu.cn;https://illinois.edu", "aff_unique_abbr": "UT Austin;;THU;UIUC", 
"aff_campus_unique_index": "0;2;0", "aff_campus_unique": "Austin;;Urbana-Champaign", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "United States;China" }, { "title": "Domain constraints improve risk prediction when outcome data is missing", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19574", "id": "1mNFsbvo2P", "author_site": "Sidhika Balachandar, Nikhil Garg, Emma Pierson", "tldr": "", "abstract": "Machine learning models are often trained to predict the outcome resulting from a human decision. For example, if a doctor decides to test a patient for disease, will the patient test positive? A challenge is that historical decision-making determines whether the outcome is observed: we only observe test outcomes for patients doctors historically tested. Untested patients, for whom outcomes are unobserved, may differ from tested patients along observed and unobserved dimensions. We propose a Bayesian model class which captures this setting. The purpose of the model is to accurately estimate risk for both tested and untested patients. Estimating this model is challenging due to the wide range of possibilities for untested patients. To address this, we propose two domain constraints which are plausible in health settings: a prevalence constraint, where the overall disease prevalence is known, and an expertise constraint, where the human decision-maker deviates from purely risk-based decision-making only along a constrained feature set. We show theoretically and on synthetic data that domain constraints improve parameter inference. We apply our model to a case study of cancer risk prediction, showing that the model's inferred risk predicts cancer diagnoses, its inferred testing policy captures known public health policies, and it can identify suboptimalities in test allocation. 
Though our case study is in healthcare, our analysis reveals a general class of domain constraints which can improve model estimation in many settings.", "keywords": "Bayesian model;health;selective labels;distribution shift;domain constraint;biomedicine", "primary_area": "probabilistic methods (Bayesian methods, variational inference, sampling, UQ, etc.)", "supplementary_material": "/attachment/115822eb773a6a547c0fc7d1eb1a2d2bb90f953f.pdf", "author": "Sidhika Balachandar;Nikhil Garg;Emma Pierson", "authorids": "~Sidhika_Balachandar1;~Nikhil_Garg2;~Emma_Pierson1", "gender": "F;;F", "homepage": "https://sidhikabalachandar.github.io/;https://gargnikhil.com/;https://people.eecs.berkeley.edu/~emmapierson/", "dblp": "280/2938;83/6058-1;159/0572", "google_scholar": "4x_agk4AAAAJ;8qSK3noAAAAJ;xGORWi0AAAAJ", "orcid": "0000-0001-6739-2027;0000-0002-1988-792X;", "linkedin": "sidhikab/;;", "or_profile": "~Sidhika_Balachandar1;~Nikhil_Garg2;~Emma_Pierson1", "aff": "Department of Computer Science, Cornell University;Cornell University;Cornell Tech", "aff_domain": "cs.cornell.edu;cornell.edu;cornell.edu", "position": "PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nbalachandar2024domain,\ntitle={Domain constraints improve risk prediction when outcome data is missing},\nauthor={Sidhika Balachandar and Nikhil Garg and Emma Pierson},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=1mNFsbvo2P}\n}", "github": "", "project": "", "reviewers": "uugS;KKGr;6mUm;26qi", "pdf_size": 495306, "rating": "5;8;8;8", "confidence": "3;4;4;2", "soundness": "2;4;3;3", "contribution": "2;4;3;3", "presentation": "3;4;4;4", "wc_summary": "82;64;65;38", "wc_strengths": "91;64;92;23", "wc_weaknesses": "143;151;50;134", "wc_questions": "38;2;46;32", "wc_review": "354;281;253;227", "wc_reply_reviewers": "0;0;25;12", "wc_reply_authors": "674;412;432;1052", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;2", "rating_avg": [ 7.25, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 62.25, 15.722197683530124 ], "wc_strengths_avg": [ 67.5, 28.040149785619906 ], "wc_weaknesses_avg": [ 119.5, 40.574006457336694 ], "wc_questions_avg": [ 29.5, 16.635804759614125 ], "wc_review_avg": [ 278.75, 47.457217575412066 ], "wc_reply_reviewers_avg": [ 9.25, 10.328964130056798 ], "wc_reply_authors_avg": [ 642.5, 257.93555396648986 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6485419293618097691&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=1mNFsbvo2P", "pdf": "https://openreview.net/pdf?id=1mNFsbvo2P", "email": "cs.cornell.edu;cornell.edu;cornell.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Cornell University", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.cornell.edu", "aff_unique_abbr": "Cornell", "aff_campus_unique_index": "1", "aff_campus_unique": ";New York City", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "1mOeklnLf4", "title": "FroSSL: Frobenius Norm Minimization for 
Self-Supervised Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Self-supervised learning (SSL) is an increasingly popular paradigm for representation learning. Recent methods can be classified as sample-contrastive, dimension-contrastive, or asymmetric network-based, with each family having its own approach to avoiding informational collapse. While dimension-contrastive methods converge to similar solutions as sample-contrastive methods, it can be empirically shown that some methods require more epochs of training to converge. Motivated by closing this divide, we present the objective function FroSSL which is both sample- and dimension-contrastive up to embedding normalization. FroSSL works by minimizing covariance Frobenius norms for avoiding collapse and minimizing mean-squared error for augmentation invariance. We show that FroSSL converges more quickly than a variety of other SSL methods and provide theoretical and empirical support that this faster convergence is due to how FroSSL affects the eigenvalues of the embedding covariance matrices. We also show that FroSSL learns competitive representations on linear probe evaluation when used to train a ResNet18 on the CIFAR-10, CIFAR-100, STL-10, and ImageNet datasets.", "keywords": "self-supervised learning;representation learning;information theory;computer vision", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/e3ac395c7817f32cf7e7b4ac509117b560b6a5f5.zip", "author": "Oscar Skean;Aayush Dhakal;Nathan Jacobs;Luis Gonzalo Sanchez Giraldo", "authorids": "~Oscar_Skean1;a.dhakal@wustl.edu;~Nathan_Jacobs1;~Luis_Gonzalo_Sanchez_Giraldo2", "gender": "M;;M;", "homepage": ";;https://jacobsn.github.io/;", "dblp": ";;82/3140;", "google_scholar": "XO1nKsUAAAAJ;;ZBgGyh8AAAAJ;", "orcid": "0000-0002-4160-8392;;0000-0002-4242-8967;", "linkedin": ";;jacobsn/;", "or_profile": "~Oscar_Skean1;a.dhakal@wustl.edu;~Nathan_Jacobs1;~Luis_Gonzalo_Sanchez_Giraldo2", "aff": "University of Kentucky;;Washington University, Saint Louis;", "aff_domain": "uky.edu;;wustl.edu;", "position": "PhD student;;Full Professor;", "bibtex": "@misc{\nskean2024frossl,\ntitle={Fro{SSL}: Frobenius Norm Minimization for Self-Supervised Learning},\nauthor={Oscar Skean and Aayush Dhakal and Nathan Jacobs and Luis Gonzalo Sanchez Giraldo},\nyear={2024},\nurl={https://openreview.net/forum?id=1mOeklnLf4}\n}", "github": "", "project": "", "reviewers": "tbwD;dD2L;ZCeH;rGwc", "site": "https://openreview.net/forum?id=1mOeklnLf4", "pdf_size": 9901764, "rating": "3;3;3;5", "confidence": "5;3;4;3", "soundness": "1;2;3;3", "contribution": "1;1;2;2", "presentation": "1;3;3;3", "wc_summary": "63;49;41;20", "wc_strengths": "11;133;25;39", "wc_weaknesses": "209;261;3;58", "wc_questions": "159;56;94;25", "wc_review": "442;499;163;142", "wc_reply_reviewers": "0;0;0;39", "wc_reply_authors": "716;114;214;162", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "contribution_avg": [ 1.5, 0.5 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 43.25, 15.562374497485916 ], "wc_strengths_avg": [ 52.0, 47.80167361086848 ], "wc_weaknesses_avg": [ 132.75, 105.69383851483491 ], "wc_questions_avg": [ 83.5, 49.97249243333776 ], "wc_review_avg": [ 311.5, 160.4439154346465 ], "wc_reply_reviewers_avg": [ 9.75, 16.887495373796554 ], 
"wc_reply_authors_avg": [ 301.5, 241.9106239915891 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "aff_unique_index": "0;1", "aff_unique_norm": "University of Kentucky;Washington University in St. Louis", "aff_unique_dep": ";", "aff_unique_url": "https://www.uky.edu;https://wustl.edu", "aff_unique_abbr": "UK;WUSTL", "aff_campus_unique_index": "1", "aff_campus_unique": ";Saint Louis", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "1mjbfedaye", "title": "Learning Equi-angular Representations for Online Continual Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Online continual learning suffers from an underfitted solution for prompt model update due to the constraint of single-epoch learning. We confront this challenge by proposing an efficient online continual learning method with the notion of neural collapse. In particular, we induce neural collapse to form a simplex equiangular tight frame (ETF) structure in the representation space so that the learned model with single epoch can better fit the streamed data by proposing preparatory data training and residual correction in the representation space. With an extensive set of empirical validations using CIFAR10/100, TinyImageNet, and ImageNet-200, we show that our proposed method outperforms state-of-the-art methods by a noticeable margin in various online continual learning scenarios, including Disjoint and Gaussian scheduled setups.", "keywords": "Online continual learning;Neural collapse", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "", "author": "Minhyuk Seo;Hyunseo Koh;Wonje Jeung;Min Jae Lee;San Kim;Hankook Lee;Sungjun Cho;Sungik Choi;Hyunwoo Kim;Jonghyun Choi", "authorids": "~Minhyuk_Seo1;~Hyunseo_Koh1;~Wonje_Jeung1;~Min_Jae_Lee2;~San_Kim2;~Hankook_Lee1;~Sungjun_Cho1;~Sungik_Choi1;~Hyunwoo_Kim5;~Jonghyun_Choi1", "gender": "M;M;M;M;M;M;M;M;M;M", "homepage": "https://dbd05088.github.io/;;https://github.com/cryingInitial;https://98minjae.github.io/;https://mounkim.github.io/;https://hankook.github.io;https://sc782.github.io/;https://ppolon.github.io/;https://sites.google.com/view/hyunwookim/home;", "dblp": "350/4104;304/4369;374/6621;;;223/4393;254/8021;21/11103;https://dblp.org/rec/conf/cvpr/HanHOPKKK22;184/4055.html", "google_scholar": "ayDPR-gAAAAJ;Mi4cMxgAAAAJ;;YWWOFx0AAAAJ;;CgqswXUAAAAJ;https://scholar.google.com/citations?hl=en;uiGWnm4AAAAJ;5DfOhKwAAAAJ;H0QB0PwAAAAJ", "orcid": ";0000-0002-2576-1581;;;;;;0000-0002-7934-8434;;", "linkedin": "minhyuk-seo-59ba11247/;%ED%98%84%EC%84%9C-%EA%B3%A0-66298a221/;;;san-kim-1b4495316?utm_source=share&utm_campaign=share_via&utm_content=profile&utm_medium=android_app;;sungjun-cho-46982411a/;jonghyun-choi-459bb615/;hyunwoo-k-b16460193/;", "or_profile": "~Minhyuk_Seo1;~Hyunseo_Koh1;~Wonje_Jeung1;~Min_Jae_Lee2;~San_Kim2;~Hankook_Lee1;~Sungjun_Cho1;~Jonghyun_Choi1;~Hyunwoo_Eugene_Kim1;~Choi_sungik1", "aff": "Yonsei University;Gwangju Institute of Science and Technology;Yonsei University;Seoul National University;Seoul National University;LG AI Research;LG AI Research;Yonsei University;Zhejiang Lab;LG AI Research", "aff_domain": "yonsei.ac.kr;gist.ac.kr;yonsei.ac.kr;snu.ac.kr;snu.ac.kr;lgresearch.ai;lgresearch.ai;yonsei.ac.kr;zhejianglab.com;lgresearch.ai", "position": "MS 
student;PhD student;Undergrad student;PhD student;MS student;Researcher;Researcher;Associate Professor;Principal Researcher;Researcher", "bibtex": "@misc{\nseo2024learning,\ntitle={Learning Equi-angular Representations for Online Continual Learning},\nauthor={Minhyuk Seo and Hyunseo Koh and Wonje Jeung and Min Jae Lee and San Kim and Hankook Lee and Sungjun Cho and Sungik Choi and Hyunwoo Kim and Jonghyun Choi},\nyear={2024},\nurl={https://openreview.net/forum?id=1mjbfedaye}\n}", "github": "", "project": "", "reviewers": "ubjn;Ff4L;Dzox;2VjM", "site": "https://openreview.net/forum?id=1mjbfedaye", "pdf_size": 1080551, "rating": "3;3;5;6", "confidence": "5;4;5;3", "soundness": "2;2;2;3", "contribution": "2;2;2;3", "presentation": "2;3;2;3", "wc_summary": "28;55;91;73", "wc_strengths": "38;25;52;53", "wc_weaknesses": "256;181;881;38", "wc_questions": "5;104;24;36", "wc_review": "327;365;1048;200", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 61.75, 23.27418097377435 ], "wc_strengths_avg": [ 42.0, 11.467344941179714 ], "wc_weaknesses_avg": [ 339.0, 322.57479752764317 ], "wc_questions_avg": [ 42.25, 37.325426990189946 ], "wc_review_avg": [ 485.0, 330.7408350960008 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9436547253272062366&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1;0;2;2;3;3;0;4;3", "aff_unique_norm": "Yonsei University;Gwangju Institute of Science and Technology;Seoul National University;LG;Zhejiang Lab", "aff_unique_dep": ";;;LG AI Research;", "aff_unique_url": "https://www.yonsei.ac.kr;https://www.gist.ac.kr;https://www.snu.ac.kr;https://www.lgaires.com;http://www.zhejianglab.com", "aff_unique_abbr": "Yonsei;GIST;SNU;LG AI;", "aff_campus_unique_index": "1", "aff_campus_unique": ";Gwangju", "aff_country_unique_index": "0;0;0;0;0;0;0;0;1;0", "aff_country_unique": "South Korea;China" }, { "title": "Unsupervised Pretraining for Fact Verification by Language Model Distillation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19573", "id": "1mjsP8RYAw", "author_site": "Adrian Bazaga, Pietro Lio, Gos Micklem", "tldr": "", "abstract": "Fact verification aims to verify a claim using evidence from a trustworthy knowledge base. To address this challenge, algorithms must produce features for every claim that are both semantically meaningful, and compact enough to find a semantic alignment with the source information. In contrast to previous work, which tackled the alignment problem by learning over annotated corpora of claims and their corresponding labels, we propose SFAVEL ($\\underline{S}$elf-supervised $\\underline{Fa}$ct $\\underline{Ve}$rification via $\\underline{L}$anguage Model Distillation), a novel unsupervised pretraining framework that leverages pre-trained language models to distil self-supervised features into high-quality claim-fact alignments without the need for annotations. 
This is enabled by a novel contrastive loss function that encourages features to attain high-quality claim and evidence alignments whilst preserving the semantic relationships across the corpora. Notably, we present results that achieve a new state-of-the-art on FB15k-237 (+5.3\\% Hits@1) and FEVER (+8\\% accuracy) with linear evaluation.", "keywords": "Unsupervised Learning;Self-supervised Learning;Deep Features;Contrastive Learning;Large Language Models;Knowledge Distillation;Multimodality;Fact Verification", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Adri\u00e1n Bazaga;Pietro Lio;Gos Micklem", "authorids": "~Adri\u00e1n_Bazaga1;~Pietro_Lio1;~Gos_Micklem1", "gender": ";M;M", "homepage": "https://bazaga.ai/;https://www.cst.cam.ac.uk/people/pl219;http://www.micklemlab.org/", "dblp": "218/5717;l/PietroLio.html;", "google_scholar": "lrOLKgQAAAAJ;https://scholar.google.co.uk/citations?user=3YrWf7EAAAAJ;", "orcid": "0000-0002-1508-285X;0000-0002-0540-5053;0000-0002-6883-6168", "linkedin": "adrianbazaga/;;gosmicklem/", "or_profile": "~Adri\u00e1n_Bazaga1;~Pietro_Lio1;~Gos_Micklem1", "aff": "Amazon;University of Cambridge;University of Cambridge", "aff_domain": "amazon.com;cam.ac.uk;cam.ac.uk", "position": "Researcher;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\nbazaga2024unsupervised,\ntitle={Unsupervised Pretraining for Fact Verification by Language Model Distillation},\nauthor={Adri{\\'a}n Bazaga and Pietro Lio and Gos Micklem},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=1mjsP8RYAw}\n}", "github": "", "project": "", "reviewers": "6mXX;bggr;kcpN;GMHM;1Hnq", "pdf_size": 857262, "rating": "5;5;6;6;8", "confidence": "3;5;4;3;3", "soundness": "3;3;3;3;4", "contribution": "3;2;2;3;3", "presentation": "3;3;2;2;4", "wc_summary": "340;93;54;82;30", "wc_strengths": "22;59;31;11;65", "wc_weaknesses": "22;231;215;225;80", "wc_questions": "3;32;34;79;16", "wc_review": "387;415;334;397;191", "wc_reply_reviewers": "0;77;0;141;31", "wc_reply_authors": "731;648;651;1436;274", "reply_reviewers": "0;1;0;1;1", "reply_authors": "2;2;2;4;2", "rating_avg": [ 6.0, 1.0954451150103321 ], "confidence_avg": [ 3.6, 0.8 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "contribution_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 119.8, 112.27537575087423 ], "wc_strengths_avg": [ 37.6, 20.99142682144308 ], "wc_weaknesses_avg": [ 154.6, 86.70547848896285 ], "wc_questions_avg": [ 32.8, 25.716920499935444 ], "wc_review_avg": [ 344.8, 81.49208550528083 ], "wc_reply_reviewers_avg": [ 49.8, 53.61119286119271 ], "wc_reply_authors_avg": [ 748.0, 378.87148216776626 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 2.4, 0.8 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.4564354645876385, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18104602432207369171&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "openreview": "https://openreview.net/forum?id=1mjsP8RYAw", "pdf": "https://openreview.net/pdf?id=1mjsP8RYAw", "email": "amazon.com;cam.ac.uk;cam.ac.uk", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Amazon;University of Cambridge", "aff_unique_dep": "Amazon.com, Inc.;", "aff_unique_url": "https://www.amazon.com;https://www.cam.ac.uk", 
"aff_unique_abbr": "Amazon;Cambridge", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "Dynamic Sparse No Training: Training-Free Fine-tuning for Sparse LLMs", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19572", "id": "1ndDmZdT4g", "author_site": "Yuxin Zhang, Lirui Zhao, Mingbao Lin, Sun Yunyun, Yiwu Yao, Xingjia Han, Jared Tanner, Shiwei Liu, Rongrong Ji", "tldr": "", "abstract": "The ever-increasing large language models (LLMs), though opening a potential path for the upcoming artificial general intelligence, sadly drops a daunting obstacle on the way towards their on-device deployment. As one of the most well-established pre-LLMs approaches in reducing model complexity, network pruning appears to lag behind in the era of LLMs, due mostly to its costly fine-tuning (or re-training) necessity under the massive volumes of model parameter and training data. To close this industry-academia gap, we introduce Dynamic Sparse No Training ($\\texttt{DSNT}$), a training-free fine-tuning approach that slightly updates sparse LLMs without the expensive backpropagation and any weight updates. Inspired by the Dynamic Sparse Training, $\\texttt{DSNT}$ minimizes the reconstruction error between the dense and sparse LLMs, in the fashion of performing iterative weight pruning-and-growing on top of sparse LLMs. To accomplish this purpose, $\\texttt{DSNT}$ particularly takes into account the anticipated reduction in reconstruction error for pruning and growing, as well as the variance w.r.t. different input data for growing each weight. This practice can be executed efficiently in linear time since its obviates the need of backpropagation for fine-tuning LLMs. Extensive experiments on LLaMA-V1/V2, Vicuna, and OPT across various benchmarks demonstrate the effectiveness of $\\texttt{DSNT}$ in enhancing the performance of sparse LLMs, especially at high sparsity levels. For instance, $\\texttt{DSNT}$ is able to outperform the state-of-the-art Wanda by 26.79 perplexity at 70% sparsity with LLaMA-7B. Our paper offers fresh insights into how to fine-tune sparse LLMs in an efficient training-free manner and open new venues to scale the great potential of sparsity to LLMs. 
Codes are available at https://github.com/zyxxmu/DSnoT.", "keywords": "Large Language Models;Network Sparsity", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/3090439d99611546cd406d681bd91e7b43574421.zip", "author": "Yuxin Zhang;Lirui Zhao;Mingbao Lin;Sun Yunyun;Yiwu Yao;Xingjia Han;Jared Tanner;Shiwei Liu;Rongrong Ji", "authorids": "~Yuxin_Zhang3;~Lirui_Zhao1;~Mingbao_Lin1;~Sun_Yunyun1;~Yiwu_Yao1;~Xingjia_Han1;~Jared_Tanner1;~Shiwei_Liu2;~Rongrong_Ji5", "gender": ";M;M;;M;M;;M;M", "homepage": ";https://github.com/Lirui-Zhao;http://lmb.bjbxit.cn/;https://scholar.google.com/;https://blog.csdn.net/nature553863/article/details/80568658;;http://people.maths.ox.ac.uk/tanner/publications.shtml;https://shiweiliuiiiiiii.github.io/;http://mac.xmu.edu.cn/rrji-en.html", "dblp": "03/7346-2;;211/5903;;133/4368;;85/1256;234/8697-3.html;86/5681", "google_scholar": "6IeJLJoAAAAJ;;Dp3L1bsAAAAJ;;4tYZsLgAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.co.uk/citations?user=J7248tkAAAAJ;73IbXtsAAAAJ;", "orcid": "0000-0002-4409-7030;;0000-0003-1764-1894;;;;;;", "linkedin": ";;mingbao-lin-890444105/;;;;;;", "or_profile": "~Yuxin_Zhang3;~Lirui_Zhao1;~Mingbao_Lin1;~Sun_Yunyun1;~Yiwu_Yao1;~Xingjia_Han1;~Jared_Tanner1;~Shiwei_Liu2;~Rongrong_Ji5", "aff": "Xiamen University;Xiamen University;Xiamen University;;Huawei Technologies Ltd.;;University of Oxford;University of Oxford;Xiamen University", "aff_domain": "xmu.edu.cn;xmu.edu.cn;xmu.edu.cn;;huawei.com;;ox.ac.uk;ox.ac.uk;xmu.edu.cn", "position": "PhD student;MS student;PhD student;;Researcher;;Full Professor;Postdoc;Full Professor", "bibtex": "@inproceedings{\nzhang2024dynamic,\ntitle={Dynamic Sparse No Training: Training-Free Fine-tuning for Sparse {LLM}s},\nauthor={Yuxin Zhang and Lirui Zhao and Mingbao Lin and Sun Yunyun and Yiwu Yao and Xingjia Han and Jared Tanner and Shiwei Liu and Rongrong Ji},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=1ndDmZdT4g}\n}", "github": "", "project": "", "reviewers": "nrCN;s71w;CfsK", "pdf_size": 524223, "rating": "6;6;6", "confidence": "5;4;2", "soundness": "3;3;3", "contribution": "3;2;3", "presentation": "3;3;3", "wc_summary": "93;79;105", "wc_strengths": "59;82;120", "wc_weaknesses": "90;244;61", "wc_questions": "131;55;3", "wc_review": "373;460;289", "wc_reply_reviewers": "0;122;0", "wc_reply_authors": "559;1437;628", "reply_reviewers": "0;2;0", "reply_authors": "2;5;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 1.247219128924647 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 92.33333333333333, 10.624918300339486 ], "wc_strengths_avg": [ 87.0, 25.152865973217974 ], "wc_weaknesses_avg": [ 131.66666666666666, 80.30912498312726 ], "wc_questions_avg": [ 63.0, 52.56107558513873 ], "wc_review_avg": [ 374.0, 69.81403870282824 ], "wc_reply_reviewers_avg": [ 40.666666666666664, 57.51135153650587 ], "wc_reply_authors_avg": [ 874.6666666666666, 398.62625213210544 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 3.0, 1.4142135623730951 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 55, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6652760928495592428&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "openreview": 
"https://openreview.net/forum?id=1ndDmZdT4g", "pdf": "https://openreview.net/pdf?id=1ndDmZdT4g", "email": "xmu.edu.cn;xmu.edu.cn;xmu.edu.cn;;huawei.com;;ox.ac.uk;ox.ac.uk;xmu.edu.cn", "author_num": 9, "aff_unique_index": "0;0;0;1;2;2;0", "aff_unique_norm": "Xiamen University;Huawei;University of Oxford", "aff_unique_dep": ";Huawei Technologies;", "aff_unique_url": "https://www.xmu.edu.cn;https://www.huawei.com;https://www.ox.ac.uk", "aff_unique_abbr": "XMU;Huawei;Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;1;0", "aff_country_unique": "China;United Kingdom" }, { "id": "1nfqABOIwQ", "title": "RIME: Robust Preference-based Reinforcement Learning with Noisy Human Preferences", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Designing an effective reward function remains a significant challenge in numerous reinforcement learning (RL) applications. Preference-based Reinforcement Learning (PbRL) presents a novel framework that circumvents the need for reward engineering by harnessing human preferences as the reward signal. However, current PbRL algorithms primarily focus on feedback efficiency, which heavily depends on high-quality feedback from domain experts. This over-reliance results in a lack of robustness, leading to a severe performance degradation under noisy feedback conditions, thereby limiting the broad applicability of PbRL. In this paper, we present RIME, a robust PbRL algorithm for effective reward learning from noisy human preferences. Our method incorporates a sample selection-based discriminator to dynamically filter denoised preferences for robust training. To mitigate the accumulated error caused by incorrect selection, we propose to warm start the reward model for a good initialization, which additionally bridges the performance gap during transition from pre-training to online training in PbRL. Our experiments on robotic manipulation and locomotion tasks demonstrate that RIME significantly enhances the robustness of the current state-of-the-art PbRL method. 
Ablation studies further demonstrate that the warm start is crucial for both robustness and feedback-efficiency in limited-feedback cases.", "keywords": "preference-based reinforcement learning;human-in-the-loop reinforcement learning;deep reinforcement learning", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/dd3275fefb31a0dff17fa8bd0b3ea43339f41ec3.zip", "author": "Jie Cheng;Gang Xiong;Xingyuan Dai;Qinghai Miao;Yisheng Lv;Fei-Yue Wang", "authorids": "~Jie_Cheng4;~Gang_Xiong2;~Xingyuan_Dai1;~Qinghai_Miao1;~Yisheng_Lv1;~Fei-Yue_Wang2", "gender": "M;;M;M;M;M", "homepage": ";;;https://people.ucas.edu.cn/~lvyisheng;http://www.ia.cas.cn/sourcedb_ia_cas/en/iaexpert/200908/t20090804_2310468.html;https://teacher.ucas.ac.cn/~gxiong", "dblp": ";203/8062;33/1250;;;96/372-1", "google_scholar": ";R4Rn7dMAAAAJ;;RRKqjKAAAAAJ;;", "orcid": "0009-0008-5373-7563;;0000-0003-1213-1123;;;", "linkedin": ";;;;;", "or_profile": "~Jie_Cheng4;~Xingyuan_Dai1;~Qinghai_Miao1;~Yisheng_Lv1;~Fei-Yue_Wang2;~Xiong_Gang1", "aff": "University of Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;University of Chinese Academy of Sciences;University of Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Science", "aff_domain": "ucas.ac.cn;ia.ac.cn;ucas.ac.cn;ucas.ac.cn;ia.ac.cn;ia.ac.cn", "position": "PhD student;Assistant Professor;Associate Professor;Full Professor;Full Professor;Full Professor", "bibtex": "@misc{\nanonymous2024rime,\ntitle={{RIME}: Robust Preference-based Reinforcement Learning with Noisy Human Preferences},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=1nfqABOIwQ}\n}", "github": "", "project": "", "reviewers": "dSpC;vzpZ;z4ZA", "site": "https://openreview.net/forum?id=1nfqABOIwQ", "pdf_size": 21682849, "rating": "3;6;8", "confidence": "5;3;4", "soundness": "1;3;3", "contribution": "2;2;2", "presentation": "2;3;3", "wc_summary": "86;15;68", "wc_strengths": "90;36;95", "wc_weaknesses": "586;99;163", "wc_questions": "226;4;379", "wc_review": "988;154;705", "wc_reply_reviewers": "716;0;946", "wc_reply_authors": "2483;239;2327", "reply_reviewers": "1;0;2", "reply_authors": "6;1;6", "rating_avg": [ 5.666666666666667, 2.0548046676563256 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 2.3333333333333335, 0.9428090415820634 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 56.333333333333336, 30.136725472788548 ], "wc_strengths_avg": [ 73.66666666666667, 26.71246067953223 ], "wc_weaknesses_avg": [ 282.6666666666667, 216.0745756034759 ], "wc_questions_avg": [ 203.0, 153.95453874439687 ], "wc_review_avg": [ 615.6666666666666, 346.2892175945163 ], "wc_reply_reviewers_avg": [ 554.0, 402.8332988553288 ], "wc_reply_authors_avg": [ 1683.0, 1023.0464310088765 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 4.333333333333333, 2.357022603955158 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5960395606792698, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13128103223629693214&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;0;0;1;1", "aff_unique_norm": "University of Chinese Academy of Sciences;Chinese Academy of Sciences", "aff_unique_dep": ";Institute of Automation", "aff_unique_url": "http://www.ucas.ac.cn;http://www.ia.cas.cn", 
"aff_unique_abbr": "UCAS;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Self-Alignment with Instruction Backtranslation", "status": "Oral", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19571", "id": "1oijHJBRsT", "author_site": "Xian Li, Ping Yu, Chunting Zhou, Timo Schick, Omer Levy, Luke Zettlemoyer, Jason E Weston, Mike Lewis", "tldr": "", "abstract": "We present a scalable method to build a high quality instruction following language model by automatically labelling human-written text with corresponding instructions. Our approach, named instruction backtranslation, starts with a language model finetuned on a small amount of seed data, and a given web corpus. The seed model is used to construct training examples by generating instruction prompts for web documents (self-augmentation), and then selecting high quality examples from among these candidates (self-curation). This data is then used to finetune a stronger model. Finetuning LLaMa on two iterations of our approach yields a model that outperforms all other LLaMa-based models on the Alpaca leaderboard not relying on distillation data, demonstrating highly effective self-alignment.", "keywords": "large language models;self-supervised learning;data augmentation", "primary_area": "generative models", "supplementary_material": "", "author": "Xian Li;Ping Yu;Chunting Zhou;Timo Schick;Omer Levy;Luke Zettlemoyer;Jason E Weston;Mike Lewis", "authorids": "~Xian_Li1;~Ping_Yu2;~Chunting_Zhou1;~Timo_Schick1;~Omer_Levy1;~Luke_Zettlemoyer1;~Jason_E_Weston1;~Mike_Lewis1", "gender": ";F;F;;M;M;;M", "homepage": ";https://yuping1.wixsite.com/mysite;https://violet-zct.github.io/;http://timoschick.com;;https://www.cs.washington.edu/people/faculty/lsz/;;", "dblp": "82/1763-3.html;;161/2679;203/9176;117/4866;21/6793;;19/6214", "google_scholar": "v_sIgawAAAAJ;-V7TJhwAAAAJ;mR5W7EgAAAAJ;;PZVd2h8AAAAJ;https://scholar.google.com.tw/citations?user=UjpbO6IAAAAJ;;SnQnQicAAAAJ", "orcid": ";;;;0000-0001-7300-8191;;;", "linkedin": ";ping-yu-05ba8212b/;;;;luke-zettlemoyer-a0109b226/;;", "or_profile": "~Xian_Li1;~Ping_Yu2;~Chunting_Zhou1;~Timo_Schick1;~Omer_Levy1;~Luke_Zettlemoyer1;~Jason_E_Weston1;~Mike_Lewis1", "aff": "Facebook AI;Meta Facebook;Meta AI;Meta Facebook;Tel Aviv University;Meta;;Facebook AI Research", "aff_domain": "fb.com;fb.com;meta.com;fb.com;tau.ac.il;meta.com;;fb.com", "position": "Principal Researcher;Researcher;Researcher;Researcher;Senior Lecturer;Researcher;;Research Scientist", "bibtex": "@inproceedings{\nli2024selfalignment,\ntitle={Self-Alignment with Instruction Backtranslation},\nauthor={Xian Li and Ping Yu and Chunting Zhou and Timo Schick and Omer Levy and Luke Zettlemoyer and Jason E Weston and Mike Lewis},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=1oijHJBRsT}\n}", "github": "", "project": "", "reviewers": "Fnnk;XACc;bTHF;HdQC", "pdf_size": 1785904, "rating": "8;8;8;8", "confidence": "4;4;5;4", "soundness": "4;3;4;4", "contribution": "3;4;3;4", "presentation": "4;4;4;4", "wc_summary": "179;89;81;101", "wc_strengths": "22;101;70;66", "wc_weaknesses": "4;154;17;88", "wc_questions": "97;47;52;127", "wc_review": "302;391;220;382", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "240;200;186;778", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 8.0, 0.0 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], 
"soundness_avg": [ 3.75, 0.4330127018922193 ], "contribution_avg": [ 3.5, 0.5 ], "presentation_avg": [ 4.0, 0.0 ], "wc_summary_avg": [ 112.5, 39.04804732633887 ], "wc_strengths_avg": [ 64.75, 28.154706533721853 ], "wc_weaknesses_avg": [ 65.75, 60.15137155543504 ], "wc_questions_avg": [ 80.75, 33.04826016600571 ], "wc_review_avg": [ 323.75, 69.19673041408821 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 351.0, 247.3236745643247 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 230, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14196853842712224571&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=1oijHJBRsT", "pdf": "https://openreview.net/pdf?id=1oijHJBRsT", "email": "fb.com;fb.com;meta.com;fb.com;tau.ac.il;meta.com;;fb.com", "author_num": 8, "aff_unique_index": "0;0;0;0;1;0;0", "aff_unique_norm": "Meta;Tel Aviv University", "aff_unique_dep": "Facebook AI;", "aff_unique_url": "https://www.facebook.com;https://www.tau.ac.il", "aff_unique_abbr": "Facebook AI;TAU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;0;0", "aff_country_unique": "United States;Israel" }, { "title": "Theoretical Analysis of Robust Overfitting for Wide DNNs: An NTK Approach", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19570", "id": "1op5YGZu8X", "author_site": "Shaopeng Fu, Di Wang", "tldr": "", "abstract": "Adversarial training (AT) is a canonical method for enhancing the robustness of deep neural networks (DNNs). However, recent studies empirically demonstrated that it suffers from robust overfitting, i.e., a long time AT can be detrimental to the robustness of DNNs. This paper presents a theoretical explanation of robust overfitting for DNNs. Specifically, we non-trivially extend the neural tangent kernel (NTK) theory to AT and prove that an adversarially trained wide DNN can be well approximated by a linearized DNN. Moreover, for squared loss, closed-form AT dynamics for the linearized DNN can be derived, which reveals a new AT degeneration phenomenon: a long-term AT will result in a wide DNN degenerates to that obtained without AT and thus cause robust overfitting. Based on our theoretical results, we further design a method namely Adv-NTK, the first AT algorithm for infinite-width DNNs. Experiments on real-world datasets show that Adv-NTK can help infinite-width DNNs enhance comparable robustness to that of their finite-width counterparts, which in turn justifies our theoretical findings. 
The code is available at https://github.com/fshp971/adv-ntk.", "keywords": "NTK;neural tangent kernels;adversarial training;robust overfitting", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/ab6939d672fe71286dded0968cc514e328a5cd05.zip", "author": "Shaopeng Fu;Di Wang", "authorids": "~Shaopeng_Fu1;~Di_Wang1", "gender": ";", "homepage": "https://shaopengfu.me;", "dblp": "278/8181;", "google_scholar": "i7cWm4gAAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Shaopeng_Fu1;~Di_Wang1", "aff": "King Abdullah University of Science and Technology;", "aff_domain": "kaust.edu.sa;", "position": "PhD student;", "bibtex": "@inproceedings{\nfu2024theoretical,\ntitle={Theoretical Analysis of Robust Overfitting for Wide {DNN}s: An {NTK} Approach},\nauthor={Shaopeng Fu and Di Wang},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=1op5YGZu8X}\n}", "github": "", "project": "", "reviewers": "edb8;s7R7;gmGK;S6eo;4ePw", "pdf_size": 878700, "rating": "6;6;6;6;8", "confidence": "3;4;3;2;3", "soundness": "3;3;3;3;4", "contribution": "2;2;3;2;3", "presentation": "3;3;2;3;3", "wc_summary": "49;136;96;81;75", "wc_strengths": "18;30;67;52;36", "wc_weaknesses": "347;86;40;176;161", "wc_questions": "20;37;26;6;31", "wc_review": "434;289;229;315;303", "wc_reply_reviewers": "32;97;0;64;27", "wc_reply_authors": "2212;792;702;1316;668", "reply_reviewers": "1;2;0;2;1", "reply_authors": "6;4;3;5;2", "rating_avg": [ 6.4, 0.7999999999999999 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "contribution_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 87.4, 28.653795560099887 ], "wc_strengths_avg": [ 40.6, 17.153425313913253 ], "wc_weaknesses_avg": [ 162.0, 104.95903962975271 ], "wc_questions_avg": [ 24.0, 10.601886624558858 ], "wc_review_avg": [ 314.0, 66.89095604040952 ], "wc_reply_reviewers_avg": [ 44.0, 33.40059879702758 ], "wc_reply_authors_avg": [ 1138.0, 585.8108909878682 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 4.0, 1.4142135623730951 ], "replies_avg": [ 33, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18088651160665427077&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "openreview": "https://openreview.net/forum?id=1op5YGZu8X", "pdf": "https://openreview.net/pdf?id=1op5YGZu8X", "email": "kaust.edu.sa;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "King Abdullah University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kast.kau.edu.sa", "aff_unique_abbr": "KAUST", "aff_country_unique_index": "0", "aff_country_unique": "Saudi Arabia" }, { "title": "Convolutional Deep Kernel Machines", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19569", "id": "1oqedRt6Z7", "author_site": "Edward Milsom, Ben Anson, Laurence Aitchison", "tldr": "", "abstract": "Standard infinite-width limits of neural networks sacrifice the ability for intermediate layers to learn representations from data. Recent work (\u201cA theory of representation learning gives a deep generalisation of kernel methods\u201d, Yang et al. 2023) modified the Neural Network Gaussian Process (NNGP) limit of Bayesian neural networks so that representation learning is retained. 
Furthermore, they found that applying this modified limit to a deep Gaussian process gives a practical learning algorithm which they dubbed the \u201cdeep kernel machine\u201d (DKM). However, they only considered the simplest possible setting: regression in small, fully connected networks with e.g. 10 input features. Here, we introduce convolutional deep kernel machines. This required us to develop a novel inter-domain inducing point approximation, as well as introducing and experimentally assessing a number of techniques not previously seen in DKMs, including analogues to batch normalisation, different likelihoods, and different types of top-layer. The resulting model trains in roughly 77 GPU hours, achieving around 99\\% test accuracy on MNIST, 72\\% on CIFAR-100, and 92.7\\% on CIFAR-10, which is SOTA for kernel methods.", "keywords": "Gaussian process;infinite-width neural network;NNGP;Bayesian deep learning", "primary_area": "probabilistic methods (Bayesian methods, variational inference, sampling, UQ, etc.)", "supplementary_material": "", "author": "Edward Milsom;Ben Anson;Laurence Aitchison", "authorids": "~Edward_Milsom1;~Ben_Anson1;~Laurence_Aitchison1", "gender": "M;M;", "homepage": ";;http://www.gatsby.ucl.ac.uk/~laurence/", "dblp": "348/6212;;155/1918.html", "google_scholar": ";https://scholar.google.com/citations?hl=en;", "orcid": "0000-0003-0404-4564;;", "linkedin": ";ben-anson-7b7519183/;", "or_profile": "~Edward_Milsom1;~Ben_Anson1;~Laurence_Aitchison1", "aff": "University of Bristol;University of Bristol;University of Bristol", "aff_domain": "bristol.ac.uk;bristol.ac.uk;bristol.ac.uk", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nmilsom2024convolutional,\ntitle={Convolutional Deep Kernel Machines},\nauthor={Edward Milsom and Ben Anson and Laurence Aitchison},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=1oqedRt6Z7}\n}", "github": "", "project": "", "reviewers": "hjfZ;3QEZ;5yKS;jw8S", "pdf_size": 405912, "rating": "5;5;5;8", "confidence": "4;3;4;3", "soundness": "3;2;3;3", "contribution": "2;3;2;3", "presentation": "3;1;2;4", "wc_summary": "106;49;89;119", "wc_strengths": "42;32;47;46", "wc_weaknesses": "23;139;225;75", "wc_questions": "538;17;4;101", "wc_review": "709;237;365;341", "wc_reply_reviewers": "250;0;0;62", "wc_reply_authors": "1102;445;448;680", "reply_reviewers": "1;0;0;1", "reply_authors": "2;1;1;2", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 90.75, 26.34743820563965 ], "wc_strengths_avg": [ 41.75, 5.931905258852336 ], "wc_weaknesses_avg": [ 115.5, 75.39728111808807 ], "wc_questions_avg": [ 165.0, 218.5463337601434 ], "wc_review_avg": [ 413.0, 177.53872816937718 ], "wc_reply_reviewers_avg": [ 78.0, 102.4792661956554 ], "wc_reply_authors_avg": [ 668.75, 267.6876678145633 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7905167758364695693&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 6, "openreview": "https://openreview.net/forum?id=1oqedRt6Z7", "pdf": "https://openreview.net/pdf?id=1oqedRt6Z7", "email": 
"bristol.ac.uk;bristol.ac.uk;bristol.ac.uk", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Bristol", "aff_unique_dep": "", "aff_unique_url": "https://www.bristol.ac.uk", "aff_unique_abbr": "Bristol", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "1p4q1cXOX9", "title": "Attribute-Enhanced Similarity Ranking for Sparse Link Prediction", "track": "main", "status": "Reject", "tldr": "", "abstract": "Link prediction is a fundamental problem in graph data. In its most realistic setting, the problem consists of predicting missing or future links between random pairs of nodes from the set of disconnected pairs. Graph Neural Networks (GNNs) have become the predominant framework for link prediction. GNN-based methods treat link prediction as a binary classification problem and handle the extreme class imbalance---real graphs are very sparse---by sampling (uniformly at random) a balanced number of disconnected pairs not only for training but also for evaluation. However, we show that the reported performance of GNNs for link prediction in the balanced setting does not translate to the more realistic imbalanced setting and that simpler topology-based approaches are often better at handling sparsity. These findings motivate Gelato, a similarity-based link-prediction method that applies (1) graph learning based on node attributes to enhance a topological heuristic, (2) a ranking loss for addressing class imbalance, and (3) a negative sampling scheme that efficiently selects hard training pairs via graph partitioning. Experiments show that Gelato is more accurate and faster than GNN-based alternatives.", "keywords": "Link Prediction;Graph Neural Networks;Graph Learning;Network Science", "primary_area": "learning on graphs and other geometries & topologies", "supplementary_material": "", "author": "Zexi Huang;Joao Pedro Rodrigues Mattos;Mert Kosan;Arlei Lopes da Silva;Ambuj Singh", "authorids": "~Zexi_Huang1;~Joao_Pedro_Rodrigues_Mattos1;~Mert_Kosan1;~Arlei_Lopes_da_Silva1;~Ambuj_Singh1", "gender": "M;M;M;M;", "homepage": "https://zexihuang.com/;https://joaopedromattos.github.io/;https://www.mertkosan.com;https://cs.rice.edu/~al110/index.html;", "dblp": "299/4829;249/6600;304/8019;19/2546;", "google_scholar": "56TmFA4AAAAJ;4fBin7kAAAAJ;12lDpTAAAAAJ;atGtis4AAAAJ;", "orcid": "0000-0002-1480-4494;0000-0002-6877-1608;0000-0002-8092-5024;0000-0003-1792-0076;", "linkedin": "zexihuang/;joaopedromattos/;https://linkedin.com/in/mertkosan;;", "or_profile": "~Zexi_Huang1;~Joao_Pedro_Rodrigues_Mattos1;~Mert_Kosan1;~Arlei_Lopes_da_Silva1;~Ambuj_Singh1", "aff": "UC Santa Barbara;Rice University;VISA;Rice University;", "aff_domain": "ucsb.edu;rice.edu;visa.com;rice.edu;", "position": "PhD student;PhD student;Researcher;Assistant Professor;", "bibtex": "@misc{\nhuang2024attributeenhanced,\ntitle={Attribute-Enhanced Similarity Ranking for Sparse Link Prediction},\nauthor={Zexi Huang and Joao Pedro Rodrigues Mattos and Mert Kosan and Arlei Lopes da Silva and Ambuj Singh},\nyear={2024},\nurl={https://openreview.net/forum?id=1p4q1cXOX9}\n}", "github": "", "project": "", "reviewers": "554z;6oyf;LQFc;4LRj", "site": "https://openreview.net/forum?id=1p4q1cXOX9", "pdf_size": 3036908, "rating": "3;3;3;5", "confidence": "4;4;4;4", "soundness": "2;1;2;3", "contribution": "2;1;2;2", "presentation": "3;2;3;3", "wc_summary": "58;32;158;86", "wc_strengths": "22;1;67;409", "wc_weaknesses": "146;157;146;131", 
"wc_questions": "3;1;120;110", "wc_review": "229;191;491;736", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "481;426;422;500", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.0, 0.7071067811865476 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 83.5, 47.06113045816048 ], "wc_strengths_avg": [ 124.75, 165.83481992633514 ], "wc_weaknesses_avg": [ 145.0, 9.246621004453464 ], "wc_questions_avg": [ 58.5, 56.61492736019362 ], "wc_review_avg": [ 411.75, 219.96974223742683 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 457.25, 33.9512518178638 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:1DqsmIPgAvMJ:scholar.google.com/&scioq=Attribute-Enhanced+Similarity+Ranking+for+Sparse+Link+Prediction&hl=en&as_sdt=0,24", "gs_version_total": 3, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "University of California, Santa Barbara;Rice University;VISA", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ucsb.edu;https://www.rice.edu;https://www.visa.com", "aff_unique_abbr": "UCSB;Rice;VISA", "aff_campus_unique_index": "0", "aff_campus_unique": "Santa Barbara;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "ConjNorm: Tractable Density Estimation for Out-of-Distribution Detection", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19568", "id": "1pSL2cXWoz", "author_site": "Bo Peng, Yadan Luo, Yonggang Zhang, Yixuan Li, Zhen Fang", "tldr": "", "abstract": "Post-hoc out-of-distribution (OOD) detection has garnered intensive attention in reliable machine learning. Many efforts have been dedicated to deriving score functions based on logits, distances, or rigorous data distribution assumptions to identify low-scoring OOD samples. Nevertheless, these estimate scores may fail to accurately reflect the true data density or impose impractical constraints. To provide a unified perspective on density-based score design, we propose a novel theoretical framework grounded in Bregman divergence, which extends distribution considerations to encompass an exponential family of distributions. Leveraging the conjugation constraint revealed in our theorem, we introduce a \\textsc{ConjNorm} method, reframing density function design as a search for the optimal norm coefficient $p$ against the given dataset. In light of the computational challenges of normalization, we devise an unbiased and analytically tractable estimator of the partition function using the Monte Carlo-based importance sampling technique. 
Extensive experiments across OOD detection benchmarks empirically demonstrate that our proposed \\textsc{ConjNorm} has established a new state-of-the-art in a variety of OOD detection setups, outperforming the current best method by up to 13.25\\% and 28.19\\% (FPR95) on CIFAR-100 and ImageNet-1K, respectively.", "keywords": "Density-based OOD;Importance Sampling;Tractable density estimation", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/1fcdd42748538b321b841769b27d2f20625a614e.zip", "author": "Bo Peng;Yadan Luo;Yonggang Zhang;Yixuan Li;Zhen Fang", "authorids": "~Bo_Peng24;~Yadan_Luo1;~Yonggang_Zhang1;~Yixuan_Li1;~Zhen_Fang2", "gender": ";F;M;F;M", "homepage": ";https://sites.google.com/view/yadanluo/home;https://yonggangzhangben.github.io/index.html;http://pages.cs.wisc.edu/~sharonli/;https://fang-zhen.github.io/index.html", "dblp": ";182/2414;27/6859-3;144/6087-1;", "google_scholar": ";3IfL11AAAAAJ;XSbEr98AAAAJ;https://scholar.google.com/citations?hl=en;OzD6WJcAAAAJ", "orcid": ";0000-0001-6272-2971;0000-0002-4080-7592;;0000-0003-0602-6255", "linkedin": ";;;liyixuan;", "or_profile": "~Bo_Peng24;~Yadan_Luo1;~Yonggang_Zhang1;~Yixuan_Li1;~Zhen_Fang2", "aff": ";The University of Queensland;Hong Kong Baptist University;Cornell University;University of Technology Sydney", "aff_domain": ";uq.edu.au;hkbu.edu.hk;cornell.edu;uts.edu.au", "position": ";Assistant Professor;Postdoc;Graduate Student;Assistant Professor", "bibtex": "@inproceedings{\npeng2024conjnorm,\ntitle={ConjNorm: Tractable Density Estimation for Out-of-Distribution Detection},\nauthor={Bo Peng and Yadan Luo and Yonggang Zhang and Yixuan Li and Zhen Fang},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=1pSL2cXWoz}\n}", "github": "", "project": "", "reviewers": "MqR8;LYsh;EBSh;WvTp", "pdf_size": 2561923, "rating": "5;6;6;8", "confidence": "4;3;2;4", "soundness": "3;3;2;3", "contribution": "2;2;2;3", "presentation": "3;3;3;3", "wc_summary": "138;69;76;88", "wc_strengths": "59;143;60;54", "wc_weaknesses": "58;171;44;280", "wc_questions": "84;68;4;96", "wc_review": "339;451;184;518", "wc_reply_reviewers": "142;0;0;0", "wc_reply_authors": "976;1015;378;783", "reply_reviewers": "1;0;0;0", "reply_authors": "4;4;3;3", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 92.75, 26.994212342648563 ], "wc_strengths_avg": [ 79.0, 37.02026472082554 ], "wc_weaknesses_avg": [ 138.25, 95.51014344036973 ], "wc_questions_avg": [ 63.0, 35.482389998420345 ], "wc_review_avg": [ 373.0, 126.47727068528954 ], "wc_reply_reviewers_avg": [ 35.5, 61.48780366869514 ], "wc_reply_authors_avg": [ 788.0, 252.4866333095675 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.5, 0.5 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.20751433915982243, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14808389682740397945&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=1pSL2cXWoz", "pdf": "https://openreview.net/pdf?id=1pSL2cXWoz", "email": ";uq.edu.au;hkbu.edu.hk;cornell.edu;uts.edu.au", "author_num": 5, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of Queensland;Hong 
Kong Baptist University;Cornell University;University of Technology Sydney", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.uq.edu.au;https://www.hkbu.edu.hk;https://www.cornell.edu;https://www.uts.edu.au", "aff_unique_abbr": "UQ;HKBU;Cornell;UTS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;2;0", "aff_country_unique": "Australia;China;United States" }, { "id": "1pTlvxIfuV", "title": "A Reparameterized Discrete Diffusion Model for Text Generation", "track": "main", "status": "Reject", "tldr": "", "abstract": "This work studies discrete diffusion probabilistic models with applications to natural language generation. We derive an alternative yet equivalent formulation of the sampling from discrete diffusion processes and leverage this insight to develop a family of reparameterized discrete diffusion models. The derived generic framework is highly flexible, offers a fresh perspective of the generation process in discrete diffusion models, and features more effective training and decoding techniques. We conduct extensive experiments to evaluate the text generation capability of our model, demonstrating significant improvements over existing diffusion models.", "keywords": "discrete diffusion;text generation;non-autoregressive generation", "primary_area": "generative models", "supplementary_material": "", "author": "Lin Zheng;Jianbo Yuan;Lei Yu;Lingpeng Kong", "authorids": "~Lin_Zheng1;~Jianbo_Yuan1;~Lei_Yu4;~Lingpeng_Kong1", "gender": "M;M;F;M", "homepage": "https://lzhengisme.github.io/;;;https://ikekonglp.github.io/", "dblp": ";134/6790;https://dblp.uni-trier.de/pid/01/2775-0008;144/7656", "google_scholar": "3NXH0t8AAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.co.uk/citations?user=gX5JBc4AAAAJ;f1hBi5wAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Lin_Zheng1;~Jianbo_Yuan1;~Lei_Yu4;~Lingpeng_Kong1", "aff": "The University of Hong Kong;Bytedance;Google DeepMind;Department of Computer Science, The University of Hong Kong", "aff_domain": "hku.hk;bytedance.com;deepmind.com;cs.hku.hk", "position": "PhD student;Researcher;Research Scientist;Assistant Professor", "bibtex": "@misc{\nzheng2024a,\ntitle={A Reparameterized Discrete Diffusion Model for Text Generation},\nauthor={Lin Zheng and Jianbo Yuan and Lei Yu and Lingpeng Kong},\nyear={2024},\nurl={https://openreview.net/forum?id=1pTlvxIfuV}\n}", "github": "", "project": "", "reviewers": "7X9e;shFm;uT11;Z8Rn", "site": "https://openreview.net/forum?id=1pTlvxIfuV", "pdf_size": 552481, "rating": "3;5;6;8", "confidence": "4;4;5;4", "soundness": "3;3;4;4", "contribution": "2;2;3;4", "presentation": "2;3;4;4", "wc_summary": "69;58;166;171", "wc_strengths": "31;49;84;47", "wc_weaknesses": "119;402;190;35", "wc_questions": "90;7;100;32", "wc_review": "309;516;540;285", "wc_reply_reviewers": "0;195;0;0", "wc_reply_authors": "215;909;603;307", "reply_reviewers": "0;1;0;0", "reply_authors": "1;2;1;1", "rating_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "contribution_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 116.0, 52.673522760491345 ], "wc_strengths_avg": [ 52.75, 19.343926695477318 ], "wc_weaknesses_avg": [ 186.5, 135.9788586508947 ], "wc_questions_avg": [ 57.25, 38.931831449342326 ], "wc_review_avg": [ 412.5, 116.12170339777143 ], "wc_reply_reviewers_avg": [ 48.75, 84.43747686898277 ], "wc_reply_authors_avg": [ 508.5, 
272.06387117733954 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.16012815380508713, "gs_citation": 57, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7982020371199540857&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Hong Kong;ByteDance;Google", "aff_unique_dep": ";;Google DeepMind", "aff_unique_url": "https://www.hku.hk;https://www.bytedance.com;https://deepmind.com", "aff_unique_abbr": "HKU;Bytedance;DeepMind", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "China;United Kingdom" }, { "id": "1qDRwhe379", "title": "Refining Corpora from a Model Calibration Perspective for Chinese Spelling Correction", "track": "main", "status": "Reject", "tldr": "", "abstract": "Chinese Spelling Correction (CSC) commonly lacks large-scale high-quality corpora, due to the labor-intensive labeling of spelling errors in real-life human writing or typing scenarios. Two data augmentation methods are widely adopted: (1) Random Replacement with the guidance of confusion sets and (2) OCR/ASR-based Generation that simulates character misusing. However, both methods inevitably introduce noisy data (e.g., false spelling errors), potentially leading to over-correction. By carefully analyzing the two types of corpora, we find that though the latter achieves more robust generalization performance, the former yields better-calibrated CSC models. We then provide a theoretical analysis of this empirical observation, based on which a corpus refining strategy is proposed. Specifically, OCR/ASR-based data samples are fed into a well-calibrated CSC model trained on random replacement-based corpora and then filtered based on prediction confidence. 
By learning a simple BERT-based model on the refined OCR/ASR-based corpus, we set up state-of-the-art performance on three widely-used benchmarks, while significantly alleviating over-correction (e.g., lowering false positive predictions).", "keywords": "Chinese Spelling Correction;Data Augmentation;Model Calibration;Corpus Refining Strategy.", "primary_area": "probabilistic methods (Bayesian methods, variational inference, sampling, UQ, etc.)", "supplementary_material": "", "author": "Yang An;Dingyao Yu;Wei Ye;xiongfeng xiao;Shaoguang Mao;Tao Ge;Shikun Zhang", "authorids": "~Yang_An4;~Dingyao_Yu1;~Wei_Ye2;~xiongfeng_xiao1;~Shaoguang_Mao1;~Tao_Ge1;~Shikun_Zhang2", "gender": "M;M;M;M;M;M;M", "homepage": "https://github.com/Anychnn;;https://se.pku.edu.cn/kcl/weiye/;https://github.com/xiaoxiongfeng;https://www.linkedin.com/in/shaoguang-mao-929733120/;https://getao.github.io/;", "dblp": ";337/0563.html;09/5394-4;;214/0365;136/7923;83/3715.html", "google_scholar": ";Loc17FgAAAAJ;RgLGFMIAAAAJ;;S6XnZsQAAAAJ;LYbs7Q8AAAAJ;uiklLscAAAAJ", "orcid": ";;;;;;", "linkedin": ";;;;;;", "or_profile": "~Yang_An4;~Dingyao_Yu1;~Wei_Ye2;~xiongfeng_xiao1;~Shaoguang_Mao1;~Tao_Ge1;~Shikun_Zhang2", "aff": ";Peking University;Peking University;;Microsoft;Tencent AI Lab;Peking University", "aff_domain": ";pku.edu.cn;pku.edu.cn;;microsoft.com;tencent.com;pku.edu.cn", "position": ";MS student;Associate Professor;;Researcher;Principal Researcher;Full Professor", "bibtex": "@misc{\nan2024refining,\ntitle={Refining Corpora from a Model Calibration Perspective for Chinese Spelling Correction},\nauthor={Yang An and Dingyao Yu and Wei Ye and xiongfeng xiao and Shaoguang Mao and Tao Ge and Shikun Zhang},\nyear={2024},\nurl={https://openreview.net/forum?id=1qDRwhe379}\n}", "github": "", "project": "", "reviewers": "DKJd;65Mo;7Y3U", "site": "https://openreview.net/forum?id=1qDRwhe379", "pdf_size": 555516, "rating": "5;6;6", "confidence": "2;4;2", "soundness": "3;3;3", "contribution": "2;3;2", "presentation": "3;3;3", "wc_summary": "26;120;123", "wc_strengths": "10;107;157", "wc_weaknesses": "305;82;150", "wc_questions": "49;51;53", "wc_review": "390;360;483", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "432;176;433", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 2.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 89.66666666666667, 45.03578823804711 ], "wc_strengths_avg": [ 91.33333333333333, 61.02640594220032 ], "wc_weaknesses_avg": [ 179.0, 93.32023717643814 ], "wc_questions_avg": [ 51.0, 1.632993161855452 ], "wc_review_avg": [ 411.0, 52.364109846344185 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 347.0, 120.91594876883143 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=132761082016325069&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Peking University;Microsoft;Tencent", "aff_unique_dep": ";Microsoft Corporation;Tencent AI Lab", "aff_unique_url": "http://www.pku.edu.cn;https://www.microsoft.com;https://ai.tencent.com", "aff_unique_abbr": "Peking U;Microsoft;Tencent AI Lab", "aff_campus_unique_index": "", 
"aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "China;United States" }, { "id": "1qzUPE5QDZ", "title": "Rectifying Group Irregularities in Explanations for Distribution Shift", "track": "main", "status": "Reject", "tldr": "", "abstract": "It is well-known that real-world changes constituting distribution shift adversely affect model performance.\nHow to characterize those changes in an interpretable manner is poorly understood.\nExisting techniques take the form of shift explanations that elucidate how samples map from the original distribution toward the shifted one by reducing the disparity between the two distributions.\nHowever, these methods can introduce group irregularities, leading to explanations that are less feasible and robust. \nTo address these issues, we propose Group-aware Shift Explanations (GSE), an explanation method that leverages worst-group optimization to rectify group irregularities.\nWe demonstrate that GSE not only maintains group structures, but can improve feasibility and robustness over a variety of domains by up to 20% and 25% respectively.", "keywords": "explainability;distribution shift;group robust", "primary_area": "visualization or interpretation of learned representations", "supplementary_material": "/attachment/9e67dcd2c921a5ce2db0b0031ee9d132423b5ff2.zip", "author": "Adam Stein;Yinjun Wu;Eric Wong;Mayur Naik", "authorids": "~Adam_Stein2;~Yinjun_Wu1;~Eric_Wong1;~Mayur_Naik1", "gender": "M;M;M;M", "homepage": "https://www.seas.upenn.edu/~steinad/;https://wuyinjun-1993.github.io/;http://riceric22.github.io/;http://www.cis.upenn.edu/~mhnaik/", "dblp": "217/4482;169/1054;64/1811-1.html;92/6794", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;pWnTMRkAAAAJ;https://scholar.google.com.tw/citations?user=fmsV6nEAAAAJ", "orcid": "0000-0003-1887-100X;;;", "linkedin": "adam-stein-086135241/;;;ai4code/", "or_profile": "~Adam_Stein2;~Yinjun_Wu1;~Eric_Wong1;~Mayur_Naik1", "aff": "University of Pennsylvania;University of Pennsylvania;University of Pennsylvania;University of Pennsylvania", "aff_domain": "seas.upenn.edu;seas.upenn.edu;upenn.edu;upenn.edu", "position": "PhD student;Postdoc;Assistant Professor;Professor", "bibtex": "@misc{\nstein2024rectifying,\ntitle={Rectifying Group Irregularities in Explanations for Distribution Shift},\nauthor={Adam Stein and Yinjun Wu and Eric Wong and Mayur Naik},\nyear={2024},\nurl={https://openreview.net/forum?id=1qzUPE5QDZ}\n}", "github": "", "project": "", "reviewers": "AAG5;LTxx;3qo2;XuMW", "site": "https://openreview.net/forum?id=1qzUPE5QDZ", "pdf_size": 11872203, "rating": "5;5;5;6", "confidence": "2;4;2;3", "soundness": "3;2;3;2", "contribution": "2;2;2;3", "presentation": "2;3;3;2", "wc_summary": "37;74;58;98", "wc_strengths": "9;56;36;141", "wc_weaknesses": "89;212;155;310", "wc_questions": "4;12;75;141", "wc_review": "139;354;324;690", "wc_reply_reviewers": "0;0;375;18", "wc_reply_authors": "474;379;1916;1138", "reply_reviewers": "0;0;3;1", "reply_authors": "2;2;5;3", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 66.75, 22.30891077574161 ], "wc_strengths_avg": [ 60.5, 49.378639106399035 ], "wc_weaknesses_avg": [ 191.5, 81.08791525252083 ], "wc_questions_avg": [ 58.0, 55.24943438624508 ], "wc_review_avg": [ 376.75, 198.71509127391408 ], 
"wc_reply_reviewers_avg": [ 98.25, 159.9505783046751 ], "wc_reply_authors_avg": [ 976.75, 616.0874024844202 ], "reply_reviewers_avg": [ 1.0, 1.224744871391589 ], "reply_authors_avg": [ 3.0, 1.224744871391589 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11382361820116394572&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Pennsylvania", "aff_unique_dep": "", "aff_unique_url": "https://www.upenn.edu", "aff_unique_abbr": "UPenn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "1rgMkDWfYV", "title": "Cleaning label noise with vision-language models", "track": "main", "status": "Reject", "tldr": "", "abstract": "Current mainstream methods for learning with noisy labels often rely on sample selection, such as the common 'small-loss' strategy that considers samples with smaller losses as clean. Following this, most research focuses on developing more robust sample selection strategies. However, they are still influenced by problems such as the 'self-confirmation bias', which stems from their reliance on the in-training model. Furthermore, relying solely on visual information for sample selection can introduce biases and challenges, such as the common issue of 'hard noise', where samples are erroneously labeled as semantically similar categories.\nTo address these challenges, this paper proposes using the popular vision-language model CLIP for sample selection. Leveraging CLIP, a pre-trained model, can effectively mitigate self-confirmation bias. Additionally, CLIP's distinctive language modality supplements potential biases introduced by relying solely on visual information for sample selection.\nSpecifically, we introduce the \\textit{CLIPSelector}, which utilizes both the CLIP's zero-shot classifier and an easily-inducible classifier based on its vision encoder and noisy labels for sample selection. We theoretically and empirically demonstrate the unique advantages of the \\textit{CLIPSelector}.\nTo evaluate its effectiveness on existing benchmarks, we further introduce a semi-supervised learning method called \\textit{MixFix}, tailored for noisy datasets. \\textit{MixFix} leverages the subset selected by the \\textit{CLIPSelector} and gradually introduces missing clean samples and re-labeled noisy samples based on different thresholds.\nIn comparison to current hybrid methods involving iterative sample selection and multiple off-the-shelf techniques like model co-training, our approach simplifies the process. \nNonetheless, our approach achieves competitive or superior performance across various benchmarks, including datasets with synthetic and real-world noise. 
\nCode will be released upon acceptance.", "keywords": "Label noise;Sample selection", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/37b4a7fe849b3293f918e12b6df4a60700f12576.zip", "author": "Chen Feng;Ioannis Patras", "authorids": "~Chen_Feng3;~Ioannis_Patras2", "gender": "M;M", "homepage": "https://mrchenfeng.github.io/;http://www.eecs.qmul.ac.uk/~ioannisp/", "dblp": ";18/1556", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=OBYLxRkAAAAJ", "orcid": "0000-0001-9199-559X;0000-0003-3913-4738", "linkedin": "drchenfeng/;ioannis-patras-1053767/", "or_profile": "~Chen_Feng3;~Ioannis_Patras2", "aff": "Queen Mary University London;Queen Mary, University of London", "aff_domain": "qmul.ac.uk;qmul.ac.uk", "position": "PhD student;Full Professor", "bibtex": "@misc{\nfeng2024cleaning,\ntitle={Cleaning label noise with vision-language models},\nauthor={Chen Feng and Ioannis Patras},\nyear={2024},\nurl={https://openreview.net/forum?id=1rgMkDWfYV}\n}", "github": "", "project": "", "reviewers": "GWvS;tEQg;Vnif;s2nG", "site": "https://openreview.net/forum?id=1rgMkDWfYV", "pdf_size": 2218801, "rating": "3;5;5;5", "confidence": "3;5;4;4", "soundness": "2;3;3;3", "contribution": "3;3;2;3", "presentation": "1;3;3;4", "wc_summary": "100;95;53;40", "wc_strengths": "83;56;21;19", "wc_weaknesses": "250;152;57;56", "wc_questions": "209;5;42;9", "wc_review": "642;308;173;124", "wc_reply_reviewers": "0;270;0;27", "wc_reply_authors": "652;1926;457;521", "reply_reviewers": "0;4;0;1", "reply_authors": "2;6;2;2", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 72.0, 25.971137826441105 ], "wc_strengths_avg": [ 44.75, 26.536531423680827 ], "wc_weaknesses_avg": [ 128.75, 80.12919255801846 ], "wc_questions_avg": [ 66.25, 83.65815859795146 ], "wc_review_avg": [ 311.75, 202.22558567105202 ], "wc_reply_reviewers_avg": [ 74.25, 113.55257592850987 ], "wc_reply_authors_avg": [ 889.0, 602.8237719267547 ], "reply_reviewers_avg": [ 1.25, 1.6393596310755 ], "reply_authors_avg": [ 3.0, 1.7320508075688772 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:1HbnjG1wCfMJ:scholar.google.com/&scioq=Cleaning+label+noise+with+vision-language+models&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Queen Mary University of London;Queen Mary, University of London", "aff_unique_dep": ";", "aff_unique_url": "https://www.qmul.ac.uk;https://www.qmul.ac.uk", "aff_unique_abbr": "QMUL;QMUL", "aff_campus_unique_index": "0;0", "aff_campus_unique": "London", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "id": "1tDoI2WBGE", "title": "A Neural Sandbox Framework for Discovering Spurious Concpets in LLM Decisions", "track": "main", "status": "Reject", "tldr": "", "abstract": "We introduce a neural sandbox framework for text classification via self-referencing defined label concepts from an Large Language Model(LLM). 
The framework draws inspiration from the define-optimize alignment problem, in which the motivations of a model are described initially and then the model is optimized to align with these predefined objectives. In our case, we design our framework to perform text classification. We take a frozen LLM as a vector embedding generator for text and provide our framework with defined concept words based on the labels along with the input text. We then optimize an operator to classify the input text based on the relevance scores to the concept operator words(cop-words). In our experiments with multiple text classification datasets and LLM models, we find, incorporating our sandbox network generally improves the accuracy by a range of 0.12\\% to 6.31\\% in accuracy and 0.3\\% to 8.82\\% in macro f1 when compared to a baseline. The framework, not only serves as a classification tool but also as a descriptive tool for the model's decision of its prediction, based on the provided cop-words. Through further evaluations involving the injection of \"foreign\" cop-words, we showcase the sandbox framework's capacity to exhibit a coherent understanding of learned concepts and construct methodologies to discover potential spurious behaviors and biases within it. Despite witnessing results confirming our network's ability to capture domain knowledge, we show evidence that the model's secondary incentives do not match human decisions.", "keywords": "Large Language Model;Spurious Corelation;NLP;AI Alignment", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/54f58403f7233d3d325631cb6ae2508c996b3119.zip", "author": "Mostafa Mushsharat;Nabeel Mohammed;Mohammad Ruhul Amin", "authorids": "~Mostafa_Mushsharat1;~Nabeel_Mohammed1;~Mohammad_Ruhul_Amin1", "gender": "M;M;M", "homepage": ";http://ece.northsouth.edu/people/dr-nabeel-mohammed/;https://ruhulsbu.github.io", "dblp": ";127/2798;193/0290.html", "google_scholar": ";https://scholar.google.com.au/citations?hl=en;N_yWGjIAAAAJ", "orcid": ";0000-0002-7661-3570;0000-0001-6540-3415", "linkedin": "mostafa-mushsharat-81965a146/;;shajibsust/", "or_profile": "~Mostafa_Mushsharat1;~Nabeel_Mohammed1;~Mohammad_Ruhul_Amin1", "aff": ";North South University;Fordham University", "aff_domain": ";northsouth.edu;fordham.edu", "position": ";Associate Professor;Assistant Professor", "bibtex": "@misc{\nmushsharat2024a,\ntitle={A Neural Sandbox Framework for Discovering Spurious Concpets in {LLM} Decisions},\nauthor={Mostafa Mushsharat and Nabeel Mohammed and Mohammad Ruhul Amin},\nyear={2024},\nurl={https://openreview.net/forum?id=1tDoI2WBGE}\n}", "github": "", "project": "", "reviewers": "cfUj;57Af;1xDE;hACj", "site": "https://openreview.net/forum?id=1tDoI2WBGE", "pdf_size": 807818, "rating": "1;1;3;3", "confidence": "4;3;3;4", "soundness": "1;1;3;2", "contribution": "2;1;3;1", "presentation": "1;1;3;1", "wc_summary": "109;264;152;72", "wc_strengths": "61;80;57;23", "wc_weaknesses": "220;215;81;84", "wc_questions": "9;20;4;4", "wc_review": "399;579;294;183", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 2.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 1.75, 0.82915619758885 ], "contribution_avg": [ 1.75, 0.82915619758885 ], "presentation_avg": [ 1.5, 0.8660254037844386 ], "wc_summary_avg": [ 149.25, 72.04642600434806 ], "wc_strengths_avg": [ 55.25, 20.54720175595694 ], "wc_weaknesses_avg": [ 150.0, 67.53147414354288 ], 
"wc_questions_avg": [ 9.25, 6.53356717268599 ], "wc_review_avg": [ 363.75, 145.86873379857659 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:QlfjTFyS9-IJ:scholar.google.com/&scioq=A+Neural+Sandbox+Framework+for+Discovering+Spurious+Concpets+in+LLM+Decisions&hl=en&as_sdt=0,10", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "North South University;Fordham University", "aff_unique_dep": ";", "aff_unique_url": "https://www.northsouth.edu/;https://www.fordham.edu", "aff_unique_abbr": "NSU;Fordham", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Bangladesh;United States" }, { "title": "MiniGPT-4: Enhancing Vision-Language Understanding with Advanced Large Language Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19567", "id": "1tZbq88f27", "author_site": "Deyao Zhu, jun chen, Xiaoqian Shen, Xiang Li, Mohamed Elhoseiny", "tldr": "", "abstract": "The recent GPT-4 has demonstrated extraordinary multi-modal abilities, such as directly generating websites from handwritten text and identifying humorous elements within images. These features are rarely observed in previous vision-language models. However, the technical details behind GPT-4 continue to remain undisclosed.\nWe believe that the enhanced multi-modal generation capabilities of GPT-4 stem from the utilization of sophisticated large language models (LLM). \nTo examine this phenomenon, we present MiniGPT-4, which aligns a frozen visual encoder with a frozen advanced LLM, Vicuna, using one projection layer. \nOur work, for the first time, uncovers that properly aligning the visual features with an advanced large language model can possess numerous advanced multi-modal abilities demonstrated by GPT-4, \nsuch as detailed image description generation and website creation from hand-drawn drafts.\nFurthermore, we also observe other emerging capabilities in MiniGPT-4, including writing stories and poems inspired by given images, teaching users how to cook based on food photos, and so on. \nIn our experiment, we found that the model trained on short image caption pairs could produce unnatural language outputs (e.g., repetition and fragmentation). 
To address this problem, we curate a detailed image description dataset in the second stage to finetune the model, which consequently improves the model's generation reliability and overall usability.", "keywords": "large language models;vision language models", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Deyao Zhu;Jun Chen;Xiaoqian Shen;Xiang Li;Mohamed Elhoseiny", "authorids": "~Deyao_Zhu1;~Jun_Chen11;~Xiaoqian_Shen3;~Xiang_Li18;~Mohamed_Elhoseiny1", "gender": "M;M;F;M;M", "homepage": "https://tsutikgiau.github.io/;https://junchen14.github.io/;https://xiaoqian-shen.github.io;http://xiangli.ac.cn;http://www.mohamed-elhoseiny.com", "dblp": "251/6017;85/5901-21;197/6114;40/1491-46;125/2894", "google_scholar": "dENNKrsAAAAJ;9G2OQmkAAAAJ;uToGtIwAAAAJ;4Apl5FgAAAAJ;iRBUTOAAAAAJ", "orcid": ";0000-0001-8883-0970;;0000-0002-9946-7000;0000-0001-9659-1551", "linkedin": "deyao-zhu-205774154/;;xiaoqian-shen-759991264;;mohamed-elhoseiny-8a836215/", "or_profile": "~Deyao_Zhu1;~Jun_Chen11;~Xiaoqian_Shen3;~Xiang_Li18;~Mohamed_Elhoseiny1", "aff": "ByteDance Inc.;KAUST;Meta Facebook;King Abdullah University of Science and Technology;KAUST", "aff_domain": "bytedance.com;kaust.edu.sa;meta.com;kaust.edu.sa;kaust.edu.sa", "position": "Researcher;PhD student;Intern;Postdoc;Associate Professor", "bibtex": "@inproceedings{\nzhu2024minigpt,\ntitle={Mini{GPT}-4: Enhancing Vision-Language Understanding with Advanced Large Language Models},\nauthor={Deyao Zhu and Jun Chen and Xiaoqian Shen and Xiang Li and Mohamed Elhoseiny},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=1tZbq88f27}\n}", "github": "", "project": "", "reviewers": "TFFX;7oDs;auSX;A8a5", "pdf_size": 3623133, "rating": "5;5;6;6", "confidence": "4;4;4;4", "soundness": "3;2;3;2", "contribution": "4;3;2;3", "presentation": "2;3;3;3", "wc_summary": "81;110;108;85", "wc_strengths": "67;41;81;72", "wc_weaknesses": "92;396;466;84", "wc_questions": "63;61;55;97", "wc_review": "303;608;710;338", "wc_reply_reviewers": "95;0;121;20", "wc_reply_authors": "189;226;1024;474", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;3;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 96.0, 13.095800853708795 ], "wc_strengths_avg": [ 65.25, 14.872373717735847 ], "wc_weaknesses_avg": [ 259.5, 173.29959607569776 ], "wc_questions_avg": [ 69.0, 16.431676725154983 ], "wc_review_avg": [ 489.75, 173.49117412710078 ], "wc_reply_reviewers_avg": [ 59.0, 50.35374861914453 ], "wc_reply_authors_avg": [ 478.25, 333.60034097704397 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2918, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1879282532294332322&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 10, "openreview": "https://openreview.net/forum?id=1tZbq88f27", "pdf": "https://openreview.net/pdf?id=1tZbq88f27", "email": "bytedance.com;kaust.edu.sa;meta.com;kaust.edu.sa;kaust.edu.sa", "author_num": 5, "aff_unique_index": "0;1;2;1;1", "aff_unique_norm": "ByteDance;King Abdullah University of Science and Technology;Meta", "aff_unique_dep": ";;Meta Platforms, Inc.", "aff_unique_url": 
"https://www.bytedance.com;https://www.kaust.edu.sa;https://meta.com", "aff_unique_abbr": "ByteDance;KAUST;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;1;1", "aff_country_unique": "China;Saudi Arabia;United States" }, { "id": "1uHTIjXjkk", "title": "Potential Based Diffusion Motion Planning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Effective motion planning in high dimensional spaces is a long-standing open problem in robotics. One class of traditional motion planning algorithms corresponds to potential-based motion planning. An advantage of potential based motion planning is composability -- different motion constraints can easily combined by adding corresponding potentials. However, constructing motion paths from potentials requires solving a global optimization across configuration space potential landscape, which is often prone to local minima, causing these approaches to fall out of favor in recent years. We propose a new approach towards learning potential based motion planning, where we train a neural networks to capture and learn an easily optimizable potentials over motion planning trajectories. We illustrate the effectiveness of such approach, significantly outperforming both classical and recent learned motion planning approaches, and illustrate its inherent composability, enabling us to generalize to a multitude of different motion constraints.", "keywords": "Motion Planning;Diffusion Model;Energy-based Model;Compositionality", "primary_area": "generative models", "supplementary_material": "", "author": "Yunhao Luo;Chen Sun;Joshua B. Tenenbaum;Yilun Du", "authorids": "~Yunhao_Luo1;~Chen_Sun1;~Joshua_B._Tenenbaum1;~Yilun_Du1", "gender": "M;M;;", "homepage": "https://devinluo27.github.io/;https://chensun.me;;https://yilundu.github.io", "dblp": ";01/6072-2;t/JoshuaBTenenbaum;204/4379", "google_scholar": ";vQa7heEAAAAJ;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Yunhao_Luo1;~Chen_Sun1;~Joshua_B._Tenenbaum1;~Yilun_Du1", "aff": "Brown University;Google;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "brown.edu;google.com;mit.edu;mit.edu", "position": "MS student;Research Scientist;Professor;PhD student", "bibtex": "@misc{\nluo2024potential,\ntitle={Potential Based Diffusion Motion Planning},\nauthor={Yunhao Luo and Chen Sun and Joshua B. 
Tenenbaum and Yilun Du},\nyear={2024},\nurl={https://openreview.net/forum?id=1uHTIjXjkk}\n}", "github": "", "project": "", "reviewers": "tz1K;PtS9;i3W8;jxc2", "site": "https://openreview.net/forum?id=1uHTIjXjkk", "pdf_size": 8601846, "rating": "1;5;5;5", "confidence": "5;5;3;4", "soundness": "1;2;2;3", "contribution": "1;2;2;2", "presentation": "2;2;2;3", "wc_summary": "323;68;62;53", "wc_strengths": "2;116;13;53", "wc_weaknesses": "2;432;73;183", "wc_questions": "2;71;443;158", "wc_review": "329;687;591;447", "wc_reply_reviewers": "0;11;672;75", "wc_reply_authors": "352;532;1420;1021", "reply_reviewers": "0;1;2;1", "reply_authors": "1;2;3;3", "rating_avg": [ 4.0, 1.7320508075688772 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.0, 0.7071067811865476 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 126.5, 113.57486517711567 ], "wc_strengths_avg": [ 46.0, 44.64862819841165 ], "wc_weaknesses_avg": [ 172.5, 163.11115841658412 ], "wc_questions_avg": [ 168.5, 167.84591147835565 ], "wc_review_avg": [ 513.5, 136.53845612134333 ], "wc_reply_reviewers_avg": [ 189.5, 280.0397293242514 ], "wc_reply_authors_avg": [ 831.25, 418.8862464918131 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6157278685220882083&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Brown University;Google;Massachusetts Institute of Technology", "aff_unique_dep": ";Google;", "aff_unique_url": "https://www.brown.edu;https://www.google.com;https://web.mit.edu", "aff_unique_abbr": "Brown;Google;MIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Unified Human-Scene Interaction via Prompted Chain-of-Contacts", "status": "Spotlight", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19566", "id": "1vCnDyQkjg", "author_site": "Zeqi Xiao, Tai Wang, Jingbo Wang, Jinkun Cao, Wenwei Zhang, Bo DAI, Dahua Lin, Jiangmiao Pang", "tldr": "", "abstract": "Human-Scene Interaction (HSI) is a vital component of fields like embodied AI and virtual reality. Despite advancements in motion quality and physical plausibility, two pivotal factors, versatile interaction control and the development of a user-friendly interface, require further exploration before the practical application of HSI. This paper presents a unified HSI framework, UniHSI, which supports unified control of diverse interactions through language commands. The framework defines interaction as ``Chain of Contacts (CoC)\", representing steps involving human joint-object part pairs. This concept is inspired by the strong correlation between interaction types and corresponding contact regions. Based on the definition, UniHSI constitutes a Large Language Model (LLM) Planner to translate language prompts into task plans in the form of CoC, and a Unified Controller that turns CoC into uniform task execution. To facilitate training and evaluation, we collect a new dataset named ScenePlan that encompasses thousands of task plans generated by LLMs based on diverse scenarios. 
Comprehensive experiments demonstrate the effectiveness of our framework in versatile task execution and generalizability to real scanned scenes.", "keywords": "Human-Scene Interaction;Chain-of-Contacts;Unified;LLM", "primary_area": "applications to robotics, autonomy, planning", "supplementary_material": "/attachment/108d0d7943fd3252782328d667d2c70d8078786b.pdf", "author": "Zeqi Xiao;Tai Wang;Jingbo Wang;Jinkun Cao;Wenwei Zhang;Bo Dai;Dahua Lin;Jiangmiao Pang", "authorids": "~Zeqi_Xiao2;~Tai_Wang2;~Jingbo_Wang3;~Jinkun_Cao1;~Wenwei_Zhang1;~Bo_Dai2;~Dahua_Lin1;~Jiangmiao_Pang1", "gender": "M;M;M;M;M;M;M;M", "homepage": "https://github.com/xizaoqu;https://scholar.google.com/citations?user=GStTsxAAAAAJ&hl=en;https://www.jinkuncao.com;https://zhangwenwei.cn;http://daibo.info/;http://dahua.site;https://oceanpang.github.io/;https://tai-wang.github.io/", "dblp": "344/1615;10/1491-3.html;224/0126;;64/2903-2;53/6088;231/7630;", "google_scholar": ";GStTsxAAAAAJ;xDtTbmQAAAAJ;QDXADSEAAAAJ;https://scholar.google.com.hk/citations?user=KNWTvgEAAAAJ;GMzzRRUAAAAJ;https://scholar.google.com/citations?authuser=0;JmbbZWIAAAAJ", "orcid": ";;;0000-0002-2748-4514;0000-0003-0777-9232;;0000-0002-6711-9319;", "linkedin": ";;;wenweizhang-b9769a124/;;;;%E6%B3%B0-%E7%8E%8B-2b2738147/", "or_profile": "~Zeqi_Xiao2;~Jingbo_Wang3;~Jinkun_Cao1;~Wenwei_Zhang1;~Bo_Dai2;~Dahua_Lin1;~Jiangmiao_Pang1;~Tai_WANG1", "aff": "Nanyang Technological University;Shanghai Artificial Intelligence Laboratory;Carnegie Mellon University;Shanghai AI Laboratory;Shanghai AI Laboratory;The Chinese University of Hong Kong;Shanghai AI Laboratory ;Shanghai AI Laboratory", "aff_domain": "ntu.edu.sg;pjlab.org.cn;andrew.cmu.edu;pjlab.org.cn;pjlab.org.cn;cuhk.edu.hk;pjlab.org.cn;pjlab.org.cn", "position": "PhD student;Researcher;PhD student;Researcher;Scientist;Associate Professor;Research Scientist;Research Scientist", "bibtex": "@inproceedings{\nxiao2024unified,\ntitle={Unified Human-Scene Interaction via Prompted Chain-of-Contacts},\nauthor={Zeqi Xiao and Tai Wang and Jingbo Wang and Jinkun Cao and Wenwei Zhang and Bo Dai and Dahua Lin and Jiangmiao Pang},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=1vCnDyQkjg}\n}", "github": "", "project": "", "reviewers": "YGEs;gSqi;tgmd;JJQ3", "pdf_size": 1428992, "rating": "5;6;8;10", "confidence": "2;3;4;4", "soundness": "2;2;3;3", "contribution": "3;3;3;4", "presentation": "2;4;3;3", "wc_summary": "95;66;157;103", "wc_strengths": "272;43;89;71", "wc_weaknesses": "3;159;379;38", "wc_questions": "292;65;65;148", "wc_review": "662;333;690;360", "wc_reply_reviewers": "0;0;0;29", "wc_reply_authors": "738;471;1090;348", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;2;2", "rating_avg": [ 7.25, 1.920286436967152 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 105.25, 32.89661836724255 ], "wc_strengths_avg": [ 118.75, 89.98437364342767 ], "wc_weaknesses_avg": [ 144.75, 147.10944055362322 ], "wc_questions_avg": [ 142.5, 92.726749107256 ], "wc_review_avg": [ 511.25, 165.32297934649011 ], "wc_reply_reviewers_avg": [ 7.25, 12.55736835487436 ], "wc_reply_authors_avg": [ 661.75, 284.6211297497078 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.9028289727756884, 
"gs_citation": 66, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15514585984236712669&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "openreview": "https://openreview.net/forum?id=1vCnDyQkjg", "pdf": "https://openreview.net/pdf?id=1vCnDyQkjg", "email": "ntu.edu.sg;pjlab.org.cn;andrew.cmu.edu;pjlab.org.cn;pjlab.org.cn;cuhk.edu.hk;pjlab.org.cn;pjlab.org.cn", "author_num": 8, "aff_unique_index": "0;1;2;3;3;4;3;3", "aff_unique_norm": "Nanyang Technological University;Shanghai Artificial Intelligence Laboratory;Carnegie Mellon University;Shanghai AI Laboratory;Chinese University of Hong Kong", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.ntu.edu.sg;http://www.shailab.org/;https://www.cmu.edu;https://www.shanghai-ai-lab.com;https://www.cuhk.edu.hk", "aff_unique_abbr": "NTU;Shanghai AI Lab;CMU;SAIL;CUHK", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;2;1;1;1;1;1", "aff_country_unique": "Singapore;China;United States" }, { "title": "Mastering Memory Tasks with World Models", "status": "Oral", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19565", "id": "1vDArHJ68h", "author_site": "Mohammad Reza Samsami, Artem Zholus, Janarthanan Rajendran, Sarath Chandar", "tldr": "", "abstract": "Current model-based reinforcement learning (MBRL) agents struggle with long-term dependencies. This limits their ability to effectively solve tasks involving extended time gaps between actions and outcomes, or tasks demanding the recalling of distant observations to inform current actions. To improve temporal coherence, we integrate a new family of state space models (SSMs) in world models of MBRL agents to present a new method, Recall to Imagine (R2I). This integration aims to enhance both long-term memory and long-horizon credit assignment. Through a diverse set of illustrative tasks, we systematically demonstrate that R2I not only establishes a new state-of-the-art for challenging memory and credit assignment RL tasks, such as BSuite and POPGym, but also showcases superhuman performance in the complex memory domain of Memory Maze. At the same time, it upholds comparable performance in classic RL tasks, such as Atari and DMC, suggesting the generality of our method. 
We also show that R2I is faster than the state-of-the-art MBRL method, DreamerV3, resulting in faster wall-time convergence.", "keywords": "model-based reinforcement learning;state space models;memory in reinforcement learning", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Mohammad Reza Samsami;Artem Zholus;Janarthanan Rajendran;Sarath Chandar", "authorids": "~Mohammad_Reza_Samsami1;~Artem_Zholus1;~Janarthanan_Rajendran1;~Sarath_Chandar1", "gender": "M;M;;M", "homepage": "https://mrsamsami.github.io;https://artemzholus.github.io/;;http://sarathchandar.in/", "dblp": ";;;45/8542", "google_scholar": "0_tOLp8AAAAJ;zRhDoycAAAAJ;;https://scholar.google.co.in/citations?user=yxWtZLAAAAAJ", "orcid": ";0000-0003-3167-3585;;", "linkedin": "https://linkedin.com/in/mohammadrezasamsami;azholus/;;", "or_profile": "~Mohammad_Reza_Samsami1;~Artem_Zholus1;~Janarthanan_Rajendran1;~Sarath_Chandar1", "aff": "Universit\u00e9 de Montr\u00e9al;\u00c9cole Polytechnique de Montr\u00e9al, Universit\u00e9 de Montr\u00e9al;;\u00c9cole Polytechnique de Montr\u00e9al", "aff_domain": "umontreal.ca;polymtl.ca;;polymtl.ca", "position": "MS student;PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nsamsami2024mastering,\ntitle={Mastering Memory Tasks with World Models},\nauthor={Mohammad Reza Samsami and Artem Zholus and Janarthanan Rajendran and Sarath Chandar},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=1vDArHJ68h}\n}", "github": "", "project": "", "reviewers": "fFhp;pPqM;Emy3", "pdf_size": 2564812, "rating": "6;8;10", "confidence": "5;4;4", "soundness": "3;3;4", "contribution": "2;3;4", "presentation": "4;3;4", "wc_summary": "71;108;52", "wc_strengths": "44;120;139", "wc_weaknesses": "218;489;134", "wc_questions": "64;335;58", "wc_review": "397;1052;383", "wc_reply_reviewers": "156;34;208", "wc_reply_authors": "2099;1657;1009", "reply_reviewers": "1;1;2", "reply_authors": "3;3;4", "rating_avg": [ 8.0, 1.632993161855452 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 77.0, 23.25224003546038 ], "wc_strengths_avg": [ 101.0, 41.04469108991645 ], "wc_weaknesses_avg": [ 280.3333333333333, 151.4823055306754 ], "wc_questions_avg": [ 152.33333333333334, 129.18806274403047 ], "wc_review_avg": [ 610.6666666666666, 312.1221270948637 ], "wc_reply_reviewers_avg": [ 132.66666666666666, 72.92614224146388 ], "wc_reply_authors_avg": [ 1588.3333333333333, 447.6317931316119 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9099273719990278443&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=1vDArHJ68h", "pdf": "https://openreview.net/pdf?id=1vDArHJ68h", "email": "umontreal.ca;polymtl.ca;;polymtl.ca", "author_num": 4, "aff_unique_index": "0;1;1", "aff_unique_norm": "Universit\u00e9 de Montr\u00e9al;\u00c9cole Polytechnique de Montr\u00e9al", "aff_unique_dep": ";", "aff_unique_url": "https://www.umontreal.ca;https://www.polymtl.ca", "aff_unique_abbr": "UdeM;Polytechnique Montr\u00e9al", 
"aff_campus_unique_index": "1;1", "aff_campus_unique": ";Montr\u00e9al", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "id": "1vI5fqwpRU", "title": "RayE-Sub: Countering Subgraph Degradation via Perfect Reconstruction", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Subgraph learning has dominated most practices of improving the expressive power of Message passing neural networks (MPNNs). Existing subgraph discovery policies can be classified into node-based and partition-based, which both achieve impressive performance in most scenarios. Unfortunately, we observe that there exists a subgraph degradation trap in these two mainstream solutions. This means extracted subgraphs fail to achieve better expression. In this work, we start with an intuitive observation and theoretical analysis to explore subgraph degeneration. We then summarize the limitations of these two subgraph strategies from the perspective of reconstruction ability. To this end, we propose perfect reconstruction principle to realize high-quality subgraph extraction. To achieve this, two affiliated questions should be well-addressed. \\emph{(i) how to ensure the subgraphs possessing with 'perfect' information? (ii) how to guarantee the 'reconstruction' power of obtained subgraphs?} Firstly, we propose a subgraph partition strategy \\emph{Rayleigh-resistance} to extract non-overlap subgraphs by leveraging the graph spectral theory. Secondly, we put forward the Query mechanism to achieve subgraph-level equivariant learning, which guarantees subgraph reconstruction ability. These two parts, \\emph{perfect subgraph partition} and \\emph{equivariant subgraph learning} are seamlessly unified as a novel \\emph{\\underline{Ray}leigh-resistance \\underline{E}quivariant \\underline{Sub}graph learning} architecture (\\emph{\\textbf{RayE-Sub}}). 
A series of experiments on both synthetic and real datasets demonstrate that our approach can consistently outperform previous MPNNs architectures.", "keywords": "Graph Nerual Network;Subgraph Learning;Reconstruction ability;Expressive power", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/646a29411bb0625757457e4a8d32a566ce170f7b.zip", "author": "Kuo Yang;Zhengyang Zhou;Xu Wang;Pengkun Wang;Limin Li;Yang Wang", "authorids": "~Kuo_Yang2;~Zhengyang_Zhou1;~Xu_Wang16;~Pengkun_Wang1;~Limin_Li3;~Yang_Wang32", "gender": "M;M;M;M;M;M", "homepage": ";http://home.ustc.edu.cn/~zzy0929/Home/;http://home.ustc.edu.cn/~wx309/;http://home.ustc.edu.cn/~pengkun/index.html;http://di.ustc.edu.cn/;http://staff.ustc.edu.cn/~angyan/", "dblp": ";246/8238;181/2815-29;;;", "google_scholar": ";dPElQLUAAAAJ;7hYGPC8AAAAJ;https://scholar.google.com/citations?hl=zh-CN;;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0000-0003-3346-5130;0000-0003-4728-7347;0000-0002-1492-3477;0000-0002-2680-4563;;0000-0002-6079-7053", "linkedin": "https://www.linkedin.cn/incareer/in/kuo-yang-440a241b4;;;;;", "or_profile": "~Kuo_Yang2;~Zhengyang_Zhou1;~Xu_Wang16;~Pengkun_Wang1;~Limin_Li3;~Yang_Wang32", "aff": "University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn", "position": "PhD student;Researcher;Associate Researcher;Researcher;PhD student;Associate Professor", "bibtex": "@misc{\nyang2024rayesub,\ntitle={RayE-Sub: Countering Subgraph Degradation via Perfect Reconstruction},\nauthor={Kuo Yang and Zhengyang Zhou and Xu Wang and Pengkun Wang and Limin Li and Yang Wang},\nyear={2024},\nurl={https://openreview.net/forum?id=1vI5fqwpRU}\n}", "github": "", "project": "", "reviewers": "7R4q;NoaS;h1tc", "site": "https://openreview.net/forum?id=1vI5fqwpRU", "pdf_size": 1079790, "rating": "3;3;6", "confidence": "4;4;3", "soundness": "2;1;3", "contribution": "2;2;3", "presentation": "1;1;3", "wc_summary": "86;50;68", "wc_strengths": "30;43;37", "wc_weaknesses": "246;84;169", "wc_questions": "5;614;10", "wc_review": "367;791;284", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 4.0, 1.4142135623730951 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.0, 0.816496580927726 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 1.6666666666666667, 0.9428090415820634 ], "wc_summary_avg": [ 68.0, 14.696938456699069 ], "wc_strengths_avg": [ 36.666666666666664, 5.312459150169743 ], "wc_weaknesses_avg": [ 166.33333333333334, 66.16309814054625 ], "wc_questions_avg": [ 209.66666666666666, 285.9141285226893 ], "wc_review_avg": [ 480.6666666666667, 222.03953601905124 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 3, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:utu08KkzN34J:scholar.google.com/&scioq=RayE-Sub:+Countering+Subgraph+Degradation+via+Perfect+Reconstruction&hl=en&as_sdt=0,22", "gs_version_total": 0, "aff_unique_index": 
"0;0;0;0;0;0", "aff_unique_norm": "University of Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ustc.edu.cn", "aff_unique_abbr": "USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Directly Fine-Tuning Diffusion Models on Differentiable Rewards", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19564", "id": "1vmSEVL19f", "author_site": "Kevin Clark, Paul Vicol, Kevin Swersky, David Fleet", "tldr": "", "abstract": "We present Direct Reward Fine-Tuning (DRaFT), a simple and effective method for fine-tuning diffusion models to maximize differentiable reward functions, such as scores from human preference models. We first show that it is possible to backpropagate the reward function gradient through the full sampling procedure, and that doing so achieves strong performance on a variety of rewards, outperforming reinforcement learning-based approaches. We then propose more efficient variants of DRaFT: DRaFT-K, which truncates backpropagation to only the last K steps of sampling, and DRaFT-LV, which obtains lower-variance gradient estimates for the case when K=1. We show that our methods work well for a variety of reward functions and can be used to substantially improve the aesthetic quality of images generated by Stable Diffusion 1.4. Finally, we draw connections between our approach and prior work, providing a unifying perspective on the design space of gradient-based fine-tuning algorithms.", "keywords": "diffusion models;preference-based learning", "primary_area": "generative models", "supplementary_material": "", "author": "Kevin Clark;Paul Vicol;Kevin Swersky;David J. Fleet", "authorids": "~Kevin_Clark1;~Paul_Vicol1;~Kevin_Swersky1;~David_J._Fleet1", "gender": "M;;M;M", "homepage": "http://cs.stanford.edu/~kevclark/;http://www.paulvicol.com;http://www.cs.toronto.edu/~kswersky;http://www.cs.toronto.edu/~fleet/index.html", "dblp": ";167/9924;35/9381;07/2099", "google_scholar": "Trk_R8wAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.ca/citations?user=IrixA8MAAAAJ;https://scholar.google.com.tw/citations?user=njOmQFsAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Kevin_Clark1;~Paul_Vicol1;~Kevin_Swersky1;~David_J._Fleet1", "aff": "Google;Google;Google Deepmind;Department of Computer Science, University of Toronto", "aff_domain": "google.com;google.com;google.com;cs.toronto.edu", "position": "Researcher;Researcher;Research Scientist;Full Professor", "bibtex": "@inproceedings{\nclark2024directly,\ntitle={Directly Fine-Tuning Diffusion Models on Differentiable Rewards},\nauthor={Kevin Clark and Paul Vicol and Kevin Swersky and David J. 
Fleet},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=1vmSEVL19f}\n}", "github": "", "project": "", "reviewers": "LgKe;tWnQ;m5FM;pZzS", "pdf_size": 28948707, "rating": "3;5;8;8", "confidence": "5;4;4;4", "soundness": "3;4;4;3", "contribution": "2;2;3;3", "presentation": "3;4;4;3", "wc_summary": "62;66;109;111", "wc_strengths": "80;138;125;123", "wc_weaknesses": "131;76;121;79", "wc_questions": "61;20;210;1", "wc_review": "334;300;565;314", "wc_reply_reviewers": "0;0;117;5", "wc_reply_authors": "636;371;793;94", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;2;2", "rating_avg": [ 6.0, 2.1213203435596424 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 87.0, 23.054283766797006 ], "wc_strengths_avg": [ 116.5, 21.84605227495348 ], "wc_weaknesses_avg": [ 101.75, 24.529319191530774 ], "wc_questions_avg": [ 73.0, 82.01524248577212 ], "wc_review_avg": [ 378.25, 108.49510357615223 ], "wc_reply_reviewers_avg": [ 30.5, 49.98249693642766 ], "wc_reply_authors_avg": [ 473.5, 265.9948307768405 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8164965809277261, "gs_citation": 135, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4389730101296429648&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=1vmSEVL19f", "pdf": "https://openreview.net/pdf?id=1vmSEVL19f", "email": "google.com;google.com;google.com;cs.toronto.edu", "author_num": 4, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Google;DeepMind;University of Toronto", "aff_unique_dep": "Google;DeepMind;Department of Computer Science", "aff_unique_url": "https://www.google.com;https://deepmind.com;https://www.utoronto.ca", "aff_unique_abbr": "Google;DeepMind;U of T", "aff_campus_unique_index": "0;0;2", "aff_campus_unique": "Mountain View;;Toronto", "aff_country_unique_index": "0;0;1;2", "aff_country_unique": "United States;United Kingdom;Canada" }, { "id": "1vqHTUTod9", "title": "Can Language Models be Instructed to Protect Personal Information?", "track": "main", "status": "Reject", "tldr": "", "abstract": "Large multimodal language models have proven transformative in numerous applications. \nHowever, these models have been shown to memorize and leak pre-training data, raising serious user privacy and information security concerns.\nWhile data leaks should be prevented, it is also crucial to examine the trade-off between the privacy protection and model utility of proposed approaches.\nIn this paper, we introduce PrivQA --- a multimodal benchmark to assess this privacy/utility trade-off when a model is instructed to protect specific categories of personal information in a simulated scenario.\nWe also propose a technique to iteratively self-moderate responses, which significantly improves privacy.\nHowever, through a series of red-teaming experiments, we find that adversaries can also easily circumvent these protections with simple jailbreaking methods through textual and/or image inputs. \nWe believe PrivQA has the potential to support the development of new models with improved privacy protections, as well as the adversarial robustness of these protections. 
We release the entire PrivQA dataset at [URL removed for review].", "keywords": "large language model;privacy;safety;redteaming;natural language processing", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "", "author": "Yang Chen;Ethan Adrian Mendes;Sauvik Das;Wei Xu;Alan Ritter", "authorids": "~Yang_Chen10;~Ethan_Adrian_Mendes1;~Sauvik_Das1;~Wei_Xu5;~Alan_Ritter1", "gender": ";;Not Specified;F;M", "homepage": "https://edchengg.github.io/;;https://sauvik.me;https://cocoxu.github.io/;http://aritter.github.io/", "dblp": "48/4792-13;;https://dblp.uni-trier.de/pid/83/8570.html;32/1213-4.html;47/3133", "google_scholar": "o-oBMWEAAAAJ;1SJ4uSgAAAAJ;;BfOdG-oAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Yang_Chen10;~Ethan_Adrian_Mendes1;~Sauvik_Das1;~Wei_Xu5;~Alan_Ritter1", "aff": "Georgia Institute of Technology;Georgia Institute of Technology;Carnegie Mellon University;Georgia Institute of Technology;Georgia Institute of Technology", "aff_domain": "gatech.edu;gatech.edu;cmu.edu;gatech.edu;gatech.edu", "position": "PhD student;MS student;Assistant Professor;Associate Professor;Associate Professor", "bibtex": "@misc{\nchen2024can,\ntitle={Can Language Models be Instructed to Protect Personal Information?},\nauthor={Yang Chen and Ethan Adrian Mendes and Sauvik Das and Wei Xu and Alan Ritter},\nyear={2024},\nurl={https://openreview.net/forum?id=1vqHTUTod9}\n}", "github": "", "project": "", "reviewers": "YNv6;VxoZ;unBh;yBLS", "site": "https://openreview.net/forum?id=1vqHTUTod9", "pdf_size": 11363621, "rating": "3;5;6;6", "confidence": "4;4;4;4", "soundness": "2;3;3;3", "contribution": "2;1;3;4", "presentation": "2;3;3;4", "wc_summary": "73;138;148;105", "wc_strengths": "83;49;38;51", "wc_weaknesses": "304;272;47;130", "wc_questions": "2;2;374;95", "wc_review": "462;461;607;381", "wc_reply_reviewers": "0;84;153;55", "wc_reply_authors": "932;983;779;616", "reply_reviewers": "0;1;2;1", "reply_authors": "3;3;2;3", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 116.0, 29.487285395573462 ], "wc_strengths_avg": [ 55.25, 16.768646337734005 ], "wc_weaknesses_avg": [ 188.25, 104.59057079871015 ], "wc_questions_avg": [ 118.25, 152.46044569002152 ], "wc_review_avg": [ 477.75, 81.5394843005522 ], "wc_reply_reviewers_avg": [ 73.0, 55.16792546398677 ], "wc_reply_authors_avg": [ 827.5, 143.33963164456645 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=831180042438977110&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Georgia Institute of Technology;Carnegie Mellon University", "aff_unique_dep": ";", "aff_unique_url": "https://www.gatech.edu;https://www.cmu.edu", "aff_unique_abbr": "Georgia Tech;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "MUFFIN: Curating Multi-Faceted Instructions for Improving Instruction Following", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19563", "id": "1vrS1zwekw", 
"author_site": "Renze Lou, Kai Zhang, Jian Xie, Yuxuan Sun, Jihyun Ahn, Hanzi XU, Yu Su, Wenpeng Yin", "tldr": "", "abstract": "In the realm of large language models (LLMs), enhancing instruction-following capability often involves curating expansive training data. This is achieved through two primary schemes: i) Scaling-Inputs: Amplifying (input, output) pairs per task instruction, aiming for better instruction adherence. ii) Scaling Input-Free Tasks: Enlarging tasks, each composed of an (instruction, output) pair (without requiring a separate input anymore). However, LLMs under Scaling-Inputs tend to be overly sensitive to inputs, leading to misinterpretation or non-compliance with instructions. Conversely, Scaling Input-Free Tasks demands a substantial number of tasks but is less effective in instruction following when dealing with instances in Scaling-Inputs. This work introduces MUFFIN, a new scheme of instruction-following dataset curation. Specifically, we automatically Scale Tasks per Input by diversifying these tasks with various input facets. Experimental results across four zero-shot benchmarks, spanning both Scaling-Inputs and Scaling Input-Free Tasks schemes, reveal that LLMs, at various scales, trained on MUFFIN generally demonstrate superior instruction-following capabilities compared to those trained on the two aforementioned schemes.", "keywords": "Instruction Tuning;Large Language Models;Automatic Data Generation", "primary_area": "datasets and benchmarks", "supplementary_material": "", "author": "Renze Lou;Kai Zhang;Jian Xie;Yuxuan Sun;Janice Ahn;Hanzi Xu;Yu Su;Wenpeng Yin", "authorids": "~Renze_Lou1;~Kai_Zhang10;~Jian_Xie3;~Yuxuan_Sun3;~Janice_Ahn1;~Hanzi_Xu1;~Yu_Su2;~Wenpeng_Yin1", "gender": "M;M;M;M;F;F;M;", "homepage": "https://renzelou.github.io/;https://drogozhang.github.io;;;;;http://ysu1989.github.io;http://wenpengyin.org/", "dblp": "296/4744;55/957-33;;;;229/0397;38/1070-1;117/7310-1", "google_scholar": "GVTbSPMAAAAJ;sDnAIsgAAAAJ;;https://scholar.google.com/citations?hl=en;;https://scholar.google.com/citations?hl=en;rIh5OqoAAAAJ;mRg16LkAAAAJ", "orcid": "0000-0002-3273-0097;;0009-0000-2867-4726;;;;;", "linkedin": "renze-lou-b681b51a0/;kai-zhang-43774b196/;;;jihyun-ahn-4b6037225;hanzixu/;;", "or_profile": "~Renze_Lou1;~Kai_Zhang10;~Jian_Xie3;~Yuxuan_Sun3;~Janice_Ahn1;~Hanzi_Xu1;~Yu_Su2;~Wenpeng_Yin1", "aff": "SalesForce.com;Google DeepMind;Fudan University;Zhejiang University;Pennsylvania State University;Temple University;Microsoft;Pennsylvania State University", "aff_domain": "salesforce.com;google.com;fudan.edu.cn;zju.edu.cn;psu.edu;temple.edu;microsoft.com;psu.edu", "position": "Intern;Student Researcher;MS student;PhD student;PhD student;PhD student;Senior Researcher;Assistant Professor", "bibtex": "@inproceedings{\nlou2024muffin,\ntitle={{MUFFIN}: Curating Multi-Faceted Instructions for Improving Instruction Following},\nauthor={Renze Lou and Kai Zhang and Jian Xie and Yuxuan Sun and Janice Ahn and Hanzi Xu and Yu Su and Wenpeng Yin},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=1vrS1zwekw}\n}", "github": "", "project": "", "reviewers": "cEte;od6V;j2qL;BsoU", "pdf_size": 1514572, "rating": "5;6;6;8", "confidence": "4;3;2;4", "soundness": "3;3;3;3", "contribution": "3;2;3;3", "presentation": "3;3;3;2", "wc_summary": "14;140;73;139", "wc_strengths": "28;97;82;56", "wc_weaknesses": "306;171;137;332", "wc_questions": "16;188;43;2", "wc_review": "364;596;335;529", 
"wc_reply_reviewers": "222;34;15;161", "wc_reply_authors": "2189;2071;917;947", "reply_reviewers": "1;1;1;1", "reply_authors": "5;5;3;4", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 91.5, 52.33784481615574 ], "wc_strengths_avg": [ 65.75, 26.271419832205492 ], "wc_weaknesses_avg": [ 236.5, 83.87639715676872 ], "wc_questions_avg": [ 62.25, 74.0823022050476 ], "wc_review_avg": [ 456.0, 109.58330164765069 ], "wc_reply_reviewers_avg": [ 108.0, 86.50144507463445 ], "wc_reply_authors_avg": [ 1531.0, 600.5447527037431 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 4.25, 0.82915619758885 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.20751433915982243, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1415875881970261133&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=1vrS1zwekw", "pdf": "https://openreview.net/pdf?id=1vrS1zwekw", "email": "salesforce.com;google.com;fudan.edu.cn;zju.edu.cn;psu.edu;temple.edu;microsoft.com;psu.edu", "author_num": 8, "aff_unique_index": "0;1;2;3;4;5;6;4", "aff_unique_norm": "Salesforce;Google;Fudan University;Zhejiang University;Pennsylvania State University;Temple University;Microsoft", "aff_unique_dep": ";Google DeepMind;;;;;Microsoft Corporation", "aff_unique_url": "https://www.salesforce.com;https://deepmind.com;https://www.fudan.edu.cn;https://www.zju.edu.cn;https://www.psu.edu;https://www.temple.edu;https://www.microsoft.com", "aff_unique_abbr": "Salesforce;DeepMind;Fudan;ZJU;PSU;Temple;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;2;0;0;0;0", "aff_country_unique": "United States;United Kingdom;China" }, { "id": "1xVDGGr6t6", "title": "Online Continual Learning via Pursuing Class-conditional Funtion", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Online continual learning is a challenging problem where models must learn from a non-stationary data stream while avoiding catastrophic forgetting. Inter-class imbalance during training has been identified as a major cause of forgetting, leading to model prediction bias towards recently learned classes. In this paper, we theoretically analyze that inter-class imbalance is entirely attributed to imbalanced class-priors, and the class-conditional function learned from intra-class distributions is the Bayes-optimal classifier. Accordingly, we present that a simple adjustment of model logits during training can effectively resist prior class bias and grasp the corresponding Bayes-optimum. Our method mitigates the impact of inter-class imbalance not only in class-incremental but also in realistic general setups by eliminating class-priors and pursuing class-conditionals, with minimal additional computational cost. We thoroughly evaluate our approach on various benchmarks and demonstrate significant performance improvements compared to prior arts. 
For example, our approach improves the best baseline by 4.6\\% on CIFAR10.", "keywords": "Online Continual Learning;Class-incremental Learning;Inter-class Imbalance", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "/attachment/62971f4229ab1e92724f38db5b1044f1a3397b71.zip", "author": "Zhehao Huang;Tao Li;Chenhe Yuan;Yingwen Wu;Xiaolin Huang", "authorids": "~Zhehao_Huang1;~Tao_Li12;~Chenhe_Yuan1;~Yingwen_Wu1;~Xiaolin_Huang1", "gender": "M;M;M;F;M", "homepage": "https://github.com/K1nght;https://nblt.github.io/;https://github.com/vernunft2;https://github.com/snowien;http://www.pami.sjtu.edu.cn/en/xiaolin", "dblp": "258/1555;;;236/4329;61/2227", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;;https://scholar.google.com.hk/citations?user=PcJzfBEAAAAJ;DR-gBcEAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Zhehao_Huang1;~Tao_Li12;~Chenhe_Yuan1;~Yingwen_Wu1;~Xiaolin_Huang1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu;sjtu.edu.cn;sjtu.edu;sjtu.edu.cn", "position": "PhD student;PhD student;Undergrad student;PhD student;Full Professor", "bibtex": "@misc{\nhuang2024online,\ntitle={Online Continual Learning via Pursuing Class-conditional Funtion},\nauthor={Zhehao Huang and Tao Li and Chenhe Yuan and Yingwen Wu and Xiaolin Huang},\nyear={2024},\nurl={https://openreview.net/forum?id=1xVDGGr6t6}\n}", "github": "", "project": "", "reviewers": "Y84H;6ngC;MqmM;3g12", "site": "https://openreview.net/forum?id=1xVDGGr6t6", "pdf_size": 2274700, "rating": "3;3;5;5", "confidence": "4;4;4;4", "soundness": "2;2;3;3", "contribution": "2;2;3;2", "presentation": "3;2;2;3", "wc_summary": "50;75;80;54", "wc_strengths": "48;29;50;25", "wc_weaknesses": "169;137;131;54", "wc_questions": "2;46;54;4", "wc_review": "269;287;315;137", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 64.75, 12.94942083646987 ], "wc_strengths_avg": [ 38.0, 11.113055385446435 ], "wc_weaknesses_avg": [ 122.75, 42.23964370114881 ], "wc_questions_avg": [ 26.5, 23.680160472429236 ], "wc_review_avg": [ 252.0, 68.38859554048467 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:7YiFW-lNX-AJ:scholar.google.com/&scioq=Online+Continual+Learning+via+Pursuing+Class-conditional+Funtion&hl=en&as_sdt=0,10", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "1xyar0Ko3E", "title": "Efficient Quantization-aware Training with Adaptive Coreset Selection", "track": "main", "status": "Reject", "tldr": "", "abstract": "The expanding model size and computation of deep neural networks (DNNs) have increased the demand for efficient model deployment 
methods. Quantization-aware training (QAT) is a representative model compression method to leverage redundancy in weights and activations. However, most existing QAT methods require end-to-end training on the entire dataset, which suffers from long training time and high energy costs. Coreset selection, aiming to improve data efficiency utilizing the redundancy of training data, has also been widely used for efficient training. In this work, we propose a new angle through the coreset selection to improve the training efficiency of quantization-aware training. Based on the characteristics of QAT, we propose two metrics: error vector score and disagreement score, to quantify the importance of each sample during training. Guided by these two metrics of importance, we proposed a quantization-aware adaptive coreset selection (ACS) method to select the data for the current training epoch. We evaluate our method on various networks (ResNet-18, MobileNetV2), datasets(CIFAR-100, ImageNet-1K), and under different quantization settings. Compared with previous coreset selection methods, our method significantly improves QAT performance with different dataset fractions. Our method can achieve an accuracy of 68.39\\% of 4-bit quantized ResNet-18 on the ImageNet-1K dataset with only a 10\\% subset, which has an absolute gain of 4.24\\% compared to the random baseline.", "keywords": "Model Compression;Quantization;Coreset Selection", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/cabb13438324c31be45bd5f73ba8d287223cedc8.zip", "author": "Xijie Huang;Zechun Liu;Shih-yang Liu;Kwang-Ting Cheng", "authorids": "~Xijie_Huang1;~Zechun_Liu1;~Shih-yang_Liu1;~Kwang-Ting_Cheng1", "gender": "M;;M;", "homepage": "https://huangowen.github.io/;;https://vsdl.hkust.edu.hk/people.html;", "dblp": "230/4412;;;", "google_scholar": "nFW2mqwAAAAJ;;eBXRoDgAAAAJ;", "orcid": ";;0000-0003-1997-0843;", "linkedin": "huang-xijie-4224371b8/;;;", "or_profile": "~Xijie_Huang1;~Zechun_Liu1;~Shih-yang_Liu1;~Kwang-Ting_Cheng1", "aff": "Microsoft Research;;NVIDIA;", "aff_domain": "microsoft.com;;nvidia.com;", "position": "Intern;;Intern;", "bibtex": "@misc{\nhuang2024efficient,\ntitle={Efficient Quantization-aware Training with Adaptive Coreset Selection},\nauthor={Xijie Huang and Zechun Liu and Shih-yang Liu and Kwang-Ting Cheng},\nyear={2024},\nurl={https://openreview.net/forum?id=1xyar0Ko3E}\n}", "github": "", "project": "", "reviewers": "ALc7;iCtg;NUL5", "site": "https://openreview.net/forum?id=1xyar0Ko3E", "pdf_size": 1010324, "rating": "3;3;6", "confidence": "3;5;4", "soundness": "2;2;3", "contribution": "2;1;3", "presentation": "2;3;3", "wc_summary": "50;133;64", "wc_strengths": "11;80;77", "wc_weaknesses": "38;59;176", "wc_questions": "15;37;2", "wc_review": "114;309;319", "wc_reply_reviewers": "624;0;0", "wc_reply_authors": "1693;1655;909", "reply_reviewers": "2;0;0", "reply_authors": "3;3;2", "rating_avg": [ 4.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.0, 0.816496580927726 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 82.33333333333333, 36.279777042068794 ], "wc_strengths_avg": [ 56.0, 31.843366656181317 ], "wc_weaknesses_avg": [ 91.0, 60.71243694664216 ], "wc_questions_avg": [ 18.0, 14.445299120013633 ], "wc_review_avg": [ 247.33333333333334, 94.36925111261378 ], "wc_reply_reviewers_avg": [ 208.0, 294.15642097360376 ], 
"wc_reply_authors_avg": [ 1419.0, 360.95798462794346 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14481123808402065885&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Microsoft;NVIDIA", "aff_unique_dep": "Microsoft Research;NVIDIA Corporation", "aff_unique_url": "https://www.microsoft.com/en-us/research;https://www.nvidia.com", "aff_unique_abbr": "MSR;NVIDIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "1yll8U12GT", "title": "Enhancing Decision Tree Learning with Deep Networks", "track": "main", "status": "Reject", "tldr": "", "abstract": "Conventional approaches to (oblique) decision tree construction for classification are greedy in nature. They can fail spectacularly when the true labeling function corresponds to a decision tree whose root node is uncorrelated with the labels (e.g. if the label function is the product of the sign of a collection of linear functions of the input). We define a new figure of merit to capture the usefulness of a linear function/hyperplane in a decision tree that is applicable even in scenarios where greedy procedures fail. We devise a novel deep neural network architecture that is very effective at seeking out hyperplanes/half-spaces/features that score highly on this metric. We exploit this property in a subroutine for a new decision tree construction algorithm. The proposed algorithm outperforms all other decision tree construction procedures, especially in situations where the hyper-planes corresponding to the top levels of the true decision tree are not useful features by themselves for classification but are essential for getting to full accuracy. 
The properties of the deep architecture that we exploit to construct the decision tree are also of independent interest, as they reveal the inner workings of the feature learning mechanism at play in deep neural networks.", "keywords": "Deep Learning;feature learning;oblique decision trees", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/8c414d4e8eac00ddcfde2624a8649dd8f50c7184.pdf", "author": "Prithaj Banerjee;Mahesh Lorik Yadav;Harish Guruprasad Ramaswamy;Chandra Shekar Lakshminarayanan", "authorids": "~Prithaj_Banerjee1;~Mahesh_Lorik_Yadav1;~Harish_Guruprasad_Ramaswamy1;~Chandra_Shekar_Lakshminarayanan2", "gender": "M;M;M;M", "homepage": "https://www.cse.iitm.ac.in/profile.php?arg=Mjc4Mg==;;;https://iitpkd.ac.in/people/cnarayanan", "dblp": "206/7099.html;;126/1729;143/7535", "google_scholar": "rlr99eAAAAAJ;;;", "orcid": ";;;", "linkedin": "prithaj-banerjee-946a3a104/;mahesh-yadav-25779716a/;;", "or_profile": "~Prithaj_Banerjee1;~Mahesh_Lorik_Yadav1;~Harish_Guruprasad_Ramaswamy1;~Chandra_Shekar_Lakshminarayanan2", "aff": "Indian Institute of Technology, Madras;;Indian Institute of Technology Madras,;Indian Institute of Technology, Madras", "aff_domain": "iitm.ac.in;;iitm.ac.in;iitm.ac.in", "position": "MS student;;Assistant Professor;Assistant Professor", "bibtex": "@misc{\nbanerjee2024enhancing,\ntitle={Enhancing Decision Tree Learning with Deep Networks},\nauthor={Prithaj Banerjee and Mahesh Lorik Yadav and Harish Guruprasad Ramaswamy and Chandra Shekar Lakshminarayanan},\nyear={2024},\nurl={https://openreview.net/forum?id=1yll8U12GT}\n}", "github": "", "project": "", "reviewers": "ugFu;cuTu;67Vr", "site": "https://openreview.net/forum?id=1yll8U12GT", "pdf_size": 407376, "rating": "3;3;5", "confidence": "4;5;4", "soundness": "2;2;2", "contribution": "2;1;2", "presentation": "2;2;3", "wc_summary": "34;104;68", "wc_strengths": "34;18;33", "wc_weaknesses": "187;385;221", "wc_questions": "131;13;12", "wc_review": "386;520;334", "wc_reply_reviewers": "0;116;31", "wc_reply_authors": "382;176;209", "reply_reviewers": "0;1;1", "reply_authors": "1;1;1", "rating_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.0, 0.0 ], "contribution_avg": [ 1.6666666666666667, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 68.66666666666667, 28.58126814696802 ], "wc_strengths_avg": [ 28.333333333333332, 7.3181661333667165 ], "wc_weaknesses_avg": [ 264.3333333333333, 86.44587259602906 ], "wc_questions_avg": [ 52.0, 55.86292748027682 ], "wc_review_avg": [ 413.3333333333333, 78.35531606442255 ], "wc_reply_reviewers_avg": [ 49.0, 49.03740069239668 ], "wc_reply_authors_avg": [ 255.66666666666666, 90.34132805950749 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:cgGzNsKDZmMJ:scholar.google.com/&scioq=Enhancing+Decision+Tree+Learning+with+Deep+Networks&hl=en&as_sdt=0,23", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Indian Institute of Technology Madras", "aff_unique_dep": "", "aff_unique_url": "https://www.iitm.ac.in", "aff_unique_abbr": "IIT Madras", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Madras", 
"aff_country_unique_index": "0;0;0", "aff_country_unique": "India" }, { "id": "1zhM0XkQh0", "title": "ProFeAT: Projected Feature Adversarial Training for Self-Supervised Learning of Robust Representations", "track": "main", "status": "Reject", "tldr": "", "abstract": "Supervised adversarial training has been the most successful approach for improving the robustness of Deep Neural Networks against adversarial attacks. While several recent works have attempted to overcome the need for supervision or labeled training data by integrating adversarial training with contrastive Self-Supervised Learning (SSL) approaches such as SimCLR, their performance has been sub-optimal due to the increased training complexity. A recent approach mitigates this by utilizing supervision from a standard self-supervised trained model in a teacher-student setting that mimics supervised adversarial training. However, we find that there is still a large gap in performance when compared to supervised training, specifically on larger capacity models. We show that this is a result of mismatch in training objectives of the teacher and student, and propose Projected Feature Adversarial Training (ProFeAT) to bridge this gap by using a projection head in the adversarial training step. We further propose appropriate attack and defense losses at the feature and projector spaces, coupled with a combination of weak and strong augmentations for the teacher and student respectively, to improve generalization without increasing the training complexity. We demonstrate significant improvements in performance when compared to existing SSL methods, and performance on par with TRADES, a popular supervised adversarial training method, on several benchmark datasets and models.", "keywords": "Self-supervised Adversarial Training;Adversarial Training;Adversarial Robustness;Contrastive Learning", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/62d78703af11ce5771c088defe34b375d99b2eb0.zip", "author": "Sravanti Addepalli;Priyam Dey;Venkatesh Babu Radhakrishnan", "authorids": "~Sravanti_Addepalli1;~Priyam_Dey1;~Venkatesh_Babu_Radhakrishnan2", "gender": "F;M;M", "homepage": ";;http://cds.iisc.ac.in/faculty/venky", "dblp": "127/7715;;20/6289", "google_scholar": "MOO12i0AAAAJ;YMu3SJ8AAAAJ;cVg7HrEAAAAJ", "orcid": ";0000-0001-5807-1379;0000-0002-1926-1804", "linkedin": "sravanti-addepalli/;priyam-dey33;venkatesh-babu-radhakrishnan-16568939", "or_profile": "~Sravanti_Addepalli1;~Priyam_Dey1;~Venkatesh_Babu_Radhakrishnan2", "aff": "Indian Institute of Science;Indian Institute of Science, Indian institute of science, Bangalore;Indian Institute of Science", "aff_domain": "iisc.ac.in;iisc.ac.in;iisc.ac.in", "position": "PhD student;PhD student;Full Professor", "bibtex": "@misc{\naddepalli2024profeat,\ntitle={ProFe{AT}: Projected Feature Adversarial Training for Self-Supervised Learning of Robust Representations},\nauthor={Sravanti Addepalli and Priyam Dey and Venkatesh Babu Radhakrishnan},\nyear={2024},\nurl={https://openreview.net/forum?id=1zhM0XkQh0}\n}", "github": "", "project": "", "reviewers": "e5nc;eRyw;s59e;q8j3", "site": "https://openreview.net/forum?id=1zhM0XkQh0", "pdf_size": 750361, "rating": "5;6;6;6", "confidence": "4;5;5;4", "soundness": "2;3;3;3", "contribution": "2;3;3;2", "presentation": "2;4;3;3", "wc_summary": "118;29;70;79", "wc_strengths": "42;64;28;31", "wc_weaknesses": "186;143;185;260", "wc_questions": "79;49;48;2", "wc_review": 
"425;285;331;372", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "2011;732;1355;1088", "reply_reviewers": "0;0;0;0", "reply_authors": "3;1;2;3", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 74.0, 31.63068130786942 ], "wc_strengths_avg": [ 41.25, 14.13108276106258 ], "wc_weaknesses_avg": [ 193.5, 42.13371571556442 ], "wc_questions_avg": [ 44.5, 27.518175811634027 ], "wc_review_avg": [ 353.25, 51.60608006814701 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1296.5, 467.99172001222416 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:h4K-al4-3v0J:scholar.google.com/&scioq=ProFeAT:+Projected+Feature+Adversarial+Training+for+Self-Supervised+Learning+of+Robust+Representations&hl=en&as_sdt=0,33", "gs_version_total": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Indian Institute of Science", "aff_unique_dep": "", "aff_unique_url": "https://www.iisc.ac.in", "aff_unique_abbr": "IISc", "aff_campus_unique_index": "1", "aff_campus_unique": ";Bangalore", "aff_country_unique_index": "0;0;0", "aff_country_unique": "India" }, { "id": "1zt8GWZ9sc", "title": "Quack: Automatic Jailbreaking Large Language Models via Role-playing", "track": "main", "status": "Reject", "tldr": "", "abstract": "Large Language Models (LLMs) excel in Natural Language Processing (NLP) with human-like text generation, but the misuse of them has raised public concern and prompted the need for safety measures. Proactive testing with jailbreaks, meticulously crafted prompts that bypass model constraints and policies, has become mainstream to ensure security and reliability upon model release. While researchers have made substantial efforts to explore jailbreaks against LLMs, existing methods still face the following disadvantages: (1) require human labor and expertise to design question prompts; (2) non-determination regarding reproducing jailbreak; (3) exhibit limited effectiveness on updated model versions and lack the ability for iterative reuse when invalid.\nTo address these challenges, we introduce Quack, an automated testing framework based on role-playing of LLMs. Quack translates testing guidelines into question prompts, instead of human expertise and labor. It systematically analyzes and consolidates successful jailbreaks into a paradigm featuring eight distinct characteristics. Based on it, we reconstruct and maintain existing jailbreaks through knowledge graphs, which serve as Quack's repository of playing scenarios. It assigns four distinct roles to LLMs, for automatically organizing, evaluating, and further updating jailbreaks. We empirically demonstrate the effectiveness of our method on three state-of-the-art open-sourced LLMs (Vicuna-13B, LongChat-7B, and LLaMa-7B), as well as one widely-used commercial LLM (ChatGPT). 
Our work addresses the pressing need for LLM security and contributes valuable insights for creating safer LLM-empowered applications.", "keywords": "Large Language Models;Jailbreak;Testing", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/d424cb020078cca423b6b77165b89290d0da9e65.pdf", "author": "Haibo Jin;Ruoxi Chen;Jinyin Chen;Haohan Wang", "authorids": "~Haibo_Jin2;~Ruoxi_Chen1;~Jinyin_Chen1;~Haohan_Wang1", "gender": "M;;F;M", "homepage": ";;;http://cs.cmu.edu/~haohanw", "dblp": ";;50/415.html;132/4066", "google_scholar": "https://scholar.google.com.hk/citations?user=tj0eV-sAAAAJ;;;nZxJGeUAAAAJ", "orcid": ";;0000-0002-7153-2755;", "linkedin": ";;;haohanwang/", "or_profile": "~Haibo_Jin2;~Ruoxi_Chen1;~Jinyin_Chen1;~Haohan_Wang1", "aff": "Zhejiang University of Technology;;Zhejiang University of Technology;University of Illinois, Urbana Champaign", "aff_domain": "zjut.edu.cn;;zjut.edu.cn;illinois.edu", "position": "PhD student;;Full Professor;Assistant Professor", "bibtex": "@misc{\njin2024quack,\ntitle={Quack: Automatic Jailbreaking Large Language Models via Role-playing},\nauthor={Haibo Jin and Ruoxi Chen and Jinyin Chen and Haohan Wang},\nyear={2024},\nurl={https://openreview.net/forum?id=1zt8GWZ9sc}\n}", "github": "", "project": "", "reviewers": "REtv;TsFJ;aN17", "site": "https://openreview.net/forum?id=1zt8GWZ9sc", "pdf_size": 1099076, "rating": "3;3;5", "confidence": "4;4;3", "soundness": "2;1;2", "contribution": "3;2;3", "presentation": "3;1;3", "wc_summary": "100;132;136", "wc_strengths": "57;88;57", "wc_weaknesses": "180;1140;368", "wc_questions": "215;25;164", "wc_review": "552;1385;725", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 1.6666666666666667, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 122.66666666666667, 16.110727964792762 ], "wc_strengths_avg": [ 67.33333333333333, 14.613540144521982 ], "wc_weaknesses_avg": [ 562.6666666666666, 415.38843936194894 ], "wc_questions_avg": [ 134.66666666666666, 80.2925207531118 ], "wc_review_avg": [ 887.3333333333334, 358.9209136038498 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6587544533084629160&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;1", "aff_unique_norm": "Zhejiang University of Technology;University of Illinois Urbana-Champaign", "aff_unique_dep": ";", "aff_unique_url": "https://www.zjut.edu.cn;https://illinois.edu", "aff_unique_abbr": "ZJUT;UIUC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;0;1", "aff_country_unique": "China;United States" }, { "id": "20KYsQ8Q4Z", "title": "High-dimensional Bayesian Optimization with Group Testing", "track": "main", "status": "Reject", "tldr": "", "abstract": "Bayesian optimization is an effective method for optimizing expensive-to-evaluate black-box functions. 
\nHigh-dimensional problems are particularly challenging as the surrogate model of the objective suffers from the curse of dimensionality, which makes accurate modeling difficult. \nWe propose a group testing approach to identify active variables to facilitate efficient optimization in these domains. \nThe proposed algorithm, Group Testing Bayesian Optimization (GTBO), first runs a testing phase where groups of variables are systematically selected and tested on whether they influence the objective. \nTo that end, we extend the well-established theory of group testing to functions of continuous ranges.\nIn the second phase, GTBO guides optimization by placing more importance on the active dimensions.\nBy exploiting the axis-aligned subspace assumption, GTBO is competitive against state-of-the-art methods on several synthetic and real-world high-dimensional optimization tasks. \nFurthermore, GTBO aids in the discovery of active parameters in applications, thereby enhancing practitioners' understanding of the problem at hand.", "keywords": "Bayesian optimization;Gaussian process;group testing;high-dimensional", "primary_area": "probabilistic methods (Bayesian methods, variational inference, sampling, UQ, etc.)", "supplementary_material": "", "author": "Erik Orm Hellsten;Carl Hvarfner;Leonard Papenmeier;Luigi Nardi", "authorids": "~Erik_Orm_Hellsten1;~Carl_Hvarfner1;~Leonard_Papenmeier1;~Luigi_Nardi1", "gender": "M;M;M;M", "homepage": ";https://portal.research.lu.se/portal/sv/persons/carl-hvarfner(cd140b82-9fed-4e88-868e-1cf569dcbeb7).html;https://portal.research.lu.se/en/persons/leonard-papenmeier;", "dblp": ";319/3033;;60/7206", "google_scholar": "https://scholar.google.se/citations?user=mK5N-xQAAAAJ;https://scholar.google.se/citations?hl=en;85BUIRcAAAAJ;https://scholar.google.it/citations?user=Kgs3zQoAAAAJ", "orcid": ";;0000-0001-9338-1567;0000-0002-4601-2264", "linkedin": ";carl-hvarfner-a97421153/;leonard-papenmeier-a90a60135;nardiluigi/", "or_profile": "~Erik_Orm_Hellsten1;~Carl_Hvarfner1;~Leonard_Papenmeier1;~Luigi_Nardi1", "aff": "Lund University;Lund University;Lund University;Stanford University", "aff_domain": "lu.se;lu.se;lu.se;stanford.edu", "position": "Postdoc;PhD student;PhD student;Researcher", "bibtex": "@misc{\nhellsten2024highdimensional,\ntitle={High-dimensional Bayesian Optimization with Group Testing},\nauthor={Erik Orm Hellsten and Carl Hvarfner and Leonard Papenmeier and Luigi Nardi},\nyear={2024},\nurl={https://openreview.net/forum?id=20KYsQ8Q4Z}\n}", "github": "", "project": "", "reviewers": "fy4F;yfZG;JQFf;9ChC", "site": "https://openreview.net/forum?id=20KYsQ8Q4Z", "pdf_size": 2957796, "rating": "5;5;6;6", "confidence": "3;3;3;3", "soundness": "2;3;3;3", "contribution": "2;3;2;3", "presentation": "3;3;3;2", "wc_summary": "59;196;151;150", "wc_strengths": "25;35;59;115", "wc_weaknesses": "82;142;56;174", "wc_questions": "118;52;210;32", "wc_review": "284;425;476;471", "wc_reply_reviewers": "284;0;0;0", "wc_reply_authors": "955;651;394;756", "reply_reviewers": "2;0;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 139.0, 49.78453575157651 ], "wc_strengths_avg": [ 58.5, 34.88194375317981 ], "wc_weaknesses_avg": [ 113.5, 46.82680856090878 ], "wc_questions_avg": [ 103.0, 69.49100661236676 ], "wc_review_avg": [ 414.0, 77.64341568993471 ], "wc_reply_reviewers_avg": [ 71.0, 
122.97560733739029 ], "wc_reply_authors_avg": [ 689.0, 202.30793360617375 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10031675274478304412&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Lund University;Stanford University", "aff_unique_dep": ";", "aff_unique_url": "https://www.lunduniversity.lu.se;https://www.stanford.edu", "aff_unique_abbr": "LU;Stanford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "Sweden;United States" }, { "id": "20L7txbIa8", "title": "UniPredict: Large Language Models are Universal Tabular Predictors", "track": "main", "status": "Reject", "tldr": "", "abstract": "Tabular data prediction is a fundamental machine learning task for many applications. Existing methods predominantly employ discriminative modeling and operate under the assumption of a fixed target column, necessitating re-training for every new predictive task. Inspired by the generative power of large language models (LLMs), this paper exploits the idea of building universal tabular data predictors based on generative modeling, namely UniPredict. Here, we show that scaling up an LLM to extensive tabular datasets with the capability of comprehending diverse tabular inputs and predicting for target variables following the input instructions. Specifically, we train a single LLM on an aggregation of 169 tabular datasets with diverse targets and compare its performance against baselines that are trained on each dataset separately. We observe this versatile UniPredict model demonstrates an advantage over other models, ranging from 5.4% to 13.4%, when compared with the best tree-boosting baseline and the best neural network baseline, respectively. We further test UniPredict in few-shot learning settings on another 62 tabular datasets. Our method achieves strong performance in quickly adapting to new tasks, where our method outperforms XGBoost over 100\\% on the low-resource setup and shows a significant margin over all baselines. 
We envision that UniPredict sheds light on developing a universal tabular data prediction system that learns from data at scale and serves a wide range of prediction tasks.", "keywords": "tabular prediction;large language model;AI for healthcare", "primary_area": "generative models", "supplementary_material": "/attachment/19ca4f07dc73c9fd88c32e596081afe059ceb7b4.zip", "author": "Ruiyu Wang;Zifeng Wang;Jimeng Sun", "authorids": "~Ruiyu_Wang2;~Zifeng_Wang3;~Jimeng_Sun3", "gender": "M;M;", "homepage": "https://www.cs.toronto.edu/~rwang;https://zifengwang.xyz;http://sunlab.org", "dblp": ";;", "google_scholar": "9ucHLzcAAAAJ;kMlWwTAAAAAJ;9jmmp5sAAAAJ", "orcid": "0009-0003-3541-238X;;0000-0003-1512-6426", "linkedin": "rui-yu-wang/;;jimengsun/", "or_profile": "~Ruiyu_Wang2;~Zifeng_Wang3;~Jimeng_Sun3", "aff": "University of Toronto;University of Illinois, Urbana Champaign;Georgia Institute of Technology", "aff_domain": "cs.toronto.edu;illinois.edu;gatech.edu", "position": "Undergrad student;PhD student;Associate Professor", "bibtex": "@misc{\nwang2024unipredict,\ntitle={UniPredict: Large Language Models are Universal Tabular Predictors},\nauthor={Ruiyu Wang and Zifeng Wang and Jimeng Sun},\nyear={2024},\nurl={https://openreview.net/forum?id=20L7txbIa8}\n}", "github": "", "project": "", "reviewers": "HUKy;Mbbo;qPUm;brjf;p7B3", "site": "https://openreview.net/forum?id=20L7txbIa8", "pdf_size": 1527791, "rating": "3;5;5;5;8", "confidence": "3;3;5;5;3", "soundness": "2;2;2;3;4", "contribution": "2;2;2;2;3", "presentation": "2;3;3;2;3", "wc_summary": "217;92;229;60;53", "wc_strengths": "97;92;139;87;31", "wc_weaknesses": "286;135;316;193;87", "wc_questions": "381;4;6;91;102", "wc_review": "981;323;690;431;273", "wc_reply_reviewers": "0;0;284;0;0", "wc_reply_authors": "686;511;731;369;329", "reply_reviewers": "0;0;1;0;0", "reply_authors": "1;1;2;1;1", "rating_avg": [ 5.2, 1.6 ], "confidence_avg": [ 3.8, 0.9797958971132712 ], "soundness_avg": [ 2.6, 0.8 ], "contribution_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 130.2, 76.99714280413268 ], "wc_strengths_avg": [ 89.2, 34.46969683649684 ], "wc_weaknesses_avg": [ 203.4, 86.99103402075411 ], "wc_questions_avg": [ 116.8, 138.3378473159099 ], "wc_review_avg": [ 539.6, 263.52958088229866 ], "wc_reply_reviewers_avg": [ 56.8, 113.6 ], "wc_reply_authors_avg": [ 525.2, 162.05233722473736 ], "reply_reviewers_avg": [ 0.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.10206207261596574, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Toronto;University of Illinois Urbana-Champaign;Georgia Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.utoronto.ca;https://illinois.edu;https://www.gatech.edu", "aff_unique_abbr": "U of T;UIUC;Georgia Tech", "aff_campus_unique_index": "1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Canada;United States" }, { "id": "20oxNYWQl9", "title": "Sensitivity Sampling for Coreset-Based Data Selection", "track": "main", "status": "Reject", "tldr": "", "abstract": "Given the sustained growth in both training data and model \nparameters, the problem of finding the most useful training data \nhas become of primary importance for training state-of-the-art and\nnext generation models. 
\n\nWe work in the context of active learning and consider the problem \nof finding the best representative subset of a dataset to \ntrain a machine learning model. Assuming embedding representation of\nthe data (coming for example from either a pre-trained model or a \ngeneric all-purpose embedding) and that the model loss is Lipshitz\nwith respect to these embedding, we provide a new active learning\napproach based on k-means clustering and sensitivity sampling.\n\nWe prove that our new approach allows to select a set of ``typical'' \n$k$ \nelements whose average loss corresponds to the average loss of the \nwhole dataset, up to a multiplicative $(1\\pm\\epsilon)$ factor and an additive $\\epsilon \\lambda \\Phi_k$, where $\\Phi_k$ represents the $k$-means cost for the input data and $\\lambda$ is the Lipshitz constant. \nOur approach is particularly efficient since it only\nrequires very few inferences from the model ($O(k + 1/\\epsilon^2)$).\nWe furthermore demonstrate the performance of our approach on classic\ndatasets and show that it outperforms state-of-the-art methods.", "keywords": "clustering;data selection;coreset", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Kyriakos Axiotis;Vincent Cohen-Addad;Monika Henzinger;Vahab Mirrokni;David Saulpic;David Woodruff", "authorids": "~Kyriakos_Axiotis1;~Vincent_Cohen-Addad1;~Monika_Henzinger1;~Vahab_Mirrokni2;~David_Saulpic1;~David_Woodruff1", "gender": ";;;M;;M", "homepage": ";;;https://people.csail.mit.edu/mirrokni/Welcome.html;http://www.normalesup.org/~saulpic/;http://www.cs.cmu.edu/~dwoodruf/", "dblp": "176/5139;136/5814;;m/VahabSMirrokni;https://dblp.uni-trier.de/pers/hd/s/Saulpic:David;w/DPWoodruff", "google_scholar": "Xhv2tkcAAAAJ;;NXbggxYAAAAJ;opbZfw0AAAAJ;;https://scholar.google.com.tw/citations?user=0G2t-6sAAAAJ", "orcid": ";;;;0000-0003-4208-8541;", "linkedin": ";;;;;", "or_profile": "~Kyriakos_Axiotis1;~Vincent_Cohen-Addad1;~Monika_Henzinger1;~Vahab_Mirrokni2;~David_Saulpic1;~David_Woodruff1", "aff": "Google;Google;Institute of Science and Technology;Google Research;Institute of Science and Technology;Carnegie Mellon University", "aff_domain": "google.com;google.com;ist.ac.at;google.com;ist.ac.at;cmu.edu", "position": "Researcher;Researcher;Full Professor;VP, Google Fellow;Postdoc;Full Professor", "bibtex": "@misc{\naxiotis2024sensitivity,\ntitle={Sensitivity Sampling for Coreset-Based Data Selection},\nauthor={Kyriakos Axiotis and Vincent Cohen-Addad and Monika Henzinger and Vahab Mirrokni and David Saulpic and David Woodruff},\nyear={2024},\nurl={https://openreview.net/forum?id=20oxNYWQl9}\n}", "github": "", "project": "", "reviewers": "nKWs;eXnh;mfTM;MWNf", "site": "https://openreview.net/forum?id=20oxNYWQl9", "pdf_size": 453594, "rating": "5;6;6;6", "confidence": "4;2;3;3", "soundness": "3;3;3;2", "contribution": "2;2;3;2", "presentation": "2;2;2;1", "wc_summary": "117;64;15;159", "wc_strengths": "42;30;109;61", "wc_weaknesses": "250;305;65;5", "wc_questions": "9;69;294;264", "wc_review": "418;468;483;489", "wc_reply_reviewers": "30;23;0;38", "wc_reply_authors": "501;466;430;395", "reply_reviewers": "1;1;0;1", "reply_authors": "7;7;8;5", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_avg": [ 88.75, 54.2787942017875 ], 
"wc_strengths_avg": [ 60.5, 30.103986446980738 ], "wc_weaknesses_avg": [ 156.25, 124.61816681367127 ], "wc_questions_avg": [ 159.0, 122.32129822725068 ], "wc_review_avg": [ 464.5, 27.91504970441572 ], "wc_reply_reviewers_avg": [ 22.75, 14.16642156650719 ], "wc_reply_authors_avg": [ 448.0, 39.5790348543266 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 6.75, 1.0897247358851685 ], "replies_avg": [ 36, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:wD4GdDwwQpMJ:scholar.google.com/&scioq=Sensitivity+Sampling+for+Coreset-Based+Data+Selection&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;1;0;1;2", "aff_unique_norm": "Google;Institute of Science and Technology;Carnegie Mellon University", "aff_unique_dep": "Google;;", "aff_unique_url": "https://www.google.com;;https://www.cmu.edu", "aff_unique_abbr": "Google;;CMU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States;" }, { "id": "228XQpErvW", "title": "Automatic Fine-Tuned Offline-to-Online Reinforcement Learning via Increased Simple Moving Average Q-value", "track": "main", "status": "Reject", "tldr": "", "abstract": "Offline-to-online reinforcement learning starts with pre-trained offline models and continuously learns via\n interacting with the environment in online mode. The challenge of it is to adapt to distribution drift while \n maintaining the quality of the learned policy simultaneously. \n We propose a novel policy regularization method that aims to automatically fine-tune the model by \n selectively increasing the average estimated Q-value in the sampled batches. As a result, our models maintain the\n performance of the pre-trained model and improve it, unlike methods that require learning from scratch. \n Furthermore, we added efficient $\\mathcal{O}(1)$ complexity replay buffer techniques to adapt to distribution\n drift efficiently. Our experimental results indicate that the proposed method outperforms state-of-the-art methods \n on the D4RL benchmark.", "keywords": "Reinforcement Learning;Machine Learning", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Hsin-Yu Liu;Bharathan Balaji;Rajesh K. Gupta;Dezhi Hong", "authorids": "~Hsin-Yu_Liu1;~Bharathan_Balaji1;~Rajesh_K._Gupta1;~Dezhi_Hong1", "gender": "M;M;;M", "homepage": "https://www.synergylabs.org/bharath/;http://mesl.ucsd.edu/gupta/;https://cseweb.ucsd.edu/~dehong/;https://hydesmondliu.github.io/", "dblp": "82/8968;213/9138-1.html;60/11186.html;10/6919", "google_scholar": "F0JrXQIAAAAJ;I1w51gUAAAAJ;NsPO1GUAAAAJ;Cv17_VEAAAAJ", "orcid": "0000-0002-9490-2018;0000-0002-6489-7633;;0000-0002-9316-2150", "linkedin": "bharathanbalaji/;rajeshgupta4/;;desmondliu", "or_profile": "~Bharathan_Balaji1;~Rajesh_K._Gupta1;~Dezhi_Hong1;~Hsin_Yu_Liu1", "aff": "Amazon;University of California, San Diego;Amazon;University of California, San Diego", "aff_domain": "amazon.com;ucsd.edu;amazon.com;ucsd.edu", "position": "Researcher;Full Professor;Researcher;PhD student", "bibtex": "@misc{\nliu2024automatic,\ntitle={Automatic Fine-Tuned Offline-to-Online Reinforcement Learning via Increased Simple Moving Average Q-value},\nauthor={Hsin-Yu Liu and Bharathan Balaji and Rajesh K. 
Gupta and Dezhi Hong},\nyear={2024},\nurl={https://openreview.net/forum?id=228XQpErvW}\n}", "github": "", "project": "", "reviewers": "AHJ2;74i3;g2Zo;Thik", "site": "https://openreview.net/forum?id=228XQpErvW", "pdf_size": 1996677, "rating": "3;3;6;6", "confidence": "5;4;4;4", "soundness": "1;2;3;3", "contribution": "1;2;3;3", "presentation": "2;1;3;3", "wc_summary": "44;75;38;97", "wc_strengths": "35;46;24;189", "wc_weaknesses": "334;398;15;365", "wc_questions": "62;64;571;116", "wc_review": "475;583;648;767", "wc_reply_reviewers": "0;143;267;0", "wc_reply_authors": "117;400;401;448", "reply_reviewers": "0;1;3;0", "reply_authors": "1;2;4;1", "rating_avg": [ 4.5, 1.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "contribution_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 63.5, 23.90083680543424 ], "wc_strengths_avg": [ 73.5, 67.13605588653536 ], "wc_weaknesses_avg": [ 278.0, 153.52035695633333 ], "wc_questions_avg": [ 203.25, 213.42138482354574 ], "wc_review_avg": [ 618.25, 105.80022447991308 ], "wc_reply_reviewers_avg": [ 102.5, 111.48206133723936 ], "wc_reply_authors_avg": [ 341.5, 131.0581931815024 ], "reply_reviewers_avg": [ 1.0, 1.224744871391589 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896258, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:r-iom0jm4S0J:scholar.google.com/&scioq=Automatic+Fine-Tuned+Offline-to-Online+Reinforcement+Learning+via+Increased+Simple+Moving+Average+Q-value&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Amazon;University of California, San Diego", "aff_unique_dep": "Amazon.com, Inc.;", "aff_unique_url": "https://www.amazon.com;https://www.ucsd.edu", "aff_unique_abbr": "Amazon;UCSD", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";San Diego", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "RA-DIT: Retrieval-Augmented Dual Instruction Tuning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19562", "id": "22OTbutug9", "author_site": "Victoria Lin, Xilun Chen, Mingda Chen, Weijia Shi, Maria Lomeli, Richard James, Pedro Rodriguez, Jacob Kahn, Gergely Szilvasy, Mike Lewis, Luke Zettlemoyer, Scott Yih", "tldr": "", "abstract": "Retrieval-augmented language models (RALMs) improve performance by accessing long-tail and up-to-date knowledge from external data stores, but are challenging to build. Existing approaches require either expensive retrieval-specific modifications to LM pre-training or use post-hoc integration of the data store that leads to suboptimal performance. We introduce Retrieval-Augmented Dual Instruction Tuning (RA-DIT), a lightweight fine-tuning methodology that provides a third option by retrofitting any LLM with retrieval capabilities. Our approach operates in two distinct fine-tuning steps: (1) one updates a pre-trained LM to better use retrieved information, while (2) the other updates the retriever to return more relevant results, as preferred by the LM. By fine-tuning over tasks that require both knowledge utilization and contextual awareness, we demonstrate that each stage yields significant performance improvements, and using both leads to additional gains. 
Our best model, RA-DIT 65B, achieves state-of-the-art performance across a range of knowledge-intensive zero- and few-shot learning benchmarks, significantly outperforming existing in-context RALM approaches by up to +8.9% in 0-shot setting and +1.4% in 5-shot setting on average.", "keywords": "retrieval-augmented language model;large language model;knowledge intensive NLP", "primary_area": "generative models", "supplementary_material": "/attachment/b6da7054aeda7183488cdd1e45f8a3c869210ddc.pdf", "author": "Xi Victoria Lin;Xilun Chen;Mingda Chen;Weijia Shi;Maria Lomeli;Richard James;Pedro Rodriguez;Jacob Kahn;Gergely Szilvasy;Mike Lewis;Luke Zettlemoyer;Wen-tau Yih", "authorids": "~Xi_Victoria_Lin1;~Xilun_Chen1;~Mingda_Chen1;~Weijia_Shi1;~Maria_Lomeli2;~Richard_James2;~Pedro_Rodriguez1;~Jacob_Kahn1;gsz@meta.com;~Mike_Lewis1;~Luke_Zettlemoyer1;~Wen-tau_Yih1", "gender": "F;;M;;F;M;M;M;;M;M;M", "homepage": "http://victorialin.net;https://xilunchen.com;https://mingdachen.github.io/;https://weijiashi.notion.site/;https://mlomeli1.github.io;http://www.richjames.ai;https://www.pedro.ai;https://jacobkahn.me/;;;https://www.cs.washington.edu/people/faculty/lsz/;http://scottyih.org", "dblp": "215/5264;96/10207-2.html;220/2003;132/80601;132/9008;;96/4035;232/2341;;19/6214;21/6793;07/7129", "google_scholar": "gYUOJwMAAAAJ;eUk_hy8AAAAJ;aRncxakAAAAJ;https://scholar.google.com/citations?hl=en;8SK2fPAAAAAJ;;JjpA4qwAAAAJ;_-pugt8AAAAJ;;SnQnQicAAAAJ;https://scholar.google.com.tw/citations?user=UjpbO6IAAAAJ;8rDNIMsAAAAJ", "orcid": ";;;0000-3200-0000-0011;;;;0000-0003-2911-2500;;;;0000-0003-4263-395X", "linkedin": "xivictorialin/;;;weijia-shi-773768112;;duttonrichard/;pedrorodriguezscience/;jacobdavidkahn/;;;luke-zettlemoyer-a0109b226/;scottyih/", "or_profile": "~Xi_Victoria_Lin1;~Xilun_Chen1;~Mingda_Chen1;~Weijia_Shi1;~Maria_Lomeli2;~Richard_James2;~Pedro_Rodriguez1;~Jacob_Kahn1;gsz@meta.com;~Mike_Lewis1;~Luke_Zettlemoyer1;~Wen-tau_Yih1", "aff": "Meta;Meta FAIR;Meta FAIR;University of Washington, Seattle;Meta;Research, Facebook;Meta FAIR;Meta AI;;Facebook AI Research;Meta;Meta Platforms, Inc.", "aff_domain": "fb.com;meta.com;fb.com;uw.edu;meta.com;research.facebook.com;fb.com;meta.com;;fb.com;meta.com;meta.com", "position": "Research Scientist;Research Scientist;Researcher;PhD student;Researcher;Researcher;Research Scientist;Research Engineer;;Research Scientist;Researcher;Research Scientist", "bibtex": "@inproceedings{\nlin2024radit,\ntitle={{RA}-{DIT}: Retrieval-Augmented Dual Instruction Tuning},\nauthor={Xi Victoria Lin and Xilun Chen and Mingda Chen and Weijia Shi and Maria Lomeli and Richard James and Pedro Rodriguez and Jacob Kahn and Gergely Szilvasy and Mike Lewis and Luke Zettlemoyer and Wen-tau Yih},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=22OTbutug9}\n}", "github": "", "project": "", "reviewers": "i49b;HttW;5vF7;68qv", "pdf_size": 2186728, "rating": "5;6;6;8", "confidence": "3;3;4;4", "soundness": "2;3;2;4", "contribution": "2;3;3;4", "presentation": "2;4;2;3", "wc_summary": "99;50;13;87", "wc_strengths": "90;26;16;52", "wc_weaknesses": "142;94;38;95", "wc_questions": "34;3;229;72", "wc_review": "365;173;296;306", "wc_reply_reviewers": "21;0;0;63", "wc_reply_authors": "462;358;676;126", "reply_reviewers": "1;0;0;1", "reply_authors": "3;2;2;1", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "contribution_avg": [ 3.0, 0.7071067811865476 ], 
"presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 62.25, 33.6851228289285 ], "wc_strengths_avg": [ 46.0, 28.600699292150182 ], "wc_weaknesses_avg": [ 92.25, 36.84002578717882 ], "wc_questions_avg": [ 84.5, 86.93244503636143 ], "wc_review_avg": [ 285.0, 69.831941115796 ], "wc_reply_reviewers_avg": [ 21.0, 25.71964229922337 ], "wc_reply_authors_avg": [ 405.5, 197.95138292015037 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": 0.6882472016116854, "gs_citation": 142, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6775356328711894537&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=22OTbutug9", "pdf": "https://openreview.net/pdf?id=22OTbutug9", "email": "fb.com;meta.com;fb.com;uw.edu;meta.com;research.facebook.com;fb.com;meta.com;;fb.com;meta.com;meta.com", "author_num": 12, "aff_unique_index": "0;0;0;1;0;0;0;0;0;0;0", "aff_unique_norm": "Meta;University of Washington", "aff_unique_dep": "Meta Platforms, Inc.;", "aff_unique_url": "https://meta.com;https://www.washington.edu", "aff_unique_abbr": "Meta;UW", "aff_campus_unique_index": "1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "PromptAgent: Strategic Planning with Language Models Enables Expert-level Prompt Optimization", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19561", "id": "22pyNMuIoa", "author_site": "Xinyuan Wang, Chenxi Li, Zhen Wang, Fan Bai, Haotian Luo, Jiayou Zhang, Nebojsa Jojic, Eric Xing, Zhiting Hu", "tldr": "", "abstract": "Expert-level prompts, carefully engineered by human experts who have a deep understanding of both large language models (LLMs) and domain knowledge, are the future of prompting and pivotal to harnessing the full power of advanced LLMs. Discovering such prompts with an automated process remains a sought-after and unresolved challenge. Existing prompt optimization techniques, though automated through iterative sampling, often fall short in injecting domain knowledge and exploring the vast prompt space for complex expert-level prompts efficiently. To address this pressing need and achieve expert-level prompting, we introduce PromptAgent, which autonomously discovers prompts equivalent in quality to those handcrafted by experts. At its core, PromptAgent views prompt optimization as a strategic planning problem and employs a principled planning algorithm (rooted in Monte Carlo Tree Search) to strategically explore the vast expert-level prompt space. PromptAgent interacts with the LLM in a human-like trial-and-error manner during the planning, and injects expert-level knowledge by reflecting on model errors and generating insightful error feedback. This novel formulation allows it to iteratively evaluate intermediate prompts, refine them based on errors, simulate future rewards, and search for high-reward paths leading to expert-level prompts. We apply PromptAgent to 12 tasks spanning three practical domains: BIG-Bench Hard (BBH), domain-expert, and general NLU tasks, showing PromptAgent consistently outperforms strong prompting and prompt optimization baselines by great margins. 
Our qualitative analysis further emphasizes PromptAgent's capability to distill insightful errors into expert-level prompts.", "keywords": "Large Language Models;Expert-level Prompt Optimization;Strategic Planning", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Xinyuan Wang;Chenxi Li;Zhen Wang;Fan Bai;Haotian Luo;Jiayou Zhang;Nebojsa Jojic;Eric Xing;Zhiting Hu", "authorids": "xiw136@ucsd.edu;chl078@ucsd.edu;~Zhen_Wang6;~Fan_Bai5;1203616626@sjtu.edu.cn;~Jiayou_Zhang1;~Nebojsa_Jojic1;~Eric_Xing1;~Zhiting_Hu3", "gender": ";;M;M;;M;;M;M", "homepage": ";;https://zhenwang9102.github.io;https://bflashcp3f.github.io/;;https://jiayouzhang.github.io/;www.research.microsoft.com/~jojic;http://www.cs.cmu.edu/~epxing/;http://zhiting.ucsd.edu", "dblp": ";;78/6727;84/4809-6;;157/3933;20/1944;36/3855;134/4031", "google_scholar": ";;asBaytUAAAAJ;CLc9XQIAAAAJ;;;;https://scholar.google.com.tw/citations?user=5pKTRxEAAAAJ;N7_xhHoAAAAJ", "orcid": ";;0000-0001-7407-5118;;;;;;", "linkedin": ";;zhenwang9102/;;;jiayou-zhang-403072245/;;;", "or_profile": "xiw136@ucsd.edu;chl078@ucsd.edu;~Zhen_Wang6;~Fan_Bai5;1203616626@sjtu.edu.cn;~Jiayou_Zhang1;~Nebojsa_Jojic1;~Eric_Xing1;~Zhiting_Hu3", "aff": ";;University of California, San Diego;Johns Hopkins University;;Mohamed bin Zayed University of Artificial Intelligence;Microsoft Research;School of Computer Science, Carnegie Mellon University;Amazon", "aff_domain": ";;ucsd.edu;jh.edu;;mbzuai.ac.ae; ;cs.cmu.edu;amazon.com", "position": ";;Postdoc;Postdoc;;PhD student;Researcher;Full Professor;Researcher", "bibtex": "@inproceedings{\nwang2024promptagent,\ntitle={PromptAgent: Strategic Planning with Language Models Enables Expert-level Prompt Optimization},\nauthor={Xinyuan Wang and Chenxi Li and Zhen Wang and Fan Bai and Haotian Luo and Jiayou Zhang and Nebojsa Jojic and Eric Xing and Zhiting Hu},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=22pyNMuIoa}\n}", "github": "", "project": "", "reviewers": "zmiy;j86n;gPc5;WJJm", "pdf_size": 1719960, "rating": "3;6;6;8", "confidence": "4;4;2;4", "soundness": "2;3;2;4", "contribution": "1;2;3;3", "presentation": "3;3;3;3", "wc_summary": "75;164;75;41", "wc_strengths": "27;75;79;17", "wc_weaknesses": "49;290;195;18", "wc_questions": "75;56;113;131", "wc_review": "226;585;462;207", "wc_reply_reviewers": "0;0;275;12", "wc_reply_authors": "1516;1893;3214;522", "reply_reviewers": "0;0;1;1", "reply_authors": "3;3;6;1", "rating_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "contribution_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 88.75, 45.60907256237513 ], "wc_strengths_avg": [ 49.5, 27.76238462380348 ], "wc_weaknesses_avg": [ 138.0, 110.31092420970826 ], "wc_questions_avg": [ 93.75, 29.72688177390962 ], "wc_review_avg": [ 370.0, 159.6824974754591 ], "wc_reply_reviewers_avg": [ 71.75, 117.44865899617585 ], "wc_reply_authors_avg": [ 1786.25, 964.5243322488034 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.25, 1.7853571071357126 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.08084520834544431, "gs_citation": 115, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11923415573257859854&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "openreview": 
"https://openreview.net/forum?id=22pyNMuIoa", "pdf": "https://openreview.net/pdf?id=22pyNMuIoa", "email": ";;ucsd.edu;jh.edu;;mbzuai.ac.ae; ;cs.cmu.edu;amazon.com", "author_num": 9, "aff_unique_index": "0;1;2;3;4;5", "aff_unique_norm": "University of California, San Diego;Johns Hopkins University;Mohamed bin Zayed University of Artificial Intelligence;Microsoft;Carnegie Mellon University;Amazon", "aff_unique_dep": ";;;Microsoft Research;School of Computer Science;Amazon.com, Inc.", "aff_unique_url": "https://www.ucsd.edu;https://www.jhu.edu;https://mbzuai.ac.ae;https://www.microsoft.com/en-us/research;https://www.cmu.edu;https://www.amazon.com", "aff_unique_abbr": "UCSD;JHU;MBZUAI;MSR;CMU;Amazon", "aff_campus_unique_index": "0;2", "aff_campus_unique": "San Diego;;Pittsburgh", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "United States;United Arab Emirates" }, { "id": "22to0JZ4zh", "title": "Symmetrized Schr\u00f6dinger Bridge Matching", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Schr\u00f6dinger bridge (SB) has demonstrated numerous applications in probabilistic generative modeling. Finding the solution of probability paths aligns with entropy-regularized optimal transport that employs the Sinkhorn algorithm, which is characterized by performing iterative proportional fitting between marginal densities. This paper argues that the standard training of the SB is prone to exaggerate the amount of learning due to its inherent geometric nature. We leverage a symmetrized variant of Sinkhorn to study more lenient convergence of Schr\u00f6dinger potentials and prove distinctive theoretical properties of the symmetrization such as linear convergence and monotonic improvements. To this end, we propose a dynamic SB algorithm named Symmetrized Schr\u00f6dinger Bridge Matching (SSBM). Inspired by score and flow matching models, the concurrent projection scheme of SSBM is conceptualized as matching forward and backward drifts concurrently, constructing a time-symmetric learning objective for the SB model. 
We empirically validate our SB method by solving classical optimal transportation and model-based stochastic optimal control problems with physical dynamics.", "keywords": "schr\u00f6dinger bridge;probabilistic generative method;stochastic optimal control", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Dong-Sig Han;Jaein Kim;HEE BIN YOO;Byoung-Tak Zhang", "authorids": "~Dong-Sig_Han2;~Jaein_Kim2;~HEE_BIN_YOO1;~Byoung-Tak_Zhang1", "gender": "M;M;M;M", "homepage": ";https://github.com/heebinYoo;https://bi.snu.ac.kr/~btzhang/;https://dshan4585.github.io", "dblp": "27/9295-4.html;;09/5682;218/7109", "google_scholar": "https://scholar.google.co.kr/citations?user=6xSSU-8AAAAJ;;sYTUOu8AAAAJ;h1hMIKcAAAAJ", "orcid": "0000-0001-7148-4346;;;", "linkedin": ";;;", "or_profile": "~Jaein_Kim2;~HEE_BIN_YOO1;~Byoung-Tak_Zhang1;~Dong-Sig_Han_Han1", "aff": "Seoul National University;Seoul National University;Seoul National University;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr", "position": "PhD student;PhD student;Full Professor;PhD student", "bibtex": "@misc{\nhan2024symmetrized,\ntitle={Symmetrized Schr\\\"odinger Bridge Matching},\nauthor={Dong-Sig Han and Jaein Kim and HEE BIN YOO and Byoung-Tak Zhang},\nyear={2024},\nurl={https://openreview.net/forum?id=22to0JZ4zh}\n}", "github": "", "project": "", "reviewers": "pujg;8RJL;Ksvt;EfaF", "site": "https://openreview.net/forum?id=22to0JZ4zh", "pdf_size": 719917, "rating": "3;3;3;6", "confidence": "4;4;4;4", "soundness": "3;2;1;3", "contribution": "3;2;2;2", "presentation": "4;1;1;3", "wc_summary": "287;200;53;32", "wc_strengths": "53;78;49;59", "wc_weaknesses": "700;594;408;428", "wc_questions": "441;79;22;128", "wc_review": "1481;951;532;647", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 1.299038105676658 ], "wc_summary_avg": [ 143.0, 105.36365597301567 ], "wc_strengths_avg": [ 59.75, 11.121488209767612 ], "wc_weaknesses_avg": [ 532.5, 120.68450604779389 ], "wc_questions_avg": [ 167.5, 162.29987677136418 ], "wc_review_avg": [ 902.75, 367.2753565106159 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:-R7CsRECyCQJ:scholar.google.com/&scioq=Symmetrized+Schr%C3%B6dinger+Bridge+Matching&hl=en&as_sdt=0,23", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "23OEmHVkpq", "title": "Disentanglement Learning via Topology", "track": "main", "status": "Reject", "tldr": "", "abstract": "We propose TopDis (Topological Disentanglement), a method for learning disentangled representations via adding a multi-scale topological loss term. 
Disentanglement is a crucial property of data representations substantial for the explainability and robustness of deep learning models and a step towards high-level cognition. The state-of-the-art methods are based on VAE and encourage the joint distribution of latent variables to be factorized. We take a different perspective on disentanglement by analyzing topological properties of data manifolds. In particular, we optimize the topological similarity for data manifolds traversals. To the best of our knowledge, our paper is the first one to propose a differentiable topological loss for disentanglement learning. Our experiments have shown that the proposed TopDis loss improves disentanglement scores such as MIG, FactorVAE score, SAP score and DCI disentanglement score with respect to state-of-the-art results while preserving the reconstruction quality. Our method works in an unsupervised manner, permitting to apply it for problems without labeled factors of variation. The TopDis loss works even when factors of variation are correlated. Additionally, we show how to use the proposed topological loss to find disentangled directions in a trained GAN.", "keywords": "representation learning;variational autoencoders;disentangled representations;topological data analysis", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/3ca1407e87a500b1df6e1af36f18584899bf8f55.zip", "author": "Nikita Balabin;Daria Voronkova;Ilya Trofimov;Evgeny Burnaev;Serguei Barannikov", "authorids": "~Nikita_Balabin1;~Daria_Voronkova1;~Ilya_Trofimov1;~Evgeny_Burnaev1;~Serguei_Barannikov1", "gender": "M;;;M;", "homepage": ";;;http://faculty.skoltech.ru/people/evgenyburnaev;", "dblp": "310/1857;;130/0370;144/7845;255/5203", "google_scholar": "https://scholar.google.com/citations?hl=en;;https://scholar.google.ru/citations?user=V1c6KjgAAAAJ;https://scholar.google.ru/citations?user=pCRdcOwAAAAJ;https://scholar.google.fr/citations?user=-soT8KcAAAAJ", "orcid": ";;0000-0002-2961-7368;0000-0001-8424-0690;0000-0002-9323-0651", "linkedin": "nikita-balabin-10455b17a/;;https://ru.linkedin.com/in/ilya-trofimov-ba122748;;", "or_profile": "~Nikita_Balabin1;~Daria_Voronkova1;~Ilya_Trofimov1;~Evgeny_Burnaev1;~Serguei_Barannikov1", "aff": "Skolkovo Institute of Science and Technology;;Skoltech;Skolkovo Institute of Science and Technology;CNRS, Institut Mathematiques de Jussieu, Paris Diderot University", "aff_domain": "skoltech.ru;;skoltech.ru;skoltech.ru;imj-prg.fr", "position": "PhD student;;Research scientist;Full Professor;Researcher", "bibtex": "@misc{\nbalabin2024disentanglement,\ntitle={Disentanglement Learning via Topology},\nauthor={Nikita Balabin and Daria Voronkova and Ilya Trofimov and Evgeny Burnaev and Serguei Barannikov},\nyear={2024},\nurl={https://openreview.net/forum?id=23OEmHVkpq}\n}", "github": "", "project": "", "reviewers": "T3Gf;2nX5;VGnt;1sM8", "site": "https://openreview.net/forum?id=23OEmHVkpq", "pdf_size": 21582958, "rating": "5;5;5;6", "confidence": "3;3;4;4", "soundness": "3;3;2;3", "contribution": "2;3;2;2", "presentation": "2;2;2;3", "wc_summary": "56;57;41;83", "wc_strengths": "57;58;63;133", "wc_weaknesses": "145;142;90;363", "wc_questions": "2;56;51;2", "wc_review": "260;313;245;581", "wc_reply_reviewers": "125;0;0;0", "wc_reply_authors": "1095;1133;952;800", "reply_reviewers": "1;0;0;0", "reply_authors": "3;2;2;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 
0.4330127018922193 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 59.25, 15.105876340020794 ], "wc_strengths_avg": [ 77.75, 31.979485611873123 ], "wc_weaknesses_avg": [ 185.0, 105.0690249312327 ], "wc_questions_avg": [ 27.75, 25.810608284191986 ], "wc_review_avg": [ 349.75, 135.88115211463287 ], "wc_reply_reviewers_avg": [ 31.25, 54.12658773652741 ], "wc_reply_authors_avg": [ 995.0, 131.26118999917682 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15099455684142021084&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Skolkovo Institute of Science and Technology;Paris Diderot University", "aff_unique_dep": ";Institut Mathematiques de Jussieu", "aff_unique_url": "https://www.skoltech.ru;https://www.univ-paris-diderot.fr", "aff_unique_abbr": "Skoltech;Paris Diderot", "aff_campus_unique_index": "1", "aff_campus_unique": ";Paris", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "Russian Federation;France" }, { "title": "RETSim: Resilient and Efficient Text Similarity", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19560", "id": "23b9KSNQTX", "author_site": "Marina Zhang, Owen Vallis, Aysegul Bumin, Tanay Vakharia, Elie Bursztein", "tldr": "", "abstract": "This paper introduces RETSim (Resilient and Efficient Text Similarity), a lightweight, multilingual deep learning model trained to produce robust metric embeddings for near-duplicate text retrieval, clustering, and dataset deduplication tasks. We demonstrate that RETSim is significantly more robust and accurate than MinHash and neural text embeddings, achieving new state-of-the-art performance on dataset deduplication, adversarial text retrieval benchmarks, and spam clustering tasks. Additionally, we introduce the W4NT3D benchmark (Wiki-40B 4dversarial Near-T3xt Dataset), enabling the evaluation of models on typo-laden near-duplicate text retrieval in a multilingual setting. 
RETSim and the W4NT3D benchmark are released under the MIT License at https://github.com/google/unisim.", "keywords": "text similarity;text embedding;metric learning;near-duplicate detection;dataset deduplication", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/7e56b306b8c8975180840afc4567947bdb7faaf7.zip", "author": "Marina Zhang;Owen Skipper Vallis;Aysegul Bumin;Tanay Vakharia;Elie Bursztein", "authorids": "~Marina_Zhang1;~Owen_Skipper_Vallis1;~Aysegul_Bumin1;~Tanay_Vakharia1;~Elie_Bursztein1", "gender": "F;;F;;", "homepage": "https://marinazhang.github.io/;https://www.owenvallis.com;;;https://elie.net", "dblp": "340/6965;28/4883;308/4098;;20/7004", "google_scholar": "V9ehnpwAAAAJ;https://scholar.google.com/citations?hl=en;;;Ayw4GsAAAAAJ", "orcid": ";;;;", "linkedin": "marina-zhang;;aysegulbumin;tanay-vakharia-450151130;bursztein", "or_profile": "~Marina_Zhang1;~Owen_Skipper_Vallis1;~Aysegul_Bumin1;~Tanay_Vakharia1;~Elie_Bursztein1", "aff": "Google;Google;;;Google", "aff_domain": "google.com;google.com;;;google.com", "position": "Researcher;Researcher;;;Researcher", "bibtex": "@inproceedings{\nzhang2024retsim,\ntitle={{RETS}im: Resilient and Efficient Text Similarity},\nauthor={Marina Zhang and Owen Skipper Vallis and Aysegul Bumin and Tanay Vakharia and Elie Bursztein},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=23b9KSNQTX}\n}", "github": "", "project": "", "reviewers": "XEvv;XRFc;rc5k;pVcX", "pdf_size": 442973, "rating": "6;6;8;8", "confidence": "4;4;4;4", "soundness": "2;3;3;4", "contribution": "1;2;3;3", "presentation": "2;4;3;3", "wc_summary": "72;486;86;65", "wc_strengths": "76;53;21;80", "wc_weaknesses": "72;14;106;214", "wc_questions": "29;53;74;8", "wc_review": "249;606;287;367", "wc_reply_reviewers": "44;10;78;0", "wc_reply_authors": "830;510;1068;1115", "reply_reviewers": "1;1;1;0", "reply_authors": "2;1;3;2", "rating_avg": [ 7.0, 1.0 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 177.25, 178.41717266003292 ], "wc_strengths_avg": [ 57.5, 23.4574082114798 ], "wc_weaknesses_avg": [ 101.5, 72.80624973173663 ], "wc_questions_avg": [ 41.0, 24.829418035870273 ], "wc_review_avg": [ 377.25, 138.7666656657859 ], "wc_reply_reviewers_avg": [ 33.0, 30.675723300355934 ], "wc_reply_authors_avg": [ 880.75, 239.77424277849363 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14914000300701128593&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=23b9KSNQTX", "pdf": "https://openreview.net/pdf?id=23b9KSNQTX", "email": "google.com;google.com;;;google.com", "author_num": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "24CZaossxH", "title": "PyTorch Geometric High Order: A Unified Library for High Order Graph Neural Network", "track": 
"main", "status": "Reject", "tldr": "", "abstract": "We introduce PyTorch Geometric High Order (PyGHO), a library designed for High Order Graph Neural Networks (HOGNNs) built upon PyTorch Geometric (PyG). In contrast to ordinary Message Passing Neural Networks (MPNNs) which facilitate message exchange between nodes and are readily implemented using PyG, HOGNNs, encompassing subgraph GNNs and k-WL GNNs, encode node tuples. Such node tuple encoding lacks a universal framework and often necessitates intricate code implementation. The primary objective of PyGHO is to furnish an intuitive and user-friendly interface catering to various HOGNNs. It integrates streamlined data structures for node tuples, offers comprehensive data preprocessing and mini-batch data loading utilities, presents a versatile framework for high order message propagation, and encompasses a repertoire of representative high order GNN methodologies. In this work, we present an detailed overview of the PyGHO library, elucidating its features, and undertake a comparative analysis of existing HOGNNs implemented with PyGHO on real-world tasks.", "keywords": "High Order Graph Neural Network;Library", "primary_area": "infrastructure, software libraries, hardware, etc.", "supplementary_material": "/attachment/7738778ed7f2253d9fe13c65d488034e64b21038.zip", "author": "Xiyuan Wang;Muhan Zhang", "authorids": "~Xiyuan_Wang1;~Muhan_Zhang1", "gender": ";M", "homepage": ";https://muhanzhang.github.io/", "dblp": "95/8542;157/5518", "google_scholar": ";https://scholar.google.com.hk/citations?user=OBBqkosAAAAJ", "orcid": ";0000-0002-7680-6401", "linkedin": "%E5%B8%8C%E5%85%83-%E7%8E%8B-969660221/;jerry-muhan-zhang-a33a1777/", "or_profile": "~Xiyuan_Wang1;~Muhan_Zhang1", "aff": "Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn", "position": "PhD student;Assistant Professor", "bibtex": "@misc{\nwang2024pytorch,\ntitle={PyTorch Geometric High Order: A Unified Library for High Order Graph Neural Network},\nauthor={Xiyuan Wang and Muhan Zhang},\nyear={2024},\nurl={https://openreview.net/forum?id=24CZaossxH}\n}", "github": "", "project": "", "reviewers": "gNYJ;tzV7;nV5W;skCY", "site": "https://openreview.net/forum?id=24CZaossxH", "pdf_size": 284290, "rating": "3;3;5;5", "confidence": "5;3;3;3", "soundness": "2;2;2;3", "contribution": "2;2;2;2", "presentation": "1;2;2;3", "wc_summary": "35;51;73;17", "wc_strengths": "13;28;139;21", "wc_weaknesses": "80;210;136;75", "wc_questions": "13;5;2;4", "wc_review": "141;294;350;117", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 44.0, 20.615528128088304 ], "wc_strengths_avg": [ 50.25, 51.51395442013746 ], "wc_weaknesses_avg": [ 125.25, 54.47648575302926 ], "wc_questions_avg": [ 6.0, 4.183300132670378 ], "wc_review_avg": [ 225.5, 98.87492098606198 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11402743817870288895&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Peking 
University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "258EqEA05w", "title": "A Simple Data Augmentation for Feature Distribution Skewed Federated Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Federated learning (FL) facilitates collaborative learning among multiple clients in a distributed manner and ensures privacy protection. However, its performance inevitably degrades, while suffering from data heterogeneity, i.e., non-IID data. In this paper, we focus on the feature distribution skewed FL scenario, which is a common setting in real-world applications. The main challenge of this scenario is feature shift, which is caused by the different underlying distributions of local datasets. Although the previous attempts achieved impressive progress, few studies pay attention to the data itself, i.e., the root of this issue. To this end, the primary goal of this paper is to develop a general data augmentation technique at the input level, to mitigate the feature shift problem. To achieve this goal, we propose a simple yet remarkably effective data augmentation method, namely FedRDN, for feature distribution skewed FL, which randomly injects the statistics of the dataset from the entire federation into the client's data. Then, our method can effectively improve the generalization of features, and thereby mitigate the feature shift problem. Moreover, our FedRDN is a plug-and-play component, which can be seamlessly integrated into the data augmentation flow with only a few lines of code. Extensive experiments on several datasets show that the performance of various representative FL works can be further improved by integrating our FedRDN, which demonstrates its strong scalability and generalizability. 
The source code will be released.", "keywords": "federated learning;data heterogeneity;data augmentation.", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Yunlu Yan;Huazhu Fu;Yuexiang Li;Lei Zhu", "authorids": "~Yunlu_Yan1;~Huazhu_Fu4;~Yuexiang_Li1;~Lei_Zhu1", "gender": "M;M;M;M", "homepage": ";https://hzfu.github.io;https://yuexiangli.github.io;https://sites.google.com/site/indexlzhu/home?authuser=0", "dblp": "294/8769;63/7767;165/6204;99/549-3", "google_scholar": "Ja0QBOgAAAAJ;https://scholar.google.com/citations?hl=en;WsKu4EMAAAAJ;https://scholar.google.com.hk/citations?user=AQtqhaYAAAAJ", "orcid": ";0000-0002-9702-5524;;", "linkedin": ";;;", "or_profile": "~Yunlu_Yan1;~Huazhu_Fu4;~Yuexiang_Li1;~Lei_Zhu1", "aff": "The Hong Kong University of Science and Technology (Guangzhou);Institute of High Performance Computing, Singapore, A*STAR;Medical AI ReSearch (MARS) Group @ GXMU;Hong Kong University of Science and Technology (Guangzhou) & HKUST", "aff_domain": "hkust-gz.edu.cn;ihpc.a-star.edu.sg;gxmu.edu.cn;ust.hk", "position": "PhD student;Principal Scientist;Full Professor;Assistant Professor", "bibtex": "@misc{\nyan2024a,\ntitle={A Simple Data Augmentation for Feature Distribution Skewed Federated Learning},\nauthor={Yunlu Yan and Huazhu Fu and Yuexiang Li and Lei Zhu},\nyear={2024},\nurl={https://openreview.net/forum?id=258EqEA05w}\n}", "github": "", "project": "", "reviewers": "9DEf;rKgT;BGPc;aJPJ", "site": "https://openreview.net/forum?id=258EqEA05w", "pdf_size": 1797449, "rating": "5;5;5;6", "confidence": "4;4;3;3", "soundness": "3;3;3;3", "contribution": "2;2;3;3", "presentation": "3;3;3;2", "wc_summary": "37;67;136;46", "wc_strengths": "68;73;43;35", "wc_weaknesses": "98;120;26;131", "wc_questions": "54;109;112;152", "wc_review": "257;369;317;364", "wc_reply_reviewers": "137;0;0;27", "wc_reply_authors": "1122;696;706;590", "reply_reviewers": "1;0;0;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 71.5, 38.797551469132685 ], "wc_strengths_avg": [ 54.75, 16.099301227071937 ], "wc_weaknesses_avg": [ 93.75, 40.88016022473493 ], "wc_questions_avg": [ 106.75, 34.86671048435743 ], "wc_review_avg": [ 326.75, 45.090880452703516 ], "wc_reply_reviewers_avg": [ 41.0, 56.51106086422374 ], "wc_reply_authors_avg": [ 778.5, 203.46191289772148 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17299839277902079733&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Hong Kong University of Science and Technology;Institute of High Performance Computing;Guangxi Medical University", "aff_unique_dep": ";;Medical AI ReSearch (MARS) Group", "aff_unique_url": "https://www.ust.hk;https://www.ihpc.a-star.edu.sg;http://www.gxmu.edu.cn", "aff_unique_abbr": "HKUST;IHPC;GXMU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Guangzhou;", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "China;Singapore" }, { "id": "25VG15SnkH", "title": "United We Train, Divided We Fail! 
Representation Learning for Time Series by Pretraining from 75 Datasets at Once", "track": "main", "status": "Reject", "tldr": "", "abstract": "In natural language processing and vision, pretraining is utilized to learn effective representations. Unfortunately, the success of pretraining does not easily carry over to time series due to potential mismatch between sources and target. Actually, common belief is that multi-dataset pretraining does not work for time series! Au contraire, we introduce a new self-supervised contrastive pretraining approach to learn one encoding from many unlabeled and diverse time series datasets, so that the single learned representation can then be reused in several target domains for, say, classification. Specifically, we propose the XD-MixUp interpolation method and the Soft Interpolation Contextual Contrasting (SICC) loss. Empirically, this outperforms both supervised training and other self-supervised pretraining methods when finetuning on low-data regimes. This disproves the common belief: We can actually learn from multiple time series datasets, even from 75 at once.", "keywords": "time series;classification;pretraining;representation learning;multi-dataset;transfer learning", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Maurice Kraus;Felix Divo;Devendra Singh Dhami;Kristian Kersting", "authorids": "~Maurice_Kraus1;~Felix_Divo1;~Devendra_Singh_Dhami1;~Kristian_Kersting1", "gender": "M;;M;M", "homepage": ";;https://sites.google.com/view/devendradhami;http://www.ml.informatik.tu-darmstadt.de/", "dblp": "335/8577;;201/2130;40/3793", "google_scholar": "HKA7sQkAAAAJ;;aVlaHfkAAAAJ;QY-earAAAAAJ", "orcid": "0000-0002-6411-3325;;;0000-0002-2873-9152", "linkedin": ";;;", "or_profile": "~Maurice_Kraus1;~Felix_Divo1;~Devendra_Singh_Dhami1;~Kristian_Kersting1", "aff": "Technische Universit\u00e4t Darmstadt;;Eindhoven University of Technology;TU Darmstadt", "aff_domain": "cs.tu-darmstadt.de;;tue.nl;tu-darmstadt.de", "position": "PhD student;;Assistant Professor;Full Professor", "bibtex": "@misc{\nkraus2024united,\ntitle={United We Train, Divided We Fail! 
Representation Learning for Time Series by Pretraining from 75 Datasets at Once},\nauthor={Maurice Kraus and Felix Divo and Devendra Singh Dhami and Kristian Kersting},\nyear={2024},\nurl={https://openreview.net/forum?id=25VG15SnkH}\n}", "github": "", "project": "", "reviewers": "Gxh9;kMWa;KhAg;4uSe", "site": "https://openreview.net/forum?id=25VG15SnkH", "pdf_size": 2198730, "rating": "3;3;3;6", "confidence": "4;4;3;4", "soundness": "2;3;2;4", "contribution": "2;1;2;3", "presentation": "3;3;3;4", "wc_summary": "67;94;101;198", "wc_strengths": "43;64;52;28", "wc_weaknesses": "334;136;200;169", "wc_questions": "40;22;42;8", "wc_review": "484;316;395;403", "wc_reply_reviewers": "167;0;0;105", "wc_reply_authors": "929;596;867;851", "reply_reviewers": "1;0;0;1", "reply_authors": "3;2;2;2", "rating_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 115.0, 49.57317823178175 ], "wc_strengths_avg": [ 46.75, 13.141061600951424 ], "wc_weaknesses_avg": [ 209.75, 75.22092461542866 ], "wc_questions_avg": [ 28.0, 13.92838827718412 ], "wc_review_avg": [ 399.5, 59.466377054601196 ], "wc_reply_reviewers_avg": [ 68.0, 71.44578364046404 ], "wc_reply_authors_avg": [ 810.75, 127.36242577777796 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6022853401423522021&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Technische Universit\u00e4t Darmstadt;Eindhoven University of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.tu-darmstadt.de;https://www.tue.nl", "aff_unique_abbr": "TUD;TU/e", "aff_campus_unique_index": "1", "aff_campus_unique": ";Darmstadt", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Germany;Netherlands" }, { "title": "Zero-Shot Continuous Prompt Transfer: Generalizing Task Semantics Across Language Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19559", "id": "26XphugOcS", "author_site": "Zijun Wu, Yongkang Wu, Lili Mou", "tldr": "", "abstract": "Prompt tuning in natural language processing (NLP) has become an increasingly popular method for adapting large language models to specific tasks. However, the transferability of these prompts, especially continuous prompts, between different models remains a challenge. In this work, we propose a zero-shot continuous prompt transfer method, where source prompts are encoded into relative space and the corresponding target prompts are searched for transferring to target models. Experimental results confirm the effectiveness of our method, showing that 'task semantics' in continuous prompts can be generalized across various language models. 
Moreover, we find that combining 'task semantics' from multiple source models can further enhance the performance of transfer.", "keywords": "continuous prompt tuning;zero-shot prompt transfer;cross-model prompt transfer", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "/attachment/a5a62ca65940640c67d97235770b7058762be598.pdf", "author": "Zijun Wu;Yongkang Wu;Lili Mou", "authorids": "~Zijun_Wu2;~Yongkang_Wu1;~Lili_Mou1", "gender": "M;;M", "homepage": ";;https://lili-mou.github.io/", "dblp": ";;", "google_scholar": "rmET9UUAAAAJ;YYJIxacAAAAJ;https://scholar.google.com.hk/schhp?hl=en", "orcid": ";;", "linkedin": "zijun-wu-844474195/?originalSubdomain=ca;;", "or_profile": "~Zijun_Wu2;~Yongkang_Wu1;~Lili_Mou1", "aff": "University of Alberta;;University of Alberta", "aff_domain": "ualberta.ca;;ualberta.ca", "position": "PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nwu2024zeroshot,\ntitle={Zero-Shot Continuous Prompt Transfer: Generalizing Task Semantics Across Language Models},\nauthor={Zijun Wu and Yongkang Wu and Lili Mou},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=26XphugOcS}\n}", "github": "", "project": "", "reviewers": "AxcL;NAhq;2xK8;6csB", "pdf_size": 482692, "rating": "6;6;8;8", "confidence": "4;4;3;2", "soundness": "3;3;3;3", "contribution": "3;2;3;4", "presentation": "3;3;3;3", "wc_summary": "74;166;130;91", "wc_strengths": "54;14;51;140", "wc_weaknesses": "52;15;148;43", "wc_questions": "118;1;13;19", "wc_review": "298;196;342;293", "wc_reply_reviewers": "72;25;100;19", "wc_reply_authors": "805;337;224;413", "reply_reviewers": "3;1;1;1", "reply_authors": "4;2;2;2", "rating_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 115.25, 35.64670391494843 ], "wc_strengths_avg": [ 64.75, 46.21349911010851 ], "wc_weaknesses_avg": [ 64.5, 50.102395152327794 ], "wc_questions_avg": [ 37.75, 46.78341052125208 ], "wc_review_avg": [ 282.25, 53.32154817707378 ], "wc_reply_reviewers_avg": [ 54.0, 33.56337289367682 ], "wc_reply_authors_avg": [ 444.75, 218.59137105567547 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5565109728828905576&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=26XphugOcS", "pdf": "https://openreview.net/pdf?id=26XphugOcS", "email": "ualberta.ca;;ualberta.ca", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "University of Alberta", "aff_unique_dep": "", "aff_unique_url": "https://www.ualberta.ca", "aff_unique_abbr": "UAlberta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "id": "27YiINkhw3", "title": "ToolDec: Syntax Error-Free and Generalizable Tool Use for LLMs via Finite-State Decoding", "track": "main", "status": "Reject", "tldr": "", "abstract": "Large language models (LLMs) have shown promising capabilities in using external tools to solve complex problems.\nHowever, existing approaches either involve fine-tuning on tool demonstrations, which does not generalize to new tools without 
additional training, or providing tool documentation in context, limiting the number of tools. Both approaches often generate syntactically invalid tool calls.\nIn this paper, we propose ToolDec, a finite-state machine-guided decoding algorithm for tool-augmented LLMs.\nToolDec eliminates tool-related errors for any tool-augmented LLMs by ensuring valid tool names and type-conforming arguments.\nFurthermore, ToolDec enables LLM to effectively select tools using only the information contained in their names, with no need for fine-tuning or in-context documentation.\nWe evaluated multiple prior methods and their ToolDec-enhanced versions on a variety of tasks involving tools like math functions, knowledge graph relations, and complex real-world RESTful APIs.\nOur experiments show that ToolDec reduces syntactic errors to zero, consequently achieving significantly better performance and as much as a 2x speedup.\nWe also show that ToolDec achieves superior generalization performance on unseen tools, performing up to 8x better than the baselines", "keywords": "large language models;augmented language models;finite-state machines", "primary_area": "neurosymbolic & hybrid AI systems (physics-informed, logic & formal reasoning, etc.)", "supplementary_material": "", "author": "Kexun Zhang;Hongqiao Chen;Lei Li;William Yang Wang", "authorids": "~Kexun_Zhang1;~Hongqiao_Chen2;~Lei_Li11;~William_Yang_Wang2", "gender": "M;;M;", "homepage": "https://zkx06111.github.io;;https://www.cs.cmu.edu/~leili;", "dblp": "295/8815;;13/7007-5.html;", "google_scholar": ";;BYXqAlwAAAAJ;", "orcid": ";;0000-0003-3095-9776;", "linkedin": ";;;", "or_profile": "~Kexun_Zhang1;~Hongqiao_Chen2;~Lei_Li11;~William_Yang_Wang2", "aff": "Carnegie Mellon University;;School of Computer Science, Carnegie Mellon University;", "aff_domain": "cmu.edu;;cs.cmu.edu;", "position": "PhD student;;Assistant Professor;", "bibtex": "@misc{\nzhang2024tooldec,\ntitle={ToolDec: Syntax Error-Free and Generalizable Tool Use for {LLM}s via Finite-State Decoding},\nauthor={Kexun Zhang and Hongqiao Chen and Lei Li and William Yang Wang},\nyear={2024},\nurl={https://openreview.net/forum?id=27YiINkhw3}\n}", "github": "", "project": "", "reviewers": "uQYf;R8ud;ydwb;7DQS", "site": "https://openreview.net/forum?id=27YiINkhw3", "pdf_size": 701731, "rating": "5;5;6;8", "confidence": "4;5;4;4", "soundness": "2;3;3;4", "contribution": "1;2;3;3", "presentation": "2;3;3;4", "wc_summary": "128;141;65;79", "wc_strengths": "64;68;50;47", "wc_weaknesses": "187;116;175;81", "wc_questions": "80;52;17;1", "wc_review": "459;377;307;208", "wc_reply_reviewers": "24;19;28;82", "wc_reply_authors": "1032;841;1391;653", "reply_reviewers": "1;1;1;1", "reply_authors": "4;3;4;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 103.25, 31.971667144520318 ], "wc_strengths_avg": [ 57.25, 8.926785535678562 ], "wc_weaknesses_avg": [ 139.75, 43.27455950093542 ], "wc_questions_avg": [ 37.5, 30.696090956341656 ], "wc_review_avg": [ 337.75, 92.22628421442555 ], "wc_reply_reviewers_avg": [ 38.25, 25.459526704163217 ], "wc_reply_authors_avg": [ 979.25, 272.88859906562607 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.25, 0.82915619758885 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.4714045207910316, "gs_citation": 2, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=11606521451468694141&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Ground-A-Video: Zero-shot Grounded Video Editing using Text-to-image Diffusion Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19558", "id": "28L2FCtMWq", "author_site": "Hyeonho Jeong, Jong Chul YE", "tldr": "", "abstract": "This paper introduces a novel grounding-guided video-to-video translation framework called Ground-A-Video for multi-attribute video editing.\nRecent endeavors in video editing have showcased promising results in single-attribute editing or style transfer tasks, either by training T2V models on text-video data or adopting training-free methods.\nHowever, when confronted with the complexities of multi-attribute editing scenarios, they exhibit shortcomings such as omitting or overlooking intended attribute changes, modifying the wrong elements of the input video, and failing to preserve regions of the input video that should remain intact.\nGround-A-Video attains temporally consistent multi-attribute editing of input videos in a training-free manner without aforementioned shortcomings.\nCentral to our method is the introduction of cross-frame gated attention which incorporates groundings information into the latent representations in a temporally consistent fashion, along with Modulated Cross-Attention and optical flow guided inverted latents smoothing.\nExtensive experiments and applications demonstrate that Ground-A-Video's zero-shot capacity outperforms other baseline methods in terms of edit-accuracy and frame consistency.\nFurther results and code are available at our project page ( http://ground-a-video.github.io )", "keywords": "Computer Vision;Diffusion Models;Video Editing", "primary_area": "generative models", "supplementary_material": "", "author": "Hyeonho Jeong;Jong Chul Ye", "authorids": "~Hyeonho_Jeong1;~Jong_Chul_Ye1", "gender": "M;M", "homepage": "https://hyeonho99.github.io/;https://bispl.weebly.com/", "dblp": "272/8078;15/5613", "google_scholar": "Pa7EfsEAAAAJ;HNMjoNEAAAAJ", "orcid": "0000-0002-6864-4190;", "linkedin": "hyeonho-jeong-jhh/;", "or_profile": "~Hyeonho_Jeong1;~Jong_Chul_Ye1", "aff": "Adobe Systems;Korea Advanced Institute of Science & Technology", "aff_domain": "adobe.com;kaist.ac.kr", "position": "Intern;Full Professor", "bibtex": "@inproceedings{\njeong2024groundavideo,\ntitle={Ground-A-Video: Zero-shot Grounded Video Editing using Text-to-image Diffusion Models},\nauthor={Hyeonho Jeong and Jong Chul Ye},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=28L2FCtMWq}\n}", "github": "", "project": "", "reviewers": "NYGw;TXKT;F2zK;vcyz", "pdf_size": 50110846, "rating": "6;6;6;8", "confidence": "4;3;4;4", "soundness": "2;3;2;4", "contribution": "2;3;3;3", "presentation": "2;3;2;3", "wc_summary": "50;44;114;111", "wc_strengths": "48;111;58;50", "wc_weaknesses": "52;215;157;92", "wc_questions": "68;9;25;3", "wc_review": "218;379;354;256", "wc_reply_reviewers": "47;18;73;0", "wc_reply_authors": "1151;1020;1175;427", "reply_reviewers": "1;1;2;0", "reply_authors": "4;3;5;2", "rating_avg": [ 
6.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 79.75, 32.83576556135093 ], "wc_strengths_avg": [ 66.75, 25.82029240732955 ], "wc_weaknesses_avg": [ 129.0, 62.20530523998737 ], "wc_questions_avg": [ 26.25, 25.410381736605217 ], "wc_review_avg": [ 301.75, 66.71722041572175 ], "wc_reply_reviewers_avg": [ 34.5, 27.84331158465171 ], "wc_reply_authors_avg": [ 943.25, 303.8390815876062 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.5, 1.118033988749895 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 39, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7963234025556491814&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 7, "openreview": "https://openreview.net/forum?id=28L2FCtMWq", "pdf": "https://openreview.net/pdf?id=28L2FCtMWq", "email": "adobe.com;kaist.ac.kr", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Adobe;Korea Advanced Institute of Science and Technology", "aff_unique_dep": "Adobe Systems Incorporated;", "aff_unique_url": "https://www.adobe.com;https://www.kaist.ac.kr", "aff_unique_abbr": "Adobe;KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;South Korea" }, { "id": "28gMnEAgl9", "title": "Large Language Models Are Not Strong Abstract Reasoners", "track": "main", "status": "Reject", "tldr": "", "abstract": "Large Language Models have shown tremendous performance on a large variety of natural language processing tasks, ranging from text comprehension to common sense reasoning. \nHowever, the mechanisms responsible for this success remain opaque, and it is unclear whether LLMs can achieve human-like cognitive capabilities or whether these models are still fundamentally circumscribed.\nAbstract reasoning is a fundamental task for cognition, consisting of finding and applying a general pattern from few data. Evaluating deep neural architectures on this task could give insight into their potential limitations regarding reasoning and their broad generalisation abilities, yet this is currently an under-explored area.\nIn this paper, we introduce a new benchmark for evaluating language models beyond memorization on abstract reasoning tasks. We perform extensive evaluations of state-of-the-art LLMs, showing that they achieve very limited performance in contrast with other natural language tasks, and we examine the reasons for this difference. 
We apply techniques that have been shown to improve performance on other NLP tasks and show that their impact for abstract reasoning is limited.", "keywords": "Abstract Reasoning;Large Language Models;Natural Language Processing", "primary_area": "causal reasoning", "supplementary_material": "/attachment/36340cd345a805ed81fdde638144cdad20dc5245.zip", "author": "Gael Gendron;Qiming Bao;Michael Witbrock;Gillian Dobbie", "authorids": "~Gael_Gendron1;~Qiming_Bao1;~Michael_Witbrock1;~Gillian_Dobbie1", "gender": ";M;F;M", "homepage": ";https://14h034160212.github.io/;https://profiles.auckland.ac.nz/g-dobbie;", "dblp": "310/1562;126/9037-1;d/GDobbie;w/MichaelJWitbrock", "google_scholar": "https://scholar.google.com/citations?hl=en;t-PqsgcAAAAJ;https://scholar.google.com.au/citations?user=v19BUHIAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-2457-934X;0000-0002-1000-7383;0000-0001-7245-0367;0000-0002-7554-0971", "linkedin": ";qiming-bill-bao-773757166/;gill-dobbie-3734822/;witbrock", "or_profile": "~Gael_Gendron1;~Qiming_Bao1;~Gillian_Dobbie1;~Michael_J._Witbrock1", "aff": "University of Auckland;University of Auckland;University of Auckland;University of Auckland", "aff_domain": "auckland.ac.nz;aucklanduni.ac.nz;auckland.ac.nz;auckland.ac.nz", "position": "PhD student;PhD student;Full Professor;Professor", "bibtex": "@misc{\ngendron2024large,\ntitle={Large Language Models Are Not Strong Abstract Reasoners},\nauthor={Gael Gendron and Qiming Bao and Michael Witbrock and Gillian Dobbie},\nyear={2024},\nurl={https://openreview.net/forum?id=28gMnEAgl9}\n}", "github": "", "project": "", "reviewers": "As6m;2kzP;LbQe", "site": "https://openreview.net/forum?id=28gMnEAgl9", "pdf_size": 403704, "rating": "5;5;6", "confidence": "4;4;3", "soundness": "2;2;3", "contribution": "2;2;3", "presentation": "3;3;3", "wc_summary": "68;35;58", "wc_strengths": "36;39;75", "wc_weaknesses": "64;65;65", "wc_questions": "49;1;92", "wc_review": "217;140;290", "wc_reply_reviewers": "75;0;0", "wc_reply_authors": "815;894;761", "reply_reviewers": "1;0;0", "reply_authors": "2;2;2", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 53.666666666666664, 13.816254517375139 ], "wc_strengths_avg": [ 50.0, 17.72004514666935 ], "wc_weaknesses_avg": [ 64.66666666666667, 0.4714045207910317 ], "wc_questions_avg": [ 47.333333333333336, 37.16928241916375 ], "wc_review_avg": [ 215.66666666666666, 61.24450088692771 ], "wc_reply_reviewers_avg": [ 25.0, 35.35533905932738 ], "wc_reply_authors_avg": [ 823.3333333333334, 54.61583002105607 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9999999999999997, "gs_citation": 69, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6764763760076446864&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Auckland", "aff_unique_dep": "", "aff_unique_url": "https://www.auckland.ac.nz", "aff_unique_abbr": "UoA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "New Zealand" }, { "id": "28kAFnQZ5V", "title": "TENSORIZED ATTENTION MODEL", "track": "main", "status": 
"Reject", "tldr": "", "abstract": "In recent years, attention mechanisms have played a crucial role in the\nsuccess of Transformer models, as seen in platforms like OpenAI's\nChatGPT. However, these models often struggle to compute attention\nweights across various object types, such as 'comments,' 'replies,' and\nspecific 'subjects,' which naturally express relationships in many\nreal-world scenarios. This limitation can potentially impact prediction\naccuracy.\nTo overcome this limitation, we introduce the Tensorized Attention Model\n(TAM). By leveraging Tucker decomposition, TAM calculates attention\nweights across a diverse array of objects and seamlessly integrates them\ninto Transformer outputs. \nWe have implemented TAM within the Transformer encoder and have\nshowcased its effectiveness in response selection tasks. Our model takes\ninto account relationships based on 'the current context in the\ndialogue', 'the entire dialogue history', and 'the subject matter of the\ndialogue'. Evaluation using the Reddit dataset across a wide variety of\ntopics indicates that TAM significantly outperforms existing\nTransformer-based methods in terms of accuracy.", "keywords": "Attention model;Tensorized Transformer;Encoder model", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Makoto Nakatsuji;Yasuhiro Fujiwara;Narichika Nomoto;Atsushi Fukayama", "authorids": "~Makoto_Nakatsuji1;~Yasuhiro_Fujiwara1;~Narichika_Nomoto1;~Atsushi_Fukayama1", "gender": "M;M;M;M", "homepage": "https://tw.rpi.edu/web/person/MakotoNakatsuji;http://www.linkedin.com/in/yasuhiro-fujiwara-8960b0180;https://dl.acm.org/profile/99660959690;", "dblp": "86/2250;02/2520;;", "google_scholar": "https://scholar.google.co.jp/citations?user=NILKxCIAAAAJ;https://scholar.google.co.jp/citations?user=kCaZaaMAAAAJ;;", "orcid": "0000-0003-2181-0056;0000-0001-9578-1118;;", "linkedin": "https://linkedin.com/in/makoto-nakatsuji-a7693916;;;atsushi-fukayama-5551563a/", "or_profile": "~Makoto_Nakatsuji1;~Yasuhiro_Fujiwara1;~Narichika_Nomoto1;~Atsushi_Fukayama1", "aff": "NTT, ;NTT;NTT, The University of Tokyo;Nippon Telegraph and Telephone Corporation", "aff_domain": "ntt.co.jp;ntt.co.jp;ntt.co.jp;group.ntt", "position": "Researcher;Researcher;Researcher;Researcher", "bibtex": "@misc{\nnakatsuji2024tensorized,\ntitle={{TENSORIZED} {ATTENTION} {MODEL}},\nauthor={Makoto Nakatsuji and Yasuhiro Fujiwara and Narichika Nomoto and Atsushi Fukayama},\nyear={2024},\nurl={https://openreview.net/forum?id=28kAFnQZ5V}\n}", "github": "", "project": "", "reviewers": "4BZc;iEpi;qc5V", "site": "https://openreview.net/forum?id=28kAFnQZ5V", "pdf_size": 813413, "rating": "5;5;6", "confidence": "3;5;4", "soundness": "3;2;3", "contribution": "2;2;3", "presentation": "3;2;3", "wc_summary": "53;46;115", "wc_strengths": "96;26;61", "wc_weaknesses": "78;117;29", "wc_questions": "65;11;262", "wc_review": "292;200;467", "wc_reply_reviewers": "0;0;54", "wc_reply_authors": "958;887;1150", "reply_reviewers": "0;0;1", "reply_authors": "3;3;3", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 71.33333333333333, 31.008959278820623 ], "wc_strengths_avg": [ 61.0, 28.577380332470412 ], "wc_weaknesses_avg": [ 74.66666666666667, 
36.003086287459055 ], "wc_questions_avg": [ 112.66666666666667, 107.87132252003875 ], "wc_review_avg": [ 319.6666666666667, 110.74394891921735 ], "wc_reply_reviewers_avg": [ 18.0, 25.45584412271571 ], "wc_reply_authors_avg": [ 998.3333333333334, 111.09255400590787 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:v8polrHrx_wJ:scholar.google.com/&scioq=TENSORIZED+ATTENTION+MODEL&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "NTT Corporation;University of Tokyo;Nippon Telegraph and Telephone Corporation", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ntt.co.jp;https://www.u-tokyo.ac.jp;https://www.ntt.co.jp", "aff_unique_abbr": "NTT;UTokyo;NTT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Japan" }, { "id": "29pGC6IYaL", "title": "Maximizing LLMs Potential: Enhancing Mongolian Chinese Machine Translation with RL Agents and Adversarial Multi Knowledge Distillation", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Despite the impressive performance of Large Language Models (LLMs) in Natural Language Processing (NLP), they still face challenges in low-resource translation tasks, particularly in Mongolian to Chinese machine translation, often yielding suboptimal results. To address this issue, we propose an innovative approach that combines multi-source knowledge distillation and incorporates Reinforcement Learning (RL) to help models acquire and transfer knowledge from LLMs more effectively. RL plays a crucial role in this, making dynamic decisions to determine useful information for low-resource translation models and how to extract it efficiently. 
We introduce a new reward function to comprehensively guide knowledge distillation, and experiments show that this approach harnesses the potential of LLMs, significantly improving translation quality in low-resource settings.", "keywords": "Large Language Models;Reinforcement Learning;Adversarial Knowledge Distillation;Mongolian Chinese Machine Translation", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/7bc668c29cd2f97957fc2cab260dc869647cf12c.zip", "author": "Xu Liuxu;YiLa Su;Nier Wu;Yatu Ji;Huinuan Zhang", "authorids": "~Xu_Liuxu1;~YiLa_Su1;~Nier_Wu1;~Yatu_Ji1;~Huinuan_Zhang1", "gender": "M;M;M;M;", "homepage": "https://github.com/;https://github.com/;https://id.qq.com/index.html#info;;", "dblp": ";;;245/8301;", "google_scholar": ";;;;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Xu_Liuxu1;~YiLa_Su1;~Nier_Wu1;~Yatu_Ji1;~Huinuan_Zhang1", "aff": "Inner Mongolia University of Technology;;inner mongolia university;Inner Mongolia University;", "aff_domain": "imut.edu;;imu.edu.cn;imu.edu.cn;", "position": "MS student;;PhD student;PhD student;", "bibtex": "@misc{\nanonymous2025maximizing,\ntitle={Maximizing {LLM}s Potential: Enhancing Mongolian Chinese Machine Translation with {RL} Agents and Adversarial Multi Knowledge Distillation},\nauthor={Anonymous},\nyear={2025},\nurl={https://openreview.net/forum?id=29pGC6IYaL}\n}", "github": "", "project": "", "reviewers": "Mjnh;NGkR;hubM", "site": "https://openreview.net/forum?id=29pGC6IYaL", "pdf_size": 510324, "rating": "1;5;5", "confidence": "5;3;5", "soundness": "1;2;3", "contribution": "2;2;2", "presentation": "1;3;3", "wc_summary": "80;96;54", "wc_strengths": "24;24;43", "wc_weaknesses": "165;105;70", "wc_questions": "42;54;2", "wc_review": "311;279;169", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 3.6666666666666665, 1.8856180831641267 ], "confidence_avg": [ 4.333333333333333, 0.9428090415820634 ], "soundness_avg": [ 2.0, 0.816496580927726 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 76.66666666666667, 17.30767331432956 ], "wc_strengths_avg": [ 30.333333333333332, 8.956685895029603 ], "wc_weaknesses_avg": [ 113.33333333333333, 39.228674319799396 ], "wc_questions_avg": [ 32.666666666666664, 22.23110933404409 ], "wc_review_avg": [ 253.0, 60.81666438293592 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:McokaE6rt08J:scholar.google.com/&scioq=Maximizing+LLMs+Potential:+Enhancing+Mongolian+Chinese+Machine+Translation+with+RL+Agents+and+Adversarial+Multi+Knowledge+Distillation&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;1", "aff_unique_norm": "Inner Mongolia University of Technology;Inner Mongolia University", "aff_unique_dep": ";", "aff_unique_url": "http://www.imut.edu.cn;http://www.imu.edu.cn", "aff_unique_abbr": "IMUT;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "2A199SAhW3", "title": "Learning Pseudo 3D Guidance for View-consistent 3D Texturing with 2D Diffusion", "track": "main", "status": "Withdraw", 
"tldr": "", "abstract": "Text-driven 3D texturing requires the generation of high-fidelity texture that conforms to given geometry and description. Recently, the high-quality text-to-image generation ability of 2D diffusion model has significantly promoted this task, by converting it into a texture optimization process guided by multi-view synthesized images. Thus the generation of high-quality and multi-view consistency images becomes the key issue. State-of-the-art methods introduce global consistency by treating novel view image generation as image inpainting conditioned on the texture generated by previously seen views. However, due to the error accumulation of inpainting itself and the occlusion between object parts, these inpainting-based methods often fail to deal with long-range texture consistency and the learned texture is of low quality. To address these, we present P3G, a text to 3D texturing approach based on learned Pseudo 3D Guidance. The key idea of P3D is to first learn a coarse but view-consistent texture, to serve as a semantics and layout guidance for high-quality view-consistent multi-view image generation. To this end, we propose a novel method to enable the learning of the pseudo 3D guidance, and design an efficient framework for high-quality and multi-view consistent image generation that incorporates both the depth map, the learned high-level semantics and layout guidance, and the previously generated texture. Quantitative and qualitative evaluation on variant 3D shapes demonstrates the superiority of our P3G on both consistency and quality.", "keywords": "3D Texturing;Diffusion Model", "primary_area": "generative models", "supplementary_material": "/attachment/1ebf2a5d9f44a50b3c8fcffbe805dd40812325fc.zip", "author": "Kehan Li;Yanbo Fan;Yang Wu;Zhongqian Sun;Yang Wei;Li Yuan;Jie Chen", "authorids": "~Kehan_Li1;~Yanbo_Fan1;~Yang_Wu1;~Zhongqian_Sun1;~Yang_Wei2;~Li_Yuan2;~Jie_Chen15", "gender": "M;M;M;M;M;M;M", "homepage": ";https://sites.google.com/site/yanbofan0124/;;;;https://aimia-pku.github.io/;https://yuanli2333.github.io/", "dblp": "206/5336-2;181/4574;56/1428-1;70/8500;03/1094-32.html;92/6289-1;98/4583-7", "google_scholar": ";OlOqHyUAAAAJ;https://scholar.google.com.hk/citations?user=vwOQ-UIAAAAJ;;;https://scholar.google.fi/citations?user=ZAZFfwwAAAAJ;-5juAR0AAAAJ", "orcid": ";0000-0002-8530-485X;;;;;0000-0002-2120-5588", "linkedin": ";;;;;;", "or_profile": "~Kehan_Li1;~Yanbo_Fan1;~Yang_Wu1;~Zhongqian_Sun1;~Yang_Wei2;~Jie_Chen15;~Yuan_LI2", "aff": "Peking University;Ant Research;Tencent AI Lab;Tencent AI Lab;Tencent AI Lab;Peking University;Peking University", "aff_domain": "pku.edu.cn;antgroup.com;tencent.com;tencent.com;tencent.com;pku.edu.cn;pku.edu.cn", "position": "MS student;Associate Professor;Principal Researcher;Researcher;Researcher;Associate Professor;Assistant Professor", "bibtex": "@misc{\nli2024learning,\ntitle={Learning Pseudo 3D Guidance for View-consistent 3D Texturing with 2D Diffusion},\nauthor={Kehan Li and Yanbo Fan and Yang Wu and Zhongqian Sun and Yang Wei and Li Yuan and Jie Chen},\nyear={2024},\nurl={https://openreview.net/forum?id=2A199SAhW3}\n}", "github": "", "project": "", "reviewers": "ruEM;ij6J;Nu8g;vaRt", "site": "https://openreview.net/forum?id=2A199SAhW3", "pdf_size": 3347506, "rating": "3;3;5;5", "confidence": "4;4;4;3", "soundness": "2;2;2;2", "contribution": "2;2;2;2", "presentation": "2;2;3;2", "wc_summary": "85;68;30;61", "wc_strengths": "68;54;40;27", "wc_weaknesses": "202;95;85;211", "wc_questions": "48;27;4;41", 
"wc_review": "403;244;159;340", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.0, 0.0 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 61.0, 19.912307751739878 ], "wc_strengths_avg": [ 47.25, 15.31951369985353 ], "wc_weaknesses_avg": [ 148.25, 58.443883341201754 ], "wc_questions_avg": [ 30.0, 16.80773631397161 ], "wc_review_avg": [ 286.5, 92.86684015298464 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2815068178139544979&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;2;2;2;0;0", "aff_unique_norm": "Peking University;Ant Research;Tencent", "aff_unique_dep": ";;Tencent AI Lab", "aff_unique_url": "http://www.pku.edu.cn;https://www.antgroup.com;https://ai.tencent.com", "aff_unique_abbr": "Peking U;Ant Research;Tencent AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "2BfZMh9td4", "title": "Beyond One-Preference-for-All: Multi-Objective Direct Preference Optimization", "track": "main", "status": "Reject", "tldr": "", "abstract": "Language models (LMs), despite aligning well with an average labeler through reinforcement learning from human feedback (RLHF), may not universally suit diverse human preferences. Recent approaches therefore opt for customization by collecting multi-dimensional feedback and creating distinct rewards for each dimension (e.g., helpfulness, harmlessness, honesty). LMs can then be tailored to different preferences using multi-objective RL (MORL) with different reward weightings. Yet, RL fine-tuning is unstable and resource-heavy, especially for MORLHF with diverse and usually conflicting objectives. In this paper, we present Multi-Objective Direct Preference Optimization (MODPO), an RL-free algorithm that extends Direct Preference Optimization (DPO) for multiple alignment objectives. Essentially, MODPO trains different LMs to represent different collective reward models that combine all objectives with specific weightings. With a simple cross-entropy loss, the LMs optimized against the MODPO objective are analytically the exact solutions of the original MORLHF objective. 
Empirical results in safety alignment and long-form question answering confirm that MODPO matches or outperforms existing methods, efficiently producing a Pareto-optimal set of LMs that cater to diverse preferences with 3 times less computational resources compared with MORLHF.", "keywords": "large language model;human feedback;multi objective", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Zhanhui Zhou;Jie Liu;Chao Yang;Jing Shao;Yu Liu;Xiangyu Yue;Wanli Ouyang;Yu Qiao", "authorids": "~Zhanhui_Zhou1;~Jie_Liu13;~Chao_Yang3;~Jing_Shao3;~Yu_Liu2;~Xiangyu_Yue1;~Wanli_Ouyang1;~Yu_Qiao1", "gender": "M;;;F;M;M;;", "homepage": "https://zhziszz.github.io/;;;https://amandajshao.github.io/;http://liuyu.us;http://xyue.io/;;", "dblp": ";;;;97/2274-15;207/7518;;", "google_scholar": "SbACfYQAAAAJ;;;VU5ObUwAAAAJ;;-xQ-C1sAAAAJ;;", "orcid": ";;;;;;;", "linkedin": ";;;;;;;", "or_profile": "~Zhanhui_Zhou1;~Jie_Liu13;~Chao_Yang3;~Jing_Shao3;~Yu_Liu2;~Xiangyu_Yue1;~Wanli_Ouyang1;~Yu_Qiao1", "aff": "Shanghai Artificial Intelligence Laboratory;;;Shanghai AI Laboratory;SenseTime;The Chinese University of Hong Kong;;", "aff_domain": "pjlab.org.cn;;;pjlab.org.cn;sensetime.com;ie.cuhk.edu;;", "position": "Researcher;;;Researcher;Principal Researcher;Assistant Professor;;", "bibtex": "@misc{\nzhou2024beyond,\ntitle={Beyond One-Preference-for-All: Multi-Objective Direct Preference Optimization},\nauthor={Zhanhui Zhou and Jie Liu and Chao Yang and Jing Shao and Yu Liu and Xiangyu Yue and Wanli Ouyang and Yu Qiao},\nyear={2024},\nurl={https://openreview.net/forum?id=2BfZMh9td4}\n}", "github": "", "project": "", "reviewers": "nwoD;TGqd;GBhK;c1KT", "site": "https://openreview.net/forum?id=2BfZMh9td4", "pdf_size": 1991679, "rating": "5;5;6;10", "confidence": "3;3;3;4", "soundness": "3;3;3;3", "contribution": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "116;53;77;80", "wc_strengths": "49;14;92;70", "wc_weaknesses": "111;57;60;23", "wc_questions": "30;30;108;54", "wc_review": "306;154;337;227", "wc_reply_reviewers": "34;0;0;0", "wc_reply_authors": "1063;755;1146;349", "reply_reviewers": "1;0;0;0", "reply_authors": "4;3;3;2", "rating_avg": [ 6.5, 2.0615528128088303 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 81.5, 22.5 ], "wc_strengths_avg": [ 56.25, 28.74347752099596 ], "wc_weaknesses_avg": [ 62.75, 31.419540098480116 ], "wc_questions_avg": [ 55.5, 31.85514087239295 ], "wc_review_avg": [ 256.0, 71.24956140215882 ], "wc_reply_reviewers_avg": [ 8.5, 14.722431864335457 ], "wc_reply_authors_avg": [ 828.25, 312.697437629412 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.9801960588196067, "gs_citation": 50, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7079115486357626350&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Shanghai Artificial Intelligence Laboratory;Shanghai AI Laboratory;SenseTime;Chinese University of Hong Kong", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.shailab.org/;https://www.shanghai-ai-lab.com;https://www.sensetime.com;https://www.cuhk.edu.hk", "aff_unique_abbr": "Shanghai AI Lab;SAIL;SenseTime;CUHK", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong 
Kong SAR", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "2C3CWCPxNS", "title": "Preconditioning for Physics-Informed Neural Networks", "track": "main", "status": "Reject", "tldr": "", "abstract": "Physics-informed neural networks (PINNs) have shown promise in solving complex partial differential equations (PDEs). However, certain training pathologies have emerged, compromising both convergence and prediction accuracy in practical applications. In this paper, we propose to use condition number as an innovative metric to diagnose and rectify the pathologies in PINNs. Inspired by classical numerical analysis, where the condition number measures sensitivity and stability, we highlight its pivotal role in the training dynamics of PINNs. We delineate a theory that elucidates the relationship between reduced condition numbers and improved error control, as well as better convergence. Subsequently, we present an algorithm that leverages preconditioning to enhance the condition number. Evaluations on 16 PDE problems showcase the superior performance of our method. Significantly, in 7 of these problems, our method reduces errors by an order of magnitude. Furthermore, in 2 distinct cases, our approach pioneers a solution, slashing relative errors from roughly $100\\\\%$ to below $6\\\\%$ and $21\\\\%$, respectively.", "keywords": "physics-informed neural network;partial differential equation;condition number;application", "primary_area": "applications to physical sciences (physics, chemistry, biology, etc.)", "supplementary_material": "/attachment/73b0e5d9a8305629c7f2c1026c67620cfaac045c.zip", "author": "Songming Liu;Chang Su;Jiachen Yao;Zhongkai Hao;Hang Su;Youjia Wu;Jun Zhu", "authorids": "~Songming_Liu1;~Chang_Su7;~Jiachen_Yao3;~Zhongkai_Hao1;~Hang_Su3;~Youjia_Wu1;~Jun_Zhu2", "gender": "M;M;M;M;M;M;M", "homepage": ";https://github.com/EdwardIX;https://jiachenyao.com/;;http://ml.cs.tsinghua.edu.cn/~jun;;https://haozhongkai.github.io/", "dblp": "285/4585;;213/4920;;50/2644-1;26/5371-6;270/0220.html", "google_scholar": "6urFg8kAAAAJ;;Z_bCoGcAAAAJ;;axsP38wAAAAJ;dxN1_X0AAAAJ;dfSzq27ZiVoC", "orcid": ";;0000-0001-7655-7831;0000-0002-7399-1306;;;", "linkedin": "%E6%9D%BE%E9%93%AD-%E5%88%98-7b8339254/;;jiachen-y-05a05932a/;;;;", "or_profile": "~Songming_Liu1;~Chang_Su7;~Jiachen_Yao3;~Youjia_Wu1;~Jun_Zhu2;~Hang_Su2;~Hao_Zhongkai1", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Bosch;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "mails.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;bosch.com;mail.tsinghua.edu.cn;tsinghua.edu.cn;mails.tsinghua.edu.cn", "position": "PhD student;Undergrad student;Undergrad student;Researcher;Professor;Associate Professor;PhD student", "bibtex": "@misc{\nliu2024preconditioning,\ntitle={Preconditioning for Physics-Informed Neural Networks},\nauthor={Songming Liu and Chang Su and Jiachen Yao and Zhongkai Hao and Hang Su and Youjia Wu and Jun Zhu},\nyear={2024},\nurl={https://openreview.net/forum?id=2C3CWCPxNS}\n}", "github": "", "project": "", "reviewers": "8B8P;zaqP;tNfv;MgQM", "site": "https://openreview.net/forum?id=2C3CWCPxNS", "pdf_size": 0, "rating": "3;5;6;6", "confidence": "3;3;2;3", "soundness": "1;2;3;4", "contribution": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "47;81;253;52", "wc_strengths": "7;29;127;103", "wc_weaknesses": "233;111;61;182", "wc_questions": "7;113;49;105", "wc_review": "294;334;490;442", "wc_reply_reviewers": "572;136;0;0", "wc_reply_authors": 
"2272;1435;1805;1504", "reply_reviewers": "2;1;0;0", "reply_authors": "5;4;4;3", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 1.118033988749895 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 108.25, 84.57356265405875 ], "wc_strengths_avg": [ 66.5, 49.84726672546851 ], "wc_weaknesses_avg": [ 146.75, 65.78896184011418 ], "wc_questions_avg": [ 68.5, 43.229041164476456 ], "wc_review_avg": [ 390.0, 79.14543574963751 ], "wc_reply_reviewers_avg": [ 177.0, 234.71472045868788 ], "wc_reply_authors_avg": [ 1754.0, 329.8431445399464 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 4.0, 0.7071067811865476 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.4714045207910316, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11405597999913649388&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0;1;0;0;0", "aff_unique_norm": "Tsinghua University;Robert Bosch GmbH", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.bosch.com", "aff_unique_abbr": "THU;Bosch", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0;0", "aff_country_unique": "China;Germany" }, { "id": "2CFagKoXXx", "title": "High Dimensional Causal Inference with Variational Backdoor Adjustment", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Backdoor adjustment is a technique in causal inference for estimating interventional quantities from purely observational data. In medical settings, backdoor adjustment can be used to control for confounding and isolate the effectiveness of a treatment. However, high dimensional treatments and confounders pose a series of potential pitfalls: tractability, identifiability, optimization. In this work, we take a generative modeling approach to backdoor adjustment for high dimensional treatments and confounders. We cast backdoor adjustment as an optimization problem in variational inference without reliance on proxy variables and hidden confounders. Empirically, our method is able to estimate interventional likelihood in a variety of high dimensional settings, including semi-synthetic X-ray medical data. 
To the best of our knowledge, this is the first application of backdoor adjustment in which all the relevant variables are high dimensional.", "keywords": "causality;causal inference;backdoor adjustment;variational inference;generative model;high dimensional;intervention;treatment;confounding;identifiable;optimization", "primary_area": "causal reasoning", "supplementary_material": "/attachment/a7680ebe8eb534b7fa9cbdec56728c58588a7278.pdf", "author": "Daniel Mingyi Israel;Aditya Grover;Guy Van den Broeck", "authorids": "~Daniel_Mingyi_Israel1;~Aditya_Grover1;~Guy_Van_den_Broeck1", "gender": "M;M;M", "homepage": "https://danielmisrael.github.io/;https://aditya-grover.github.io;http://web.cs.ucla.edu/~guyvdb/", "dblp": ";162/5052;96/7521.html", "google_scholar": ";oOhnPUgAAAAJ;d0KQ9z0AAAAJ", "orcid": ";;0000-0003-3434-2503", "linkedin": "daniel-israel-248757160;;guyvdb", "or_profile": "~Daniel_Mingyi_Israel1;~Aditya_Grover1;~Guy_Van_den_Broek1", "aff": ";University of California, Los Angeles;University of California, Los Angeles", "aff_domain": ";ucla.edu;ucla.edu", "position": ";Assistant Professor;Associate Professor", "bibtex": "@misc{\nisrael2024high,\ntitle={High Dimensional Causal Inference with Variational Backdoor Adjustment},\nauthor={Daniel Mingyi Israel and Aditya Grover and Guy Van den Broeck},\nyear={2024},\nurl={https://openreview.net/forum?id=2CFagKoXXx}\n}", "github": "", "project": "", "reviewers": "m1yM;wak4;3NGK", "site": "https://openreview.net/forum?id=2CFagKoXXx", "pdf_size": 1381415, "rating": "3;3;5", "confidence": "3;3;3", "soundness": "3;2;3", "contribution": "1;2;2", "presentation": "2;2;3", "wc_summary": "76;65;96", "wc_strengths": "87;81;73", "wc_weaknesses": "440;665;84", "wc_questions": "172;9;4", "wc_review": "775;820;257", "wc_reply_reviewers": "125;294;0", "wc_reply_authors": "604;379;99", "reply_reviewers": "1;1;0", "reply_authors": "2;1;1", "rating_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 1.6666666666666667, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 79.0, 12.832251036613439 ], "wc_strengths_avg": [ 80.33333333333333, 5.734883511361751 ], "wc_weaknesses_avg": [ 396.3333333333333, 239.19355249578857 ], "wc_questions_avg": [ 61.666666666666664, 78.04414705081278 ], "wc_review_avg": [ 617.3333333333334, 255.45558431076734 ], "wc_reply_reviewers_avg": [ 139.66666666666666, 120.47221901980372 ], "wc_reply_authors_avg": [ 360.6666666666667, 206.57255921884257 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10661663977020613431&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Los Angeles", "aff_unique_dep": "", "aff_unique_url": "https://www.ucla.edu", "aff_unique_abbr": "UCLA", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "2CxkRDMIG4", "title": "Precision and Recall Reject Curves for Classification", "track": "main", "status": "Reject", "tldr": "", "abstract": "For some classification scenarios, it is desirable to use only those classification 
instances that a trained model associates with a high certainty. To obtain such high-certainty instances, previous work has proposed accuracy-reject curves. Reject curves allow to evaluate and compare the performance of different certainty measures over a range of thresholds for accepting or rejecting classifications. However, the accuracy may not be the most suited evaluation metric for all applications, and instead precision or recall may be preferable. This is the case, for example, for data with imbalanced class distributions. We therefore propose reject curves that evaluate precision and recall, the recall-reject curve and the precision-reject curve. Using prototype-based classifiers from learning vector quantization, we first validate the proposed curves on artificial benchmark data against the accuracy reject curve as a baseline. We then show on imbalanced benchmarks and medical, real-world data that for these scenarios, the proposed precision- and recall-curves yield more accurate insights into classifier performance than accuracy reject curves.", "keywords": "reject option;precision;recall;evaluating classifiers;transparent machine learning;imbalanced data", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Lydia Fischer;Patricia Wollstadt", "authorids": "~Lydia_Fischer1;~Patricia_Wollstadt1", "gender": ";F", "homepage": ";", "dblp": "https://dblp.uni-trier.de/pid/146/0233.html;", "google_scholar": ";", "orcid": ";0000-0002-7105-5207", "linkedin": ";", "or_profile": "~Lydia_Fischer1;~Patricia_Wollstadt1", "aff": "Honda Research Institute;", "aff_domain": "honda-ri.de;", "position": "Researcher;", "bibtex": "@misc{\nfischer2024precision,\ntitle={Precision and Recall Reject Curves for Classification},\nauthor={Lydia Fischer and Patricia Wollstadt},\nyear={2024},\nurl={https://openreview.net/forum?id=2CxkRDMIG4}\n}", "github": "", "project": "", "reviewers": "EkHi;uffc;zM2w;YWY1", "site": "https://openreview.net/forum?id=2CxkRDMIG4", "pdf_size": 563752, "rating": "1;1;1;3", "confidence": "4;4;4;4", "soundness": "3;1;2;1", "contribution": "1;1;1;1", "presentation": "2;3;3;3", "wc_summary": "36;42;107;29", "wc_strengths": "43;29;36;15", "wc_weaknesses": "99;318;94;77", "wc_questions": "15;1;81;24", "wc_review": "193;390;318;145", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 1.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 1.75, 0.82915619758885 ], "contribution_avg": [ 1.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 53.5, 31.228992939254383 ], "wc_strengths_avg": [ 30.75, 10.353139620424328 ], "wc_weaknesses_avg": [ 147.0, 99.06311119685269 ], "wc_questions_avg": [ 30.25, 30.425112982534674 ], "wc_review_avg": [ 261.5, 97.42817867537092 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11613881960874242025&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0", "aff_unique_norm": "Honda Research Institute", "aff_unique_dep": "", "aff_unique_url": "https://www.honda-ri.com", "aff_unique_abbr": "HRI", "aff_country_unique_index": "0", "aff_country_unique": "Japan" }, { "id": "2DDwxbjP9g", "title": "In Defence 
Of Wasserstein", "track": "main", "status": "Reject", "tldr": "", "abstract": "Since the introduction of Wasserstein GANs, there has been considerable debate whether they should be viewed as minimizing the Wasserstein distance between the training images and the generated images. In particular, several recent works have shown that minimizing this Wasserstein distance leads to blurry images that are of much lower quality than those generated by state-of-the-art WGANs.\nIn this paper we present theoretical and experimental results that suggest that with the appropriate parameter settings, WGANs $\\textbf{do}$ minimize the Wasserstein distance but the form of the distance that is minimized depends highly on the discriminator architecture. We focus on discrete generators for which the Wasserstein distance between the generator distribution and the training distribution can be computed exactly and show that when the discriminator is fully connected, standard WGANs indeed minimize the Wasserstein distance between the generated images and the training images, while when the discriminator is convolutional they minimize the Wasserstein distance between $\\textbf{patches}$ in the generated images and the training images. Our experiments indicate that minimizing the patch Wasserstein metric yields sharp and realistic samples for the same datasets in which minimizing the image Wasserstein distance yields blurry and low quality samples. Our results also suggest alternative methods that directly optimize the patch Wasserstein distance without a discriminator and/or a generator.", "keywords": "GAN;Wasserstein;Optimal transport;Image patches;Generative models", "primary_area": "generative models", "supplementary_material": "/attachment/8c918c3ba9901ec4b0b39a01a73b32fee69dd907.zip", "author": "Ariel Elnekave;Yair Weiss", "authorids": "~Ariel_Elnekave1;~Yair_Weiss1", "gender": "M;M", "homepage": "https://github.com/ariel415el;http://www.cs.huji.ac.il/~yweiss/", "dblp": ";44/1092", "google_scholar": "https://scholar.google.co.il/citations?user=pSVU5zsAAAAJ;https://scholar.google.com.tw/citations?user=9DXQi8gAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Ariel_Elnekave1;~Yair_Weiss1", "aff": "Hebrew University of Jerusalem;Hebrew University of Jerusalem", "aff_domain": "huji.ac.il;huji.ac.il", "position": "PhD student;Full Professor", "bibtex": "@misc{\nelnekave2024in,\ntitle={In Defence Of Wasserstein},\nauthor={Ariel Elnekave and Yair Weiss},\nyear={2024},\nurl={https://openreview.net/forum?id=2DDwxbjP9g}\n}", "github": "", "project": "", "reviewers": "hGFX;L98k;scef;ZrfD", "site": "https://openreview.net/forum?id=2DDwxbjP9g", "pdf_size": 8181526, "rating": "3;3;3;8", "confidence": "4;3;4;3", "soundness": "2;2;2;3", "contribution": "2;2;2;3", "presentation": "2;3;2;4", "wc_summary": "93;110;88;209", "wc_strengths": "62;57;42;91", "wc_weaknesses": "494;485;385;95", "wc_questions": "383;2;56;24", "wc_review": "1032;654;571;419", "wc_reply_reviewers": "345;146;162;0", "wc_reply_authors": "962;777;760;64", "reply_reviewers": "2;1;1;0", "reply_authors": "3;2;1;1", "rating_avg": [ 4.25, 2.165063509461097 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 125.0, 49.178247223747206 ], "wc_strengths_avg": [ 63.0, 17.76231966833161 ], "wc_weaknesses_avg": [ 364.75, 161.50909417119522 ], "wc_questions_avg": [ 116.25, 155.2004751925715 ], "wc_review_avg": [ 669.0, 
225.8860332114405 ], "wc_reply_reviewers_avg": [ 163.25, 122.45688016604049 ], "wc_reply_authors_avg": [ 640.75, 342.2815909452333 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:eHs5x8D3dAsJ:scholar.google.com/&scioq=In+Defence+Of+Wasserstein&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Hebrew University of Jerusalem", "aff_unique_dep": "", "aff_unique_url": "https://www.huji.ac.il", "aff_unique_abbr": "HUJI", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Jerusalem", "aff_country_unique_index": "0;0", "aff_country_unique": "Israel" }, { "id": "2DJMtdfgfH", "title": "Deep ResNIDS: A Multistage AI Framework for Novelty Detection in Network Traffic", "track": "main", "status": "Desk Reject", "tldr": "", "abstract": "Ensuring computer and network system security is crucial in today's digital landscape. Network intrusion detection systems (NIDS) monitor network traffic to identify potential threats. However, traditional NIDS struggle to adapt to evolving cyberattack tactics. To address this, we propose an AI-enabled novelty detection framework to handle zero-day, out-of-distribution, and adversarial evasion attacks. Our framework comprises three sequential deep neural network architectures: one for the classifier and two for specific autoencoders, designed to effectively detect both known attack patterns and novel, previously unseen samples. We use innovative transfer learning, unfreezing specific neurons, and layer combinations to enhance resilience. Leveraging the one-shot learning approach in the transfer learning component of the framework, we demonstrate continuous improvement in detection accuracy for both known and novel network traffic patterns. Our experiments on benchmark intrusion detection data sets achieved, on average, 98.5% accuracy in detecting various attacks.", "keywords": "Multistage network intrusion detection system;novelty detector;anomaly detector;malicious packet classifier;sequential deep neural network architectures", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Soumyadeep Hore;Ankit Shah;Nathaniel D. Bastian;Jalal Ghadermazi", "authorids": "~Soumyadeep_Hore1;~Ankit_Shah4;~Nathaniel_D._Bastian1;~Jalal_Ghadermazi1", "gender": "M;;M;M", "homepage": ";https://ankitshah.co;https://cyber.army.mil/About-Us/ACI-Research-Team/Bastian/;", "dblp": ";;132/5837.html;", "google_scholar": "ZaPw_kUAAAAJ;;M2aMMxQAAAAJ;https://scholar.google.com/citations?view_op=list_works", "orcid": ";;0000-0001-9957-2778;", "linkedin": ";;nathanielbastian/;", "or_profile": "~Soumyadeep_Hore1;~Ankit_Shah4;~Nathaniel_D._Bastian1;~Jalal_Ghadermazi1", "aff": "University of South Florida;;United States Military Academy;", "aff_domain": "usf.edu;;westpoint.edu;", "position": "PhD student;;Principal Researcher;", "bibtex": "@misc{\nhore2024deep,\ntitle={Deep Res{NIDS}: A Multistage {AI} Framework for Novelty Detection in Network Traffic},\nauthor={Soumyadeep Hore and Ankit Shah and Nathaniel D. 
Bastian and Jalal Ghadermazi},\nyear={2024},\nurl={https://openreview.net/forum?id=2DJMtdfgfH}\n}", "github": "", "project": "", "reviewers": "", "site": "https://openreview.net/forum?id=2DJMtdfgfH", "pdf_size": 613704, "rating": "", "confidence": "", "soundness": "", "contribution": "", "presentation": "", "wc_summary": "", "wc_strengths": "", "wc_weaknesses": "", "wc_questions": "", "wc_review": "", "wc_reply_reviewers": "", "wc_reply_authors": "", "reply_reviewers": "", "reply_authors": "", "rating_avg": [ 0, 0 ], "confidence_avg": [ 0, 0 ], "soundness_avg": [ 0, 0 ], "contribution_avg": [ 0, 0 ], "presentation_avg": [ 0, 0 ], "wc_summary_avg": [ 0, 0 ], "wc_strengths_avg": [ 0, 0 ], "wc_weaknesses_avg": [ 0, 0 ], "wc_questions_avg": [ 0, 0 ], "wc_review_avg": [ 0, 0 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 0, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:1H1M_o7xzsUJ:scholar.google.com/&scioq=Deep+ResNIDS:+A+Multistage+AI+Framework+for+Novelty+Detection+in+Network+Traffic&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "University of South Florida;United States Military Academy", "aff_unique_dep": ";", "aff_unique_url": "https://www.usf.edu;https://www.usma.edu/", "aff_unique_abbr": "USF;USMA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "2DJUXmHZ2O", "title": "Generalizing Poincar\u00e9 Policy Representations in Multi-agent Reinforcement Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Learning policy representations is essential for comprehending the intricacies of agent interactions and their decision-making processes.\nRecent studies have found that the evolution of any state under Markov decision processes (MDPs) can be divided into multiple hierarchies based on time sequences. This conceptualization resembles a tree-growing process, where the policy and environment dynamics determine the possible branches. 
In this paper, the multiple agent's trajectory growing paths can be projected into a Poincar\u00e9 ball, which requires the tree to grow from the origin to the boundary of the ball, deriving a new geometric idea of learning Poincar\u00e9 Policy Representations (P2R) for MARL.\nSpecifically, P2R captures the policy representation of the Poincar\u00e9 ball by a hyperbolic neural network and introduces a contrast objective function that encourages embeddings of the same policy to move closer together while embeddings of different policies to move apart, which enables embed policies with low distortion.\nExperimental results provide empirical evidence for the effectiveness of the P2R framework in cooperative and competitive games, demonstrating the potential of Poincar\u00e9 policy representations for optimizing policies in complex multi-agent environments.", "keywords": "policy representation;reinforcement learning;multi-agent", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Bohao Qu;Xiaofeng Cao;Zhen Fang;Qing Guo;Yi Chang", "authorids": "~Bohao_Qu1;~Xiaofeng_Cao2;~Zhen_Fang2;~Qing_Guo3;~Yi_Chang4", "gender": "M;M;M;M;M", "homepage": "https://ieeexplore.ieee.org/author/37088517338;https://fang-zhen.github.io/index.html;https://tsingqguo.github.io;http://www.yichang-cs.com;https://xiaofengcaoml.github.io/", "dblp": "275/7652.html;;25/3038-5;02/5438.html;117/3982-2.html", "google_scholar": "Xr4GORcAAAAJ;OzD6WJcAAAAJ;Rj2x4QUAAAAJ;https://scholar.google.com.hk/citations?user=drEkR50AAAAJ;", "orcid": "0000-0003-3192-8736;0000-0003-0602-6255;0000-0003-0974-9299;0000-0003-2697-8093;", "linkedin": ";;;;", "or_profile": "~Bohao_Qu1;~Zhen_Fang2;~Qing_Guo3;~Yi_Chang4;~Xiaofeng_Cao1", "aff": "Jilin University;University of Technology Sydney; Agency for Science, Technology and Research (A*STAR));Jilin University, China;Jilin University", "aff_domain": "jlu.edu.cn;uts.edu.au;cfar.a-star.edu.sg;jlu.edu.cn;jlu.edu.cn", "position": "PhD student;Assistant Professor;Researcher;Full Professor;Associate Professor", "bibtex": "@misc{\nqu2024generalizing,\ntitle={Generalizing Poincar\\'e Policy Representations in Multi-agent Reinforcement Learning},\nauthor={Bohao Qu and Xiaofeng Cao and Zhen Fang and Qing Guo and Yi Chang},\nyear={2024},\nurl={https://openreview.net/forum?id=2DJUXmHZ2O}\n}", "github": "", "project": "", "reviewers": "VeDg;iHns;MnTe;SaBP", "site": "https://openreview.net/forum?id=2DJUXmHZ2O", "pdf_size": 4202293, "rating": "3;5;5;6", "confidence": "2;2;4;3", "soundness": "2;3;2;3", "contribution": "2;2;2;3", "presentation": "2;3;3;3", "wc_summary": "81;50;94;67", "wc_strengths": "30;40;43;21", "wc_weaknesses": "55;78;195;38", "wc_questions": "502;34;58;19", "wc_review": "668;202;390;145", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 73.0, 16.355427233796124 ], "wc_strengths_avg": [ 33.5, 8.674675786448736 ], "wc_weaknesses_avg": [ 91.5, 61.41864537744218 ], "wc_questions_avg": [ 153.25, 201.8308388230104 ], "wc_review_avg": [ 351.25, 204.11194844986414 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 5, 0 ], 
"corr_rating_confidence": 0.48420012470625223, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:UIGmecXqHeMJ:scholar.google.com/&scioq=Generalizing+Poincar%C3%A9+Policy+Representations+in+Multi-agent+Reinforcement+Learning&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "Jilin University;University of Technology Sydney;Agency for Science, Technology and Research", "aff_unique_dep": ";;", "aff_unique_url": "http://www.jlu.edu.cn;https://www.uts.edu.au;https://www.a-star.edu.sg", "aff_unique_abbr": "JLU;UTS;A*STAR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;0;0", "aff_country_unique": "China;Australia;Singapore" }, { "title": "Neural Spectral Methods: Self-supervised learning in the spectral domain", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19557", "id": "2DbVeuoa6a", "author_site": "Yiheng Du, Nithin Chalapathi, Aditi Krishnapriyan", "tldr": "", "abstract": "We present Neural Spectral Methods, a technique to solve parametric Partial Differential Equations (PDEs), grounded in classical spectral methods. Our method uses orthogonal bases to learn PDE solutions as mappings between spectral coefficients, instantiating a spectral-based neural operator. In contrast to current machine learning approaches which enforce PDE constraints by minimizing the numerical quadrature of the residuals in the spatiotemporal domain, we leverage Parseval's identity and introduce a new training strategy through a spectral loss. Our spectral loss enables more efficient differentiation through the neural network, and substantially reduces training complexity. At inference time, the computational cost of our method remains constant, regardless of the spatiotemporal resolution of the domain. Our experimental results demonstrate that our method significantly outperforms previous machine learning approaches in terms of speed and accuracy by one to two orders of magnitude on multiple different problems, including reaction-diffusion, and forced and unforced Navier-Stokes equations. When compared to numerical solvers of the same accuracy, our method demonstrates a $10\\times$ increase in performance speed. Our source code is publicly available at https://github.com/ASK-Berkeley/Neural-Spectral-Methods.", "keywords": "Machine learning for PDEs;spectral methods;neural network differentiation;spectral loss;PDEs;neural operators", "primary_area": "applications to physical sciences (physics, chemistry, biology, etc.)", "supplementary_material": "", "author": "Yiheng Du;Nithin Chalapathi;Aditi S. Krishnapriyan", "authorids": "~Yiheng_Du1;~Nithin_Chalapathi1;~Aditi_S._Krishnapriyan1", "gender": "M;M;", "homepage": "https://github.com/mrlazy1708;https://github.com/nithinvc/;https://a1k12.github.io", "dblp": ";255/5081;256/5472", "google_scholar": ";;7HoFN1wAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Yiheng_Du1;~Nithin_Chalapathi1;~Aditi_Krishnapriyan1", "aff": "Peking University;University of California, Berkeley;University of California, Berkeley", "aff_domain": "pku.edu.cn;berkeley.edu;berkeley.edu", "position": "Undergrad student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\ndu2024neural,\ntitle={Neural Spectral Methods: Self-supervised learning in the spectral domain},\nauthor={Yiheng Du and Nithin Chalapathi and Aditi S. 
Krishnapriyan},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=2DbVeuoa6a}\n}", "github": "", "project": "", "reviewers": "KbDK;4uLV;dP1d;9377", "pdf_size": 2056367, "rating": "3;8;8;8", "confidence": "4;4;4;4", "soundness": "3;2;3;3", "contribution": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "97;69;66;109", "wc_strengths": "82;48;148;67", "wc_weaknesses": "330;104;43;42", "wc_questions": "189;215;228;71", "wc_review": "698;436;485;289", "wc_reply_reviewers": "359;20;13;197", "wc_reply_authors": "1788;559;739;703", "reply_reviewers": "1;1;1;2", "reply_authors": "3;2;2;3", "rating_avg": [ 6.75, 2.165063509461097 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 85.25, 18.280795934531955 ], "wc_strengths_avg": [ 86.25, 37.632266740126084 ], "wc_weaknesses_avg": [ 129.75, 118.30971008332325 ], "wc_questions_avg": [ 175.75, 62.086129690938215 ], "wc_review_avg": [ 477.0, 146.56909633343585 ], "wc_reply_reviewers_avg": [ 147.25, 142.76619873065192 ], "wc_reply_authors_avg": [ 947.25, 490.0573308297714 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12743670651235522506&as_sdt=5,30&sciodt=0,30&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=2DbVeuoa6a", "pdf": "https://openreview.net/pdf?id=2DbVeuoa6a", "email": "pku.edu.cn;berkeley.edu;berkeley.edu", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Peking University;University of California, Berkeley", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;https://www.berkeley.edu", "aff_unique_abbr": "Peking U;UC Berkeley", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;1;1", "aff_country_unique": "China;United States" }, { "id": "2DldCIjAdX", "title": "LayerNAS: Neural Architecture Search in Polynomial Complexity", "track": "main", "status": "Reject", "tldr": "", "abstract": "Neural Architecture Search (NAS) has become a popular method for discovering effective model architectures, especially for target hardware. As such, NAS methods that find optimal architectures under constraints are essential. In our paper, we propose LayerNAS to address the challenge of multi-objective NAS by transforming it into a combinatorial optimization problem, which effectively constrains the search complexity to be polynomial. LayerNAS rigorously derives its method from the fundamental assumption that modifications to previous layers have no impact on the subsequent layers. When dealing with search spaces containing $L$ layers that meet this requirement, the method performs layerwise-search for each layer, selecting from a set of search options $\\mathbb{S}$. LayerNAS groups model candidates based on one objective, such as model size or latency, and searches for the optimal model based on another objective, thereby splitting the cost and reward elements of the search. This approach limits the search complexity to $ O(H \\cdot |\\mathbb{S}| \\cdot L) $, where $H$ is a constant set in LayerNAS. 
Our experiments show that LayerNAS is able to consistently discover superior models across a variety of search spaces in comparison to strong baselines, including search spaces derived from NATS-Bench, MobileNetV2 and MobileNetV3.", "keywords": "AutoML;Neural Architecture Search;Model Optimization", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/33d4417722fdee95d0620788259a596dca14567e.zip", "author": "Yicheng Fan;Dana Alon;JINGYUE SHEN;Daiyi Peng;Keshav Kumar;Xinyu Feng;Yun Long;Xin Wang;Fotis Iliopoulos;Da-Cheng Juan;Erik Vee", "authorids": "~Yicheng_Fan1;~Dana_Alon1;~JINGYUE_SHEN1;~Daiyi_Peng1;~Keshav_Kumar1;~Xinyu_Feng3;~Yun_Long1;~Xin_Wang30;~Fotis_Iliopoulos1;~Da-Cheng_Juan1;~Erik_Vee1", "gender": ";;;M;M;;M;M;M;;", "homepage": ";;https://www.linkedin.com/in/jingyue-brian-shen/;http://www.daiyip.org;https://keshv.in;;https://scholar.google.com/citations?user=DNHsO0gAAAAJ&hl=en&oi=sra;;http://www.filiop.org/;;", "dblp": "289/0992;136/8637;229/7228;;;;;;147/4790;47/1564;", "google_scholar": "LTZjlnwAAAAJ;0WEF4fkAAAAJ;wWAFw8UAAAAJ;_8Egwg8AAAAJ;;;DNHsO0gAAAAJ;7BjA8ccAAAAJ;v3e5F-AAAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": ";;;;;;;;;;", "linkedin": "https://linkedin.com/in/yicheng-fan-b4b79124;dana-alon;;;keshav-kr;;;;;;", "or_profile": "~Yicheng_Fan1;~Dana_Alon1;~JINGYUE_SHEN1;~Daiyi_Peng1;~Keshav_Kumar1;~Xinyu_Feng3;~Yun_Long1;~Xin_Wang30;~Fotis_Iliopoulos1;~Da-Cheng_Juan1;~Erik_Vee1", "aff": "Google;Research, Google;Google;;;;;Google;Google;Google Research;", "aff_domain": "google.com;research.google.com;google.com;;;;;google.com;google.com;google.com;", "position": "Software Engineer;Researcher;Software Engineer;;;;;Software Engineer;Researcher;Senior Software Engineer;", "bibtex": "@misc{\nfan2024layernas,\ntitle={Layer{NAS}: Neural Architecture Search in Polynomial Complexity},\nauthor={Yicheng Fan and Dana Alon and JINGYUE SHEN and Daiyi Peng and Keshav Kumar and Xinyu Feng and Yun Long and Xin Wang and Fotis Iliopoulos and Da-Cheng Juan and Erik Vee},\nyear={2024},\nurl={https://openreview.net/forum?id=2DldCIjAdX}\n}", "github": "", "project": "", "reviewers": "4co7;sD5g;nAfG;xmki", "site": "https://openreview.net/forum?id=2DldCIjAdX", "pdf_size": 3857560, "rating": "5;5;5;8", "confidence": "5;3;4;4", "soundness": "2;2;2;4", "contribution": "2;2;2;3", "presentation": "2;2;2;3", "wc_summary": "99;17;24;50", "wc_strengths": "78;28;22;77", "wc_weaknesses": "233;350;144;16", "wc_questions": "47;35;25;1", "wc_review": "457;430;215;144", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1213;1234;1310;10", "reply_reviewers": "0;0;0;0", "reply_authors": "2;2;2;1", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 47.5, 32.17530108639234 ], "wc_strengths_avg": [ 51.25, 26.337947907914163 ], "wc_weaknesses_avg": [ 185.75, 122.23824074323059 ], "wc_questions_avg": [ 27.0, 16.911534525287763 ], "wc_review_avg": [ 311.5, 134.70430579606577 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 941.75, 539.1550681390281 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=14910580986110710568&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "2EamGPuWSc", "title": "A Shot-Efficient Differential Equation Integrator using Quantum Neural Networks", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Physics-informed regularisation on quantum neural networks provides a promising means for solving differential equations on near-term quantum computers.\nHowever, most demonstrations of this technique assume idealised simulated quantum circuits where the respective expectations are available.\nIn real quantum hardware, such ideal expectations are not accessible and must be averaged over many shots, introducing additional computations, the cost of which has not been considered in the majority of the preceding studies.\nThe requirements of higher-order derivatives for physics-informed regularisers are especially high in terms of circuit repetitions (shots) compared to lower-order derivatives required for supervised learning.\nWe demonstrate how to construct a global formulation of physics-informed losses especially amenable to solve ordinary differential equations on near-term quantum computers in a shot-efficient manner.\nThe resulting approach can reduce the order of derivatives required to calculate a loss compared to Physics-informed Neural Networks (PINNs). \nIn the case of initial value problems in ordinary differential equations (ODEs) and some partial differential equations (PDEs), our method removes completely the need for higher-order automatic differentiation,\nthus providing an $\\mathcal{O}(N)$ improvement in shot-efficiency, where $N$ is the number of data-encodings of the quantum neural network.\nOur formulation naturally incorporates boundary conditions and physics-informed losses into a single optimisation term.\nNumerical experiments demonstrate favourable empirical performance, in terms of both shot-efficiency and error, on (simulated) quantum circuits compared to existing quantum methodologies.\nWe demonstrate that the relative performance of quantum neural network algorithms in the infinite shot limit does not necessarily correspond to relative performance in the finite shot limit.\nWe hope this works provides insights on how to efficiently design schemes that will reduce the shot requirements and will become the basis for further developing efficient quantum algorithms for the solution of differential equations.", "keywords": "Variational Quantum Algorithms;Physics-Informed Machine Learning;Quantum Computing", "primary_area": "neurosymbolic & hybrid AI systems (physics-informed, logic & formal reasoning, etc.)", "supplementary_material": "", "author": "Atiyo Ghosh;Gergana V. 
Velikova;Panagiotis Barkoutsos;Vincent Emanuel Elfving", "authorids": "~Atiyo_Ghosh1;~Gergana_V._Velikova1;~Panagiotis_Barkoutsos1;~Vincent_Emanuel_Elfving1", "gender": ";F;M;", "homepage": ";;https://pasqal.com;", "dblp": ";;183/4846;", "google_scholar": "https://scholar.google.co.uk/citations?user=MTGN-gwAAAAJ;txNXEWEAAAAJ;SVcvKVYAAAAJ;", "orcid": "0000-0003-1606-8520;0000-0001-5910-8604;0000-0001-9428-913X;", "linkedin": "atiyo-ghosh-211255b5/;;pbarkoutsos/;", "or_profile": "~Atiyo_Ghosh1;~Gergana_V._Velikova1;~Panagiotis_Barkoutsos1;~Vincent_Emanuel_Elfving1", "aff": "Pasqal;PASQAL;PASQAL SAS;", "aff_domain": "pasqal.com;pasqal.com;pasqal.com;", "position": "Researcher;Researcher;VP Quantum Algorithms;", "bibtex": "@misc{\nghosh2024a,\ntitle={A Shot-Efficient Differential Equation Integrator using Quantum Neural Networks},\nauthor={Atiyo Ghosh and Gergana V. Velikova and Panagiotis Barkoutsos and Vincent Emanuel Elfving},\nyear={2024},\nurl={https://openreview.net/forum?id=2EamGPuWSc}\n}", "github": "", "project": "", "reviewers": "Nx77;bf4n;k8bX;1rNZ", "site": "https://openreview.net/forum?id=2EamGPuWSc", "pdf_size": 4362113, "rating": "3;3;3;5", "confidence": "3;4;4;3", "soundness": "2;2;3;3", "contribution": "2;2;2;2", "presentation": "2;2;2;2", "wc_summary": "53;52;208;86", "wc_strengths": "78;30;109;60", "wc_weaknesses": "167;197;242;194", "wc_questions": "4;35;145;3", "wc_review": "302;314;704;343", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 99.75, 63.978023570598054 ], "wc_strengths_avg": [ 69.25, 28.647643882176418 ], "wc_weaknesses_avg": [ 200.0, 26.91653766738954 ], "wc_questions_avg": [ 46.75, 58.16517428840044 ], "wc_review_avg": [ 415.75, 167.08736487239244 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:xM1WQPST50IJ:scholar.google.com/&scioq=A+Shot-Efficient+Differential+Equation+Integrator+using+Quantum+Neural+Networks&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;1", "aff_unique_norm": "Pasqal;PASQAL SAS", "aff_unique_dep": ";", "aff_unique_url": "https://www.pasqal.com;", "aff_unique_abbr": ";", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "id": "2Ed7b52z53", "title": "On the Matrix Form of the Quaternion Fourier Transform and Quaternion Convolution", "track": "main", "status": "Reject", "tldr": "", "abstract": "We study matrix forms of quaternionic versions of the Fourier Transform and Convolution operations. Quaternions offer a powerful representation unit, however they are related to difficulties in their use that stem foremost from non-commutativity of quaternion multiplication, \nand due to that $\\mu^2 = -1$ posseses infinite solutions in the quaternion domain. Handling of quaternionic matrices is consequently complicated in several aspects (definition of eigenstructure, determinant, etc.). 
Our research findings clarify the relation of the Quaternion Fourier Transform matrix to the standard (complex) Discrete Fourier Transform matrix, and the extent to which well-known complex-domain theorems extend to quaternions. We focus especially on the relation of Quaternion Fourier Transform matrices to Quaternion Circulant matrices (representing quaternionic convolution), and the eigenstructure of the latter. A proof-of-concept application that makes direct use of our theoretical results is presented, where we present a method to bound the Lipschitz constant of a Quaternionic Convolutional Neural Network.", "keywords": "Quaternions;Fourier Transform;Convolution;Circulant Matrix", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Giorgos Sfikas;George Retsinas", "authorids": "~Giorgos_Sfikas1;~George_Retsinas2", "gender": "M;M", "homepage": "http://www.cs.uoi.gr/~sfikas;http://users.iit.demokritos.gr/~georgeretsi/", "dblp": "01/747;171/5669", "google_scholar": "X73G9lYAAAAJ;https://scholar.google.gr/", "orcid": "0000-0002-7305-2886;", "linkedin": "giorgos-sfikas-15a30484/;george-retsinas-9b073b88/", "or_profile": "~Giorgos_Sfikas1;~George_Retsinas2", "aff": "University of West Attica;National Technical University of Athens", "aff_domain": "uniwa.gr;ntua.gr", "position": "Assistant Professor;Postdoc", "bibtex": "@misc{\nsfikas2024on,\ntitle={On the Matrix Form of the Quaternion Fourier Transform and Quaternion Convolution},\nauthor={Giorgos Sfikas and George Retsinas},\nyear={2024},\nurl={https://openreview.net/forum?id=2Ed7b52z53}\n}", "github": "", "project": "", "reviewers": "ZfTZ;JYL4;YJPK", "site": "https://openreview.net/forum?id=2Ed7b52z53", "pdf_size": 1346330, "rating": "1;5;5", "confidence": "4;3;3", "soundness": "1;4;3", "contribution": "1;1;2", "presentation": "1;3;3", "wc_summary": "44;67;60", "wc_strengths": "15;69;64", "wc_weaknesses": "318;163;215", "wc_questions": "1;143;6", "wc_review": "378;442;345", "wc_reply_reviewers": "41;0;0", "wc_reply_authors": "350;2785;1858", "reply_reviewers": "1;0;0", "reply_authors": "3;7;5", "rating_avg": [ 3.6666666666666665, 1.8856180831641267 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 1.247219128924647 ], "contribution_avg": [ 1.3333333333333333, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 57.0, 9.626352718795768 ], "wc_strengths_avg": [ 49.333333333333336, 24.36299561949547 ], "wc_weaknesses_avg": [ 232.0, 64.41014412859722 ], "wc_questions_avg": [ 50.0, 65.79260343432738 ], "wc_review_avg": [ 388.3333333333333, 40.2685430026411 ], "wc_reply_reviewers_avg": [ 13.666666666666666, 19.3275853524323 ], "wc_reply_authors_avg": [ 1664.3333333333333, 1003.4727477891742 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 5.0, 1.632993161855452 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.9999999999999997, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:OH0SXWewsXYJ:scholar.google.com/&scioq=On+the+Matrix+Form+of+the+Quaternion+Fourier+Transform+and+Quaternion+Convolution&hl=en&as_sdt=0,5", "gs_version_total": 4, "aff_unique_index": "0;1", "aff_unique_norm": "University of West Attica;National Technical University of Athens", "aff_unique_dep": ";", "aff_unique_url": "https://www.uoa.gr;https://www.ntua.gr", 
"aff_unique_abbr": ";NTUA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Greece" }, { "id": "2FAPahXyVh", "title": "OptiMUS: Optimization Modeling Using mip Solvers and large language models", "track": "main", "status": "Reject", "tldr": "", "abstract": "Optimization problems are pervasive across various sectors, from manufacturing and distribution to healthcare. However, most such problems are still solved heuristically by hand rather than optimally by state-of-the-art solvers, as the expertise required to formulate and solve these problems limits the widespread adoption of optimization tools and techniques. We introduce OptiMUS, a Large Language Model (LLM)-based agent designed to formulate and solve MLIP problems from their natural language descriptions. OptiMUS is capable of developing mathematical models, writing and debugging solver code, developing tests, and checking the validity of generated solutions. To benchmark our agent, we present NLP4LP, a novel dataset of linear programming (LP) and mixed integer linear programming (MILP) problems. Our experiments demonstrate that OptiMUS is able to solve 67\\% more problems compared to a basic LLM prompting strategy. The code OptiMUS and the data for NLP4LP are available at \\href{https://anonymous.4open.science/r/nlp4lp-8F62/README.md}{https://anonymous.4open.science/r/nlp4lp-8F62/README.md}", "keywords": "LLM;AI;Optimization modeling;optimization solvers;mathematical formulation;autonomous agents", "primary_area": "applications to robotics, autonomy, planning", "supplementary_material": "/attachment/30597840440b73aabdda6a4c833ef8b7673c6530.pdf", "author": "Ali AhmadiTeshnizi;Wenzhi Gao;Madeleine Udell", "authorids": "~Ali_AhmadiTeshnizi1;~Wenzhi_Gao1;~Madeleine_Udell1", "gender": ";M;F", "homepage": "https://teshnizi.github.io/;https://github.com/Gwzwpxz;https://people.orie.cornell.edu/mru8", "dblp": ";;153/2166", "google_scholar": "475ARYgAAAAJ;4lDkX_YAAAAJ;tZ9pEDMAAAAJ", "orcid": ";;0000-0002-3985-915X", "linkedin": "teshnizi/;;", "or_profile": "~Ali_AhmadiTeshnizi1;~Wenzhi_Gao1;~Madeleine_Udell1", "aff": "Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@misc{\nahmaditeshnizi2024optimus,\ntitle={Opti{MUS}: Optimization Modeling Using mip Solvers and large language models},\nauthor={Ali AhmadiTeshnizi and Wenzhi Gao and Madeleine Udell},\nyear={2024},\nurl={https://openreview.net/forum?id=2FAPahXyVh}\n}", "github": "", "project": "", "reviewers": "8JDZ;GUNL;QHcC;Bafe", "site": "https://openreview.net/forum?id=2FAPahXyVh", "pdf_size": 1434421, "rating": "3;5;5;6", "confidence": "4;3;3;3", "soundness": "1;2;2;3", "contribution": "2;3;3;3", "presentation": "1;2;3;3", "wc_summary": "255;58;36;30", "wc_strengths": "47;61;35;14", "wc_weaknesses": "177;131;26;83", "wc_questions": "58;343;181;12", "wc_review": "537;593;278;139", "wc_reply_reviewers": "367;0;80;119", "wc_reply_authors": "992;624;337;887", "reply_reviewers": "3;0;1;3", "reply_authors": "4;1;2;4", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.0, 0.7071067811865476 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 94.75, 93.10578660856693 ], "wc_strengths_avg": [ 39.25, 17.239127008059313 ], "wc_weaknesses_avg": [ 104.25, 56.08642883978262 
], "wc_questions_avg": [ 148.5, 128.16883396520387 ], "wc_review_avg": [ 386.75, 185.95748842141313 ], "wc_reply_reviewers_avg": [ 141.5, 137.07753280534342 ], "wc_reply_authors_avg": [ 710.0, 253.66217692040726 ], "reply_reviewers_avg": [ 1.75, 1.299038105676658 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.9271726499455306, "gs_citation": 37, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15204204105868547127&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "2GJm8yT2jN", "title": "URLOST: Unsupervised Representation Learning without Stationarity or Topology", "track": "main", "status": "Reject", "tldr": "", "abstract": "Unsupervised representation learning has seen tremendous progress but is constrained by its reliance on data modality specific stationarity and topology, a limitation not found in biological intelligence systems. For instance, human vision processes visual signals derived from irregular and non-stationary sampling lattices yet accurately perceives the geometry of the world. We introduce a novel framework that learns from high-dimensional data lacking stationarity and topology. Our model combines spectral clustering, and masked autoencoders and a learnable self-organizing layer. We evaluate its effectiveness on simulated biological vision data, neural recordings from the primary visual cortex, and gene expression datasets. Compared to state-of-the-art unsupervised learning methods like SimCLR and MAE, our model excels at learning meaningful representations across diverse modalities without depending on stationarity or topology. It also outperforms other methods not dependent on these factors, setting a new benchmark in the field. 
This work represents a step toward unsupervised learning methods that can generalize across diverse high dimensional data modalities.", "keywords": "Unsupervised learning;Self-supervised learning;Deep learning", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Zeyu Yun;Juexiao Zhang;Bruno Olshausen;Yann LeCun;Yubei Chen", "authorids": "~Zeyu_Yun1;~Juexiao_Zhang1;~Bruno_Olshausen1;~Yann_LeCun1;~Yubei_Chen1", "gender": "M;M;M;M;M", "homepage": "https://zeyuyun1.github.io/;https://juexzz.github.io/;http://redwood.berkeley.edu/bruno/;http://yann.lecun.com;https://redwood.berkeley.edu/people/yubei-chen/", "dblp": "289/2186;250/9589;30/3869;l/YannLeCun;30/10064", "google_scholar": ";TYxPbcEAAAAJ;4aqK_74AAAAJ;WLN3QrAAAAAJ;WeyLqFUAAAAJ", "orcid": ";;;;", "linkedin": ";juexiao-zhang-788453146/Juexiao-Zhang;;;yubei-chen-05998a39/", "or_profile": "~Zeyu_Yun1;~Juexiao_Zhang1;~Bruno_Olshausen1;~Yann_LeCun1;~Yubei_Chen1", "aff": "University of California, Berkeley;New York University;UC Berkeley;New York University;University of California, Davis", "aff_domain": "berkeley.edu;nyu.edu;;nyu.edu;ucdavis.edu", "position": "MS student;PhD student;Full Professor;Full Professor;Assistant Professor", "bibtex": "@misc{\nyun2024urlost,\ntitle={{URLOST}: Unsupervised Representation Learning without Stationarity or Topology},\nauthor={Zeyu Yun and Juexiao Zhang and Bruno Olshausen and Yann LeCun and Yubei Chen},\nyear={2024},\nurl={https://openreview.net/forum?id=2GJm8yT2jN}\n}", "github": "", "project": "", "reviewers": "JGWe;qyGv;EzDe", "site": "https://openreview.net/forum?id=2GJm8yT2jN", "pdf_size": 7983547, "rating": "5;6;6", "confidence": "1;3;4", "soundness": "2;2;3", "contribution": "2;2;3", "presentation": "2;4;3", "wc_summary": "28;95;43", "wc_strengths": "30;111;66", "wc_weaknesses": "96;126;163", "wc_questions": "2;5;42", "wc_review": "156;337;314", "wc_reply_reviewers": "0;31;0", "wc_reply_authors": "1255;1208;1614", "reply_reviewers": "0;1;0", "reply_authors": "3;2;4", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 2.6666666666666665, 1.247219128924647 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 55.333333333333336, 28.709270666845967 ], "wc_strengths_avg": [ 69.0, 33.13608305156178 ], "wc_weaknesses_avg": [ 128.33333333333334, 27.402351886086148 ], "wc_questions_avg": [ 16.333333333333332, 18.190351532856337 ], "wc_review_avg": [ 269.0, 80.45288476286396 ], "wc_reply_reviewers_avg": [ 10.333333333333334, 14.613540144521982 ], "wc_reply_authors_avg": [ 1359.0, 181.33026958196103 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.0, 0.816496580927726 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.9449111825230683, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5809278398021347415&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;0;1;2", "aff_unique_norm": "University of California, Berkeley;New York University;University of California, Davis", "aff_unique_dep": ";;", "aff_unique_url": "https://www.berkeley.edu;https://www.nyu.edu;https://www.ucdavis.edu", "aff_unique_abbr": "UC Berkeley;NYU;UC Davis", "aff_campus_unique_index": "0;0;2", "aff_campus_unique": "Berkeley;;Davis", 
"aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "2GMTfqr7eb", "title": "Retro: Reusing teacher projection head for efficient embedding distillation on Lightweight Models via Self-supervised Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Self-supervised learning (SSL) is gaining attention for its ability to learn effective representations with large amounts of unlabeled data. Lightweight models can be distilled from larger self-supervised pre-trained models using contrastive and consistency constraints, but the different sizes of the projection heads make it challenging for students to accurately mimic the teacher's embedding. We propose \\textsc{Retro}, which reuses the teacher's projection head for students, and our experimental results demonstrate significant improvements over the state-of-the-art on all lightweight models. For instance, when training EfficientNet-B0 using ResNet-50/101/152 as teachers, our approach improves the linear result on ImageNet to $66.9%$, $69.3%$, and $69.8%$, respectively, with significantly fewer parameters.", "keywords": "Self-supervised learning;knowledge distillation;lightweight models;contrastive learning;consistency learning", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/df05b343cde141bc250174f0cd4bd513db9ad5b9.pdf", "author": "Khanh-Binh Nguyen", "authorids": "~Khanh-Binh_Nguyen1", "gender": "M", "homepage": "", "dblp": "325/4115", "google_scholar": "LoOglv4AAAAJ", "orcid": "0000-0002-9948-1400", "linkedin": "", "or_profile": "~Khanh-Binh_Nguyen1", "aff": "National Cancer Center", "aff_domain": "ncc.re.kr", "position": "Researcher", "bibtex": "@misc{\nnguyen2024retro,\ntitle={Retro: Reusing teacher projection head for efficient embedding distillation on Lightweight Models via Self-supervised Learning},\nauthor={Khanh-Binh Nguyen},\nyear={2024},\nurl={https://openreview.net/forum?id=2GMTfqr7eb}\n}", "github": "", "project": "", "reviewers": "dnj5;96a3;qqvw", "site": "https://openreview.net/forum?id=2GMTfqr7eb", "pdf_size": 472968, "rating": "3;5;5", "confidence": "5;4;4", "soundness": "3;2;3", "contribution": "2;2;2", "presentation": "2;3;3", "wc_summary": "55;53;51", "wc_strengths": "27;80;35", "wc_weaknesses": "124;208;131", "wc_questions": "1;31;43", "wc_review": "207;372;260", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 53.0, 1.632993161855452 ], "wc_strengths_avg": [ 47.333333333333336, 23.328570942563587 ], "wc_weaknesses_avg": [ 154.33333333333334, 38.055515004033545 ], "wc_questions_avg": [ 25.0, 17.663521732655695 ], "wc_review_avg": [ 279.6666666666667, 68.78145744958367 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 3, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.9999999999999997, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:tz9qD1zgPBkJ:scholar.google.com/&scioq=Retro:+Reusing+teacher+projection+head+for+efficient+embedding+distillation+on+Lightweight+Models+via+Self-supervised+Learning&hl=en&as_sdt=0,23", "gs_version_total": 4, "aff_unique_index": "0", "aff_unique_norm": "National Cancer Center", "aff_unique_dep": "", "aff_unique_url": "https://www.ncc.re.kr", "aff_unique_abbr": "NCC", "aff_country_unique_index": "0", "aff_country_unique": "South Korea" }, { "title": "What does the Knowledge Neuron Thesis Have to do with Knowledge?", "status": "Spotlight", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19556", "id": "2HJRwwbV3G", "author_site": "Jingcheng Niu, Andrew Liu, Zining Zhu, Gerald Penn", "tldr": "", "abstract": "We reassess the Knowledge Neuron (KN) Thesis: an interpretation of the mechanism underlying the ability of large language models to recall facts from a training corpus. This nascent thesis proposes that facts are recalled from the training corpus through the MLP weights in a manner resembling key-value memory, implying in effect that \"knowledge\" is stored in the network. Furthermore, by modifying the MLP modules, one can control the language model's generation of factual information. The plausibility of the KN thesis has been demonstrated by the success of KN-inspired model editing methods (Dai et al., 2022; Meng et al., 2022).\n\nWe find that this thesis is, at best, an oversimplification. Not only have we found that we can edit the expression of certain linguistic phenomena using the same model editing methods but, through a more comprehensive evaluation, we have found that the KN thesis does not adequately explain the process of factual expression. While it is possible to argue that the MLP weights store complex patterns that are interpretable both syntactically and semantically, these patterns do not constitute \"knowledge.\" To gain a more comprehensive understanding of the knowledge representation process, we must look beyond the MLP weights and explore recent models' complex layer structures and attention mechanisms.", "keywords": "language model;knowledge neuron;model editing;formal and function competence;syntax;fact", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Jingcheng Niu;Andrew Liu;Zining Zhu;Gerald Penn", "authorids": "~Jingcheng_Niu1;~Andrew_Liu6;~Zining_Zhu1;~Gerald_Penn1", "gender": "M;M;;M", "homepage": "http://www.cs.toronto.edu/~niu/;https://www.cs.toronto.edu/~aliu/;http://ziningzhu.github.io;http://www.cs.toronto.edu/~gpenn/", "dblp": "245/8596.html;;188/5709;37/1531", "google_scholar": "XQuH0EEAAAAJ;;https://scholar.google.ca/citations?user=Xr_hCJMAAAAJ;ZnKtf4YAAAAJ", "orcid": ";;;0000-0003-3553-8305", "linkedin": ";andrew-liu438/;zining-zhu/;gerald-penn-1391bb3/", "or_profile": "~Jingcheng_Niu1;~Andrew_Liu6;~Zining_Zhu1;~Gerald_Penn1", "aff": "University of Toronto;University of Waterloo;University of Toronto;Department of Computer Science, University of Toronto", "aff_domain": "cs.toronto.edu;uwaterloo.ca;toronto.edu;cs.toronto.edu", "position": "PhD student;Undergrad student;PhD student;Full Professor", "bibtex": "@inproceedings{\nniu2024what,\ntitle={What does the Knowledge Neuron Thesis Have to do with Knowledge?},\nauthor={Jingcheng Niu and Andrew Liu and Zining Zhu and Gerald Penn},\nbooktitle={The Twelfth International Conference on Learning 
Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=2HJRwwbV3G}\n}", "github": "", "project": "", "reviewers": "ZPK4;7T91;c8FP", "pdf_size": 1172918, "rating": "6;8;8", "confidence": "3;3;4", "soundness": "3;4;3", "contribution": "2;4;4", "presentation": "3;2;3", "wc_summary": "114;138;46", "wc_strengths": "81;81;42", "wc_weaknesses": "179;100;23", "wc_questions": "321;141;1", "wc_review": "695;460;112", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "792;248;33", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 3.3333333333333335, 0.9428090415820634 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 99.33333333333333, 38.96437118987322 ], "wc_strengths_avg": [ 68.0, 18.384776310850235 ], "wc_weaknesses_avg": [ 100.66666666666667, 63.68847793928053 ], "wc_questions_avg": [ 154.33333333333334, 130.97921802925666 ], "wc_review_avg": [ 422.3333333333333, 239.49437478896152 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 357.6666666666667, 319.4164818679351 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.49999999999999983, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5330732570030088119&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=2HJRwwbV3G", "pdf": "https://openreview.net/pdf?id=2HJRwwbV3G", "email": "cs.toronto.edu;uwaterloo.ca;toronto.edu;cs.toronto.edu", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Toronto;University of Waterloo", "aff_unique_dep": ";", "aff_unique_url": "https://www.utoronto.ca;https://uwaterloo.ca", "aff_unique_abbr": "U of T;UW", "aff_campus_unique_index": "1", "aff_campus_unique": ";Toronto", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Canada" }, { "id": "2J25Vi9W8I", "title": "RegCLIP: A Label-Efficient Coarse-to-Fine Learner for Ordinal Regression", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Ordinal regression is a fundamental problem within the field of computer vision. While pre-trained vision-language models have exhibited impressive performance on various vision tasks, their potential for ordinal regression has received less exploration.\nIn this paper, we introduce a novel method called RegCLIP, a label-efficient coarse-to-fine method for ordinal regression. This approach incorporates language prior information to gradually refine predictions and achieve fine-grained results.\nOur RegCLIP framework encompasses two levels of coarse-to-fine concepts. The first level is a stagewise approach, performing intermediate classification initially and then refining the predictions. The second level is to generate coarse semantic labels as intermediate classes and subsequently refine them into fine-grained labels. To achieve it, we propose a novel coarse semantic label generation via large language models, which generates coarse labels. To further enhance the precision of predictions, we propose a novel fine-grained cross-modal ranking-based loss specifically designed to update fine-grained semantic labels with both semantic and ordinal alignment. 
Experimental results on three general ordinal regression tasks demonstrate the effectiveness of RegCLIP, exceeding state-of-the-art methods with a large margin, with 10% overall accuracy improvement on historical image dating, 1.74% overall accuracy improvement on image aesthetics assessment, and 1.33 MAE reduction on age estimation under 1-shot setting.", "keywords": "ordinal regression;contrastive learning;representation learning;vision-language", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Yao DU;Qiang Zhai;Weihang Dai;Xiaomeng Li", "authorids": "~Yao_DU4;~Qiang_Zhai1;~Weihang_Dai1;~Xiaomeng_Li1", "gender": ";M;M;F", "homepage": ";;;https://xmengli.github.io/", "dblp": ";;152/9822;02/9850-1", "google_scholar": ";3I5VuhUAAAAJ;;uVTzPpoAAAAJ", "orcid": ";;;", "linkedin": ";;weihang-dai-89122120;", "or_profile": "~Yao_DU4;~Qiang_Zhai1;~Weihang_Dai1;~Xiaomeng_Li1", "aff": ";Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology", "aff_domain": ";ust.hk;ust.hk;ust.hk", "position": ";Postdoc;PhD student;Assistant Professor", "bibtex": "@misc{\ndu2024regclip,\ntitle={Reg{CLIP}: A Label-Efficient Coarse-to-Fine Learner for Ordinal Regression},\nauthor={Yao DU and Qiang Zhai and Weihang Dai and Xiaomeng Li},\nyear={2024},\nurl={https://openreview.net/forum?id=2J25Vi9W8I}\n}", "github": "", "project": "", "reviewers": "BLNi;YX7j;oR5C;i2rY;DJCj", "site": "https://openreview.net/forum?id=2J25Vi9W8I", "pdf_size": 2314841, "rating": "3;3;5;5;8", "confidence": "4;5;5;4;5", "soundness": "3;2;2;3;3", "contribution": "2;2;2;2;3", "presentation": "2;3;3;2;3", "wc_summary": "65;72;83;54;76", "wc_strengths": "69;47;89;89;36", "wc_weaknesses": "85;119;169;77;180", "wc_questions": "124;151;5;13;79", "wc_review": "343;389;346;233;371", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "0;0;0;0;0", "rating_avg": [ 4.8, 1.8330302779823362 ], "confidence_avg": [ 4.6, 0.48989794855663565 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "contribution_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 70.0, 9.899494936611665 ], "wc_strengths_avg": [ 66.0, 21.577766334817884 ], "wc_weaknesses_avg": [ 126.0, 42.180564244685016 ], "wc_questions_avg": [ 74.4, 58.1982817615778 ], "wc_review_avg": [ 336.4, 54.38970490819012 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.35634832254989907, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:D9tgn8I8_lYJ:scholar.google.com/&scioq=RegCLIP:+A+Label-Efficient+Coarse-to-Fine+Learner+for+Ordinal+Regression&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Hong Kong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.ust.hk", "aff_unique_abbr": "HKUST", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Lipsum-FT: Robust Fine-Tuning of Zero-Shot Models Using Random Text Guidance", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19555", "id": "2JF8mJRJ7M", 
"author_site": "Giung Nam, Byeongho Heo, Juho Lee", "tldr": "", "abstract": "Large-scale contrastive vision-language pre-trained models provide the zero-shot model achieving competitive performance across a range of image classification tasks without requiring training on downstream data. Recent works have confirmed that while additional fine-tuning of the zero-shot model on the reference data results in enhanced downstream performance, it compromises the model's robustness against distribution shifts. Our investigation begins by examining the conditions required to achieve the goals of robust fine-tuning, employing descriptions based on feature distortion theory and joint energy-based models. Subsequently, we propose a novel robust fine-tuning algorithm, Lipsum-FT, that effectively utilizes the language modeling aspect of the vision-language pre-trained models. Extensive experiments conducted on distribution shift scenarios in DomainNet and ImageNet confirm the superiority of our proposed Lipsum-FT approach over existing robust fine-tuning methods.", "keywords": "computer vision;vision-langauge model;transfer learning;fine-tuning;distribution shifts", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "/attachment/f06d7c8550270c7566e73feb061af263396284a3.zip", "author": "Giung Nam;Byeongho Heo;Juho Lee", "authorids": "~Giung_Nam1;~Byeongho_Heo1;~Juho_Lee2", "gender": ";M;M", "homepage": "https://cs-giung.github.io/;https://sites.google.com/view/byeongho-heo/home;https://juho.lee.github.io", "dblp": "304/9008;142/2705;55/3410-1", "google_scholar": "https://scholar.google.co.kr/citations?user=HO-fMd8AAAAJ;https://scholar.google.co.kr/citations?user=4_7rLDIAAAAJ;Py4URJUAAAAJ", "orcid": ";;", "linkedin": ";byeongho-heo-1a7756122/;", "or_profile": "~Giung_Nam1;~Byeongho_Heo1;~Juho_Lee2", "aff": "Korea Advanced Institute of Science & Technology;NAVER AI Lab;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;navercorp.com;kaist.ac.kr", "position": "PhD student;Researcher;Associate Professor", "bibtex": "@inproceedings{\nnam2024lipsumft,\ntitle={Lipsum-{FT}: Robust Fine-Tuning of Zero-Shot Models Using Random Text Guidance},\nauthor={Giung Nam and Byeongho Heo and Juho Lee},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=2JF8mJRJ7M}\n}", "github": "", "project": "", "reviewers": "2e7P;Q7od;26HN;MuzV", "pdf_size": 6394826, "rating": "5;6;6;6", "confidence": "4;5;3;4", "soundness": "2;3;3;3", "contribution": "2;2;2;3", "presentation": "2;3;2;3", "wc_summary": "21;63;67;103", "wc_strengths": "38;46;86;97", "wc_weaknesses": "18;170;71;86", "wc_questions": "122;17;119;204", "wc_review": "199;296;343;490", "wc_reply_reviewers": "157;38;227;35", "wc_reply_authors": "1326;740;1665;845", "reply_reviewers": "2;1;2;1", "reply_authors": "3;2;4;3", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 63.5, 29.06458325866724 ], "wc_strengths_avg": [ 66.75, 25.21284394906691 ], "wc_weaknesses_avg": [ 86.25, 54.554445281754994 ], "wc_questions_avg": [ 115.5, 66.31176366226433 ], "wc_review_avg": [ 332.0, 104.96427963836078 ], "wc_reply_reviewers_avg": [ 114.25, 81.60078124626014 ], "wc_reply_authors_avg": [ 1144.0, 373.22312361374395 ], "reply_reviewers_avg": [ 1.5, 
0.5 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5894155217848335541&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=2JF8mJRJ7M", "pdf": "https://openreview.net/pdf?id=2JF8mJRJ7M", "email": "kaist.ac.kr;navercorp.com;kaist.ac.kr", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;NAVER Corporation", "aff_unique_dep": ";NAVER AI Lab", "aff_unique_url": "https://www.kaist.ac.kr;https://www.naver.com", "aff_unique_abbr": "KAIST;NAVER", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "id": "2Kf1AIdeyt", "title": "Balancing Information Preservation and Computational Efficiency: L2 Normalization and Geodesic Distance in Manifold Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Distinguishable metric of similarity plays a fundamental role in unsupervised learning, particularly in manifold learning and high-dimensional data visualization tasks, by which differentiate between observations without labels. However, conventional metrics like Euclidean distance after L1-normalization may fail by losing distinguishable information when handling high-dimensional data, where the distance between different observations gradually converges to a shrinking interval. In this article, we discuss the influence of normalization by different p-norms and the defect of Euclidean distance. We discover that observation differences are better preserved when normalizing data by a higher p-norm and using geodesic distance rather than Euclidean distance as the similarity measurement. We further identify that L2-normalization onto the hypersphere is often sufficient in preserving delicate differences even in relatively high dimensional data while maintaining computational efficiency. Subsequently, we present HS-SNE (HyperSphere-SNE), a hypersphere-representation-system-based augmentation to t-SNE, which effectively addresses the intricacy of high-dimensional data visualization and similarity measurement. 
Our results show that this hypersphere representation system has improved resolution to identify more subtle differences in high-dimensional data, while balancing information preservation and computational efficiency.", "keywords": "Normalization;Geodesic Distance;Manifold Learning;Bioinformatics", "primary_area": "visualization or interpretation of learned representations", "supplementary_material": "", "author": "Ziqi Rong;Jinpu Cai;Jiahao Qiu;Pengcheng Xu;Lana Garmire;Qiuyu Lian;Hongyi Xin", "authorids": "~Ziqi_Rong1;~Jinpu_Cai1;~Jiahao_Qiu1;~Pengcheng_Xu2;~Lana_Garmire1;~Qiuyu_Lian1;~Hongyi_Xin1", "gender": "M;M;M;Not Specified;;F;M", "homepage": "https://zqrong.com;https://carroll105.github.io/;;https://explcre.github.io;https://garmiregroup.org/;https://scholar.google.com/citations?user=sfD9B58AAAAJ&hl=en&oi=ao;http://gift.sjtu.edu.cn/novellab/", "dblp": ";279/6161;;;;;", "google_scholar": "oTH0DJcAAAAJ;Bmsg17YAAAAJ;86dbUg4AAAAJ;;6_k2UGsAAAAJ;sfD9B58AAAAJ;U7vpUGkAAAAJ", "orcid": "0000-0003-3760-8450;0009-0004-8636-3925;0009-0000-7752-4169;0009-0000-9858-9316;0000-0002-4654-2126;0000-0002-5279-1989;0000-0003-2864-7386", "linkedin": ";;jiahao-qiu-6a6161224/;pengcheng-xu-ryan/;;;", "or_profile": "~Ziqi_Rong1;~Jinpu_Cai1;~Jiahao_Qiu1;~Pengcheng_Xu2;~Lana_Garmire1;~Qiuyu_Lian1;~Hongyi_Xin1", "aff": "University of Michigan - Ann Arbor;Shanghai Jiaotong University;Princeton University;University of California, Irvine;University of Michigan - Ann Arbor;University of Cambridge;Shanghai Jiaotong University", "aff_domain": "umich.edu;sjtu.edu.cn;princeton.edu;uci.edu;umich.edu;cam.ac.uk;sjtu.edu.cn", "position": "MS student;PhD student;PhD student;PhD student;Associate Professor;Postdoc;Associate Professor", "bibtex": "@misc{\nrong2024balancing,\ntitle={Balancing Information Preservation and Computational Efficiency: L2 Normalization and Geodesic Distance in Manifold Learning},\nauthor={Ziqi Rong and Jinpu Cai and Jiahao Qiu and Pengcheng Xu and Lana Garmire and Qiuyu Lian and Hongyi Xin},\nyear={2024},\nurl={https://openreview.net/forum?id=2Kf1AIdeyt}\n}", "github": "", "project": "", "reviewers": "mtLk;2wH8;FzWv", "site": "https://openreview.net/forum?id=2Kf1AIdeyt", "pdf_size": 14210904, "rating": "3;3;5", "confidence": "5;5;4", "soundness": "2;2;2", "contribution": "1;2;2", "presentation": "3;2;3", "wc_summary": "85;94;73", "wc_strengths": "40;94;33", "wc_weaknesses": "69;158;42", "wc_questions": "374;158;55", "wc_review": "568;504;203", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "soundness_avg": [ 2.0, 0.0 ], "contribution_avg": [ 1.6666666666666667, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 84.0, 8.602325267042627 ], "wc_strengths_avg": [ 55.666666666666664, 27.255988129012838 ], "wc_weaknesses_avg": [ 89.66666666666667, 49.56028876249837 ], "wc_questions_avg": [ 195.66666666666666, 132.92688048530874 ], "wc_review_avg": [ 425.0, 159.1372573179099 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:TapYTLI9GGIJ:scholar.google.com/&scioq=Balancing+Information+Preservation+and+Computational+Efficiency:+L2+Normalization+and+Geodesic+Distance+in+Manifold+Learning&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;0;4;1", "aff_unique_norm": "University of Michigan;Shanghai Jiao Tong University;Princeton University;University of California, Irvine;University of Cambridge", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.umich.edu;https://www.sjtu.edu.cn;https://www.princeton.edu;https://www.uci.edu;https://www.cam.ac.uk", "aff_unique_abbr": "UM;SJTU;Princeton;UCI;Cambridge", "aff_campus_unique_index": "0;2;0;3", "aff_campus_unique": "Ann Arbor;;Irvine;Cambridge", "aff_country_unique_index": "0;1;0;0;0;2;1", "aff_country_unique": "United States;China;United Kingdom" }, { "id": "2LhCPowI6i", "title": "Self-Supervised Pseudodata Filtering for Improved Replay with Sub-Optimal Generators", "track": "main", "status": "Reject", "tldr": "", "abstract": "Continual learning on a sequence of tasks without forgetting previously acquired knowledge is one of the main challenges faced by modern deep neural networks. In the class-incremental scenario, one of the most difficult continual learning problems, new classes are presented to a classifier over time. The model needs to be able to learn and recognize these new classes while also retaining its knowledge of previously witnessed ones. To achieve this, the model has to revisit previous classes in some form, either by analysing stored exemplars or by using artificially generated samples. The latter approach, Generative Replay, usually relies on a separate generator trained alongside the main classifier. Since the generator also needs to learn continually, it is retrained on every task, using its own generated samples as training data representing older classes. This can lead to error propagation and accumulating features unimportant or confusing for the classifier, reducing the overall performance for larger numbers of tasks. We propose a simple filtering mechanism for mitigating this issue \u2013 whenever pseudodata is generated for a new task, the classifier can reject samples it is not able to classify with sufficient confidence, thus preventing itself from retraining on poor-quality data. We tested this mechanism using combinations of Bayesian neural classifiers and two different generators: a Variational Autoencoder and Real-value Non-Volume Preserving Normalizing Flow. 
We show that the improvement in the classification accuracy grows with the number of tasks, suggesting this approach is particularly useful for the most challenging continual learning scenarios, where very many tasks are learned in a sequence.", "keywords": "continual learning;catastrophic forgetting;generative replay;bayesian neural networks;deep learning", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "", "author": "Mateusz Wasiluk;Mikkel Elle Lepper\u00f8d;Kai Olav Ellefsen", "authorids": "~Mateusz_Wasiluk1;~Mikkel_Elle_Lepper\u00f8d1;kaiolae@ifi.uio.no", "gender": "M;M;", "homepage": "https://www.mn.uio.no/ibv/english/people/aca/mateuwa/index.html;;", "dblp": ";218/9214;", "google_scholar": ";https://scholar.google.no/citations?user=QSYCR88AAAAJ;", "orcid": ";;", "linkedin": "mwasiluk314/;;", "or_profile": "~Mateusz_Wasiluk1;~Mikkel_Elle_Lepper\u00f8d1;kaiolae@ifi.uio.no", "aff": "University of Oslo;Simula Research Laboratory;", "aff_domain": "uio.no;simula.no;", "position": "PhD student;Principal Researcher;", "bibtex": "@misc{\nwasiluk2024selfsupervised,\ntitle={Self-Supervised Pseudodata Filtering for Improved Replay with Sub-Optimal Generators},\nauthor={Mateusz Wasiluk and Mikkel Elle Lepper{\\o}d and Kai Olav Ellefsen},\nyear={2024},\nurl={https://openreview.net/forum?id=2LhCPowI6i}\n}", "github": "", "project": "", "reviewers": "6px8;yxwt;fB4Z", "site": "https://openreview.net/forum?id=2LhCPowI6i", "pdf_size": 892010, "rating": "1;3;3", "confidence": "5;5;4", "soundness": "2;2;2", "contribution": "1;2;1", "presentation": "3;2;2", "wc_summary": "126;55;62", "wc_strengths": "133;10;41", "wc_weaknesses": "550;147;576", "wc_questions": "36;4;282", "wc_review": "845;216;961", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 2.3333333333333335, 0.9428090415820634 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "soundness_avg": [ 2.0, 0.0 ], "contribution_avg": [ 1.3333333333333333, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 81.0, 31.94787421201396 ], "wc_strengths_avg": [ 61.333333333333336, 52.23238671765078 ], "wc_weaknesses_avg": [ 424.3333333333333, 196.3913326894941 ], "wc_questions_avg": [ 107.33333333333333, 124.19697616644655 ], "wc_review_avg": [ 674.0, 327.29904776315294 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:hlB2n22EuZIJ:scholar.google.com/&scioq=Self-Supervised+Pseudodata+Filtering+for+Improved+Replay+with+Sub-Optimal+Generators&hl=en&as_sdt=0,33", "gs_version_total": 2, "aff_unique_index": "0;1", "aff_unique_norm": "University of Oslo;Simula Research Laboratory", "aff_unique_dep": ";", "aff_unique_url": "https://www.uio.no;https://www.simula.no", "aff_unique_abbr": "UiO;Simula", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Norway" }, { "id": "2M4GAkUkjA", "title": "Efficient Link Prediction via GNN Layers Induced by Negative Sampling", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Graph neural networks (GNNs) for link prediction can loosely be divided into two broad categories. 
First, \\emph{node-wise} architectures pre-compute individual embeddings for each node that are later combined by a simple decoder to make predictions. While extremely efficient at inference time (since node embeddings are only computed once and repeatedly reused), model expressiveness is limited such that isomorphic nodes contributing to candidate edges may not be distinguishable, compromising accuracy. In contrast, \\emph{edge-wise} methods rely on the formation of edge-specific subgraph embeddings to enrich the representation of pair-wise relationships, disambiguating isomorphic nodes to improve accuracy, but with the cost of increased model complexity. To better navigate this trade-off, we propose a novel GNN architecture whereby the \\emph{forward pass} explicitly depends on \\emph{both} positive (as is typical) and negative (unique to our approach) edges to inform more flexible, yet still cheap node-wise embeddings. This is achieved by recasting the embeddings themselves as minimizers of a forward-pass-specific energy function (distinct from the actual training loss) that favors separation of positive and negative samples. As demonstrated by extensive empirical evaluations, the resulting architecture retains the inference speed of node-wise models, while producing competitive accuracy with edge-wise alternatives.", "keywords": "Link prediction;Inference speed", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/de12fedf085aa55ceeacfe7e259d100e08382093.zip", "author": "Yuxin Wang;Xiannian Hu;Quan Gan;Xuanjing Huang;Xipeng Qiu;David Wipf", "authorids": "~Yuxin_Wang3;~Xiannian_Hu1;~Quan_Gan1;~Xuanjing_Huang1;~Xipeng_Qiu1;~David_Wipf1", "gender": "M;M;M;F;M;M", "homepage": ";;;https://xuanjing-huang.github.io/;https://xpqiu.github.io/;http://www.davidwipf.com/", "dblp": "68/1041;;72/3872;05/6735-1;69/1395;81/6421", "google_scholar": "bTo8CT0AAAAJ;;;RGsMgZA4H78C;Pq4Yp_kAAAAJ;YJx1WSgAAAAJ", "orcid": ";;0009-0002-0986-457X;0000-0001-9197-9426;0000-0001-7163-5247;", "linkedin": ";\u5148\u5ff5-\u80e1-6b17781a9/;quan-gan-231992136/;;;", "or_profile": "~Yuxin_Wang3;~Xiannian_Hu1;~Quan_Gan1;~Xuanjing_Huang1;~Xipeng_Qiu1;~David_Wipf1", "aff": "Fudan University;Fudan University;Amazon;Fudan University;Fudan University;Amazon AI Research Lab", "aff_domain": "fudan.edu.cn;fudan.edu.cn;amazon.com;fudan.edu.cn;fudan.edu.cn;amazon.com", "position": "PhD student;MS student;Researcher;Full Professor;Full Professor;Principal Research Scientist", "bibtex": "@misc{\nwang2024efficient,\ntitle={Efficient Link Prediction via {GNN} Layers Induced by Negative Sampling},\nauthor={Yuxin Wang and Xiannian Hu and Quan Gan and Xuanjing Huang and Xipeng Qiu and David Wipf},\nyear={2024},\nurl={https://openreview.net/forum?id=2M4GAkUkjA}\n}", "github": "", "project": "", "reviewers": "TNzh;32eE;EieC;pZx7", "site": "https://openreview.net/forum?id=2M4GAkUkjA", "pdf_size": 920133, "rating": "3;5;5;5", "confidence": "4;4;3;2", "soundness": "2;3;3;2", "contribution": "1;2;3;2", "presentation": "3;2;2;2", "wc_summary": "82;25;51;116", "wc_strengths": "28;35;61;30", "wc_weaknesses": "269;174;108;198", "wc_questions": "3;135;3;24", "wc_review": "382;369;223;368", "wc_reply_reviewers": "0;23;0;0", "wc_reply_authors": "756;921;539;1002", "reply_reviewers": "0;1;0;0", "reply_authors": "1;3;1;2", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 
2.0, 0.7071067811865476 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 68.5, 34.04776057246644 ], "wc_strengths_avg": [ 38.5, 13.238202294873727 ], "wc_weaknesses_avg": [ 187.25, 57.56463758246029 ], "wc_questions_avg": [ 41.25, 54.8013457863947 ], "wc_review_avg": [ 335.5, 65.18627156081256 ], "wc_reply_reviewers_avg": [ 5.75, 9.959292143521045 ], "wc_reply_authors_avg": [ 804.5, 177.07413701610972 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2917867882034440672&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 8, "aff_unique_index": "0;0;1;0;0;1", "aff_unique_norm": "Fudan University;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.fudan.edu.cn;https://www.amazon.com", "aff_unique_abbr": "Fudan;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;1", "aff_country_unique": "China;United States" }, { "title": "Pooling Image Datasets with Multiple Covariate Shift and Imbalance", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19554", "id": "2Mo7v69otj", "author_site": "Sotirios Panagiotis Chytas, Vishnu Lokhande, Vikas Singh", "tldr": "", "abstract": "Small sample sizes are common in many disciplines, \nwhich necessitates pooling roughly similar datasets across \nmultiple sites/institutions to study weak but relevant \nassociations between images and disease incidence. Such \ndata often manifest shifts and imbalances in covariates \n(secondary non-imaging data). \nThese issues are well-studied for classical models, but \nthe ideas simply do not apply to overparameterized DNN models. \nConsequently, recent work has shown how strategies from \nfairness and invariant representation learning provides \na meaningful starting point, but the current repertoire \nof methods remains limited to accounting for shifts/imbalances in just a couple of covariates at a time. In this paper, we show how \nviewing this problem from the perspective of Category theory \nprovides a simple and effective solution that completely avoids \nelaborate multi-stage training pipelines that would otherwise be \nneeded. We show the effectiveness of this approach via \nextensive experiments on real datasets. 
Further, we \ndiscuss how our style of formulation offers a unified \nperspective on at least 5+ distinct \nproblem settings in vision, from self-supervised learning\nto matching problems in 3D reconstruction.", "keywords": "image harmonization;medical imaging", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Sotirios Panagiotis Chytas;Vishnu Suresh Lokhande;Vikas Singh", "authorids": "~Sotirios_Panagiotis_Chytas1;~Vishnu_Suresh_Lokhande1;~Vikas_Singh1", "gender": ";;M", "homepage": ";;http://vsingh-www.cs.wisc.edu/", "dblp": ";;", "google_scholar": ";;d32BmwcAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Sotirios_Panagiotis_Chytas1;~Vishnu_Suresh_Lokhande1;~Vikas_Singh1", "aff": ";;University of Wisconsin, Madison", "aff_domain": ";;wisc.edu", "position": ";;Professor", "bibtex": "@inproceedings{\nchytas2024pooling,\ntitle={Pooling Image Datasets with Multiple Covariate Shift and Imbalance},\nauthor={Sotirios Panagiotis Chytas and Vishnu Suresh Lokhande and Vikas Singh},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=2Mo7v69otj}\n}", "github": "", "project": "", "reviewers": "zBw9;Fvtj;SjKQ;KLWx", "pdf_size": 2868021, "rating": "5;6;6;8", "confidence": "1;3;3;4", "soundness": "3;3;4;3", "contribution": "2;2;4;3", "presentation": "3;3;3;3", "wc_summary": "21;73;159;122", "wc_strengths": "11;48;42;119", "wc_weaknesses": "6;92;37;235", "wc_questions": "1;5;46;21", "wc_review": "39;218;284;497", "wc_reply_reviewers": "0;0;0;29", "wc_reply_authors": "16;300;304;476", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;2", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 2.75, 1.0897247358851685 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 93.75, 51.91037950159871 ], "wc_strengths_avg": [ 55.0, 39.528470752104745 ], "wc_weaknesses_avg": [ 92.5, 87.8478798833529 ], "wc_questions_avg": [ 18.25, 17.68297203526602 ], "wc_review_avg": [ 259.5, 163.82078622690102 ], "wc_reply_reviewers_avg": [ 7.25, 12.55736835487436 ], "wc_reply_authors_avg": [ 274.0, 165.03332996701 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.894736842105263, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10639822793901611033&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=2Mo7v69otj", "pdf": "https://openreview.net/pdf?id=2Mo7v69otj", "email": ";;wisc.edu", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "University of Wisconsin", "aff_unique_dep": "", "aff_unique_url": "https://www.wisc.edu", "aff_unique_abbr": "UW", "aff_campus_unique_index": "0", "aff_campus_unique": "Madison", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "2MpOjashKU", "title": "Divided Attention: Unsupervised Multiple-object Discovery and Segmentation with Interpretable Contextually Separated Slots", "track": "main", "status": "Reject", "tldr": "", "abstract": "We introduce a method to segment the visual field into independently moving regions in real-time, trained without ground truth or supervision, needing neither pre-trained image features nor additional data outside the 
domain of interest. The model consists of an adversarial conditional encoder-decoder architecture based on Slot Attention, modified to use the image as context to decode optical flow without attempting to reconstruct the image itself. One modality (flow) feeds the encoder to produce separate latent codes (slots), whereas the other modality (image) conditions the decoder to generate the first (flow) from the slots. This design frees the representation from having to encode complex nuisance variability in the image due to, for instance, illumination and reflectance properties of the scene. Since customary autoencoding based on minimizing the reconstruction error does not preclude the entire flow from being encoded into a single slot, we design the loss with an adversarial criterion based on Contextual Information Separation. The resulting min-max optimization fosters the separation of objects and their assignment to different attention slots, leading to Divided Attention (DivA). DivA outperforms recent unsupervised multi-object motion segmentation methods while tripling run-time speed up to 104FPS and reducing the performance gap from supervised methods to 12% or less. DivA can handle different numbers of objects and different image resolutions at training and test time, is invariant to the permutation of object labels, and does not require explicit regularization.", "keywords": "Moving object segmentation;Slot attention;Unsupervised object discovery", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Dong Lao;Zhengyang Hu;Francesco Locatello;Yanchao Yang;Stefano Soatto", "authorids": "~Dong_Lao1;~Zhengyang_Hu1;~Francesco_Locatello1;~Yanchao_Yang1;~Stefano_Soatto1", "gender": "M;M;M;M;", "homepage": ";;https://twitter.com/FrancescoLocat8;https://yanchaoyang.github.io/;https://www.cs.ucla.edu/~soatto", "dblp": "180/5522;219/6927-2;195/6074;84/8637-1;08/1262", "google_scholar": "dvQXYW0AAAAJ;qe-fgwYAAAAJ;;r2tKnV4AAAAJ;lH1PdF8AAAAJ", "orcid": ";;;;0000-0003-2902-6362", "linkedin": ";;;;stefano-soatto-5765aa6/", "or_profile": "~Dong_Lao1;~Zhengyang_Hu1;~Francesco_Locatello1;~Yanchao_Yang1;~Stefano_Soatto2", "aff": "University of California, Los Angeles;Hong Kong University;Institute of Science and Technology;University of Hong Kong;UCLA Computer Science Department, University of California, Los Angeles", "aff_domain": "cs.ucla.edu;connect.hku.hk;ist.ac.at;hku.hk;cs.ucla.edu", "position": "Postdoc;PhD student;Assistant Professor;Assistant Professor;Professor", "bibtex": "@misc{\nlao2024divided,\ntitle={Divided Attention: Unsupervised Multiple-object Discovery and Segmentation with Interpretable Contextually Separated Slots},\nauthor={Dong Lao and Zhengyang Hu and Francesco Locatello and Yanchao Yang and Stefano Soatto},\nyear={2024},\nurl={https://openreview.net/forum?id=2MpOjashKU}\n}", "github": "", "project": "", "reviewers": "5Uwa;8fzT;i9DX;RqAo", "site": "https://openreview.net/forum?id=2MpOjashKU", "pdf_size": 8632644, "rating": "5;5;5;6", "confidence": "3;4;5;2", "soundness": "3;3;2;3", "contribution": "3;2;1;3", "presentation": "3;3;3;3", "wc_summary": "66;142;112;58", "wc_strengths": "25;83;48;20", "wc_weaknesses": "187;196;338;6", "wc_questions": "3;15;3;6", "wc_review": "281;436;501;90", "wc_reply_reviewers": "0;135;427;0", "wc_reply_authors": "740;710;1255;59", "reply_reviewers": "0;1;1;0", "reply_authors": "1;1;2;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 
1.118033988749895 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 94.5, 34.30378987808781 ], "wc_strengths_avg": [ 44.0, 24.869660230891775 ], "wc_weaknesses_avg": [ 181.75, 117.82693877038476 ], "wc_questions_avg": [ 6.75, 4.9180788932265 ], "wc_review_avg": [ 327.0, 158.46292941883917 ], "wc_reply_reviewers_avg": [ 140.5, 174.35093920022342 ], "wc_reply_authors_avg": [ 691.0, 424.3471456248999 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.7745966692414834, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:oIJpUrYxKIEJ:scholar.google.com/&scioq=Divided+Attention:+Unsupervised+Multiple-object+Discovery+and+Segmentation+with+Interpretable+Contextually+Separated+Slots&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "University of California, Los Angeles;Hong Kong University;Institute of Science and Technology;University of Hong Kong", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ucla.edu;https://www.hku.hk;;https://www.hku.hk", "aff_unique_abbr": "UCLA;HKU;;HKU", "aff_campus_unique_index": "0;1;1;0", "aff_campus_unique": "Los Angeles;Hong Kong SAR;", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "United States;China;" }, { "title": "Neural Neighborhood Search for Multi-agent Path Finding", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19553", "id": "2NpAw2QJBY", "author_site": "Zhongxia Yan, Cathy Wu", "tldr": "", "abstract": "Multi-agent path finding (MAPF) is the combinatorial problem of planning optimal collision-avoiding paths for multiple agents, with application to robotics, logistics, and transportation. Though many recent learning-based works have focused on large-scale combinatorial problems by guiding their decomposition into sequences of smaller subproblems, the combined spatiotemporal and time-restricted nature of MAPF poses a particular challenge for learning-based guidance of iterative approaches like large neighborhood search (LNS), which is already a state-of-the-art approach for MAPF even without learning. We address this challenge of neural-guided LNS for MAPF by designing an architecture which interleaves convolution and attention to efficiently represent MAPF subproblems, enabling practical guidance of LNS in benchmark settings. We demonstrate the speedup of our method over existing state-of-the-art LNS-based methods for MAPF as well as the robustness of our method to unseen settings. 
Our proposed method expands the horizon of effective deep learning-guided LNS methods into multi-path planning problems, and our proposed representation may be more broadly applicable for representing path-wise interactions.", "keywords": "Combinatorial Optimization;Neural Architecture;Multi-agent;Path Planning", "primary_area": "applications to robotics, autonomy, planning", "supplementary_material": "/attachment/316fe71d286baf490ed3dadb3478af1d1fc51bd1.pdf", "author": "Zhongxia Yan;Cathy Wu", "authorids": "~Zhongxia_Yan1;~Cathy_Wu1", "gender": "M;F", "homepage": "https://github.com/ZhongxiaYan/;http://wucathy.com", "dblp": "250/0690-1;155/3740", "google_scholar": "jI_wcL8AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0003-1897-7381;0000-0001-8594-303X", "linkedin": "zhongxiayan/;cathywu/", "or_profile": "~Zhongxia_Yan1;~Cathy_Wu1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nyan2024neural,\ntitle={Neural Neighborhood Search for Multi-agent Path Finding},\nauthor={Zhongxia Yan and Cathy Wu},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=2NpAw2QJBY}\n}", "github": "", "project": "", "reviewers": "TyGL;pDBe;TxMF;CV86", "pdf_size": 2380253, "rating": "3;6;6;6", "confidence": "4;3;4;5", "soundness": "2;3;3;3", "contribution": "2;3;3;3", "presentation": "3;4;3;3", "wc_summary": "142;164;122;56", "wc_strengths": "22;126;63;30", "wc_weaknesses": "629;148;140;128", "wc_questions": "103;112;155;96", "wc_review": "896;550;480;310", "wc_reply_reviewers": "222;97;188;28", "wc_reply_authors": "2343;303;659;569", "reply_reviewers": "3;2;1;1", "reply_authors": "5;2;2;2", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 121.0, 40.36087214122113 ], "wc_strengths_avg": [ 60.25, 40.95347970563674 ], "wc_weaknesses_avg": [ 261.25, 212.43984442660468 ], "wc_questions_avg": [ 116.5, 22.940139493908923 ], "wc_review_avg": [ 559.0, 213.243991709028 ], "wc_reply_reviewers_avg": [ 133.75, 76.26393315322781 ], "wc_reply_authors_avg": [ 968.5, 804.2902150343494 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6814423661655057347&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "openreview": "https://openreview.net/forum?id=2NpAw2QJBY", "pdf": "https://openreview.net/pdf?id=2NpAw2QJBY", "email": "mit.edu;mit.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "2NwHLAffZZ", "title": "Weak Correlations as the Underlying Principle for Linearization of Gradient-Based Learning Systems", "track": "main", "status": "Reject", "tldr": "", "abstract": "Numerous deep learning models including wide neural networks, can be conceptualized as nonlinear dynamical physical 
systems with a large number of interacting degrees of freedom, which, in the infinite limit, exhibit simplified dynamics. In this work we analyze gradient descent based learning systems that demonstrate a linear learning structure in their parameters, analogous to the neural tangent kernel. We establish that this linearity is equivalent to weak correlations between the first and higher derivatives of the hypothesis function with respect to the parameters around their initial values, suggesting that these weak correlations are the underlying reason for the observed linearization of these systems. We demonstrate the weak correlations structure in the example of neural networks in the large width limit. By leveraging the equivalence between linearity and weak correlations, we derive a bound on the deviation from linearity along the training path for stochastic gradient descent. To facilitate our proof, we introduce a method to bound the asymptotic behavior of random tensors, and demonstrate that any such tensor possesses a unique tight bound.", "keywords": "Neural Tangent Kernel;Deep Learning and representational learning;Kernels;Statistical Mechanics", "primary_area": "metric learning, kernel learning, and sparse coding", "supplementary_material": "/attachment/31b48e0d67630d450c93436c3d0ce3d89860f93e.pdf", "author": "Ori Shem Ur;Yaron Oz", "authorids": "~Ori_Shem_Ur1;~Yaron_Oz1", "gender": "M;", "homepage": "https://scholar.google.com/citations?hl=iw&user=O4NfrZYAAAAJ;", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Ori_Shem_Ur1;~Yaron_Oz1", "aff": "Tel Aviv University;Tel Aviv University, Technion", "aff_domain": "tau.ac.il;tau.ac.il", "position": "MS student;Full Professor", "bibtex": "@misc{\nur2024weak,\ntitle={Weak Correlations as the Underlying Principle for Linearization of Gradient-Based Learning Systems},\nauthor={Ori Shem Ur and Yaron Oz},\nyear={2024},\nurl={https://openreview.net/forum?id=2NwHLAffZZ}\n}", "github": "", "project": "", "reviewers": "VTRJ;YXgU;ZGhT", "site": "https://openreview.net/forum?id=2NwHLAffZZ", "pdf_size": 278044, "rating": "1;3;3", "confidence": "4;3;4", "soundness": "1;2;2", "contribution": "4;2;1", "presentation": "1;2;1", "wc_summary": "130;302;65", "wc_strengths": "85;50;14", "wc_weaknesses": "209;120;614", "wc_questions": "180;34;1", "wc_review": "604;506;694", "wc_reply_reviewers": "64;0;0", "wc_reply_authors": "70;70;70", "reply_reviewers": "1;0;0", "reply_authors": "1;1;1", "rating_avg": [ 2.3333333333333335, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 1.6666666666666667, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 1.247219128924647 ], "presentation_avg": [ 1.3333333333333333, 0.4714045207910317 ], "wc_summary_avg": [ 165.66666666666666, 99.98777703077289 ], "wc_strengths_avg": [ 49.666666666666664, 28.986586936412884 ], "wc_weaknesses_avg": [ 314.3333333333333, 214.98888860176524 ], "wc_questions_avg": [ 71.66666666666667, 77.7788888809525 ], "wc_review_avg": [ 601.3333333333334, 76.77383813658285 ], "wc_reply_reviewers_avg": [ 21.333333333333332, 30.169889330626027 ], "wc_reply_authors_avg": [ 70.0, 0.0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:v-FlDGJtsIwJ:scholar.google.com/&scioq=Weak+Correlations+as+the+Underlying+Principle+for+Linearization+of+Gradient-Based+Learning+Systems&hl=en&as_sdt=0,23", "gs_version_total": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Tel Aviv University", "aff_unique_dep": "", "aff_unique_url": "https://www.tau.ac.il", "aff_unique_abbr": "TAU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Israel" }, { "id": "2O2FOO8pl4", "title": "Label Privacy Source Coding in Vertical Federated Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "We study label privacy protection in vertical federated learning (VFL). VFL enables an active party who possesses labeled data to improve model performance (utility) by collaborating with passive parties who have auxiliary features. Recently, there has been a growing concern for protecting label privacy against semi-honest passive parties who may surreptitiously deduce private labels from the output of their bottom models. However, existing studies do not remove the prior label information in the active party's features from labels in an offline phase, thus leaking unnecessary label privacy to passive parties.\nIn contrast to existing methods that focus on training-phase perturbation, we propose a novel offline-phase data cleansing approach to protect label privacy without compromising utility. Specifically, we first formulate a Label Privacy Source Coding (LPSC) problem to remove the redundant label information in the active party's features from labels, by assigning each sample a new weight and label (i.e., residual) for federated training. We give a privacy guarantee and theoretically prove that gradient boosting efficiently optimizes the LPSC problem. Therefore, we propose the Vertical Federated Gradient Boosting (VFGBoost) framework to address the LPSC problem. Moreover, given that LPSC only provides upper-bounded privacy enhancement, VFGBoost further enables a flexible privacy-utility trade-off by incorporating adversarial training during federated training. 
Experimental results on four real-world datasets substantiate the efficacy of LPSC and the superiority of our VFGBoost framework.", "keywords": "Label privacy;Vertical Federated learning;Gradient boosting;Adversarial training", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "", "author": "Dashan Gao;Sheng Wan;Hanlin Gu;Lixin Fan;Xin Yao;Qiang Yang", "authorids": "~Dashan_Gao4;~Sheng_Wan2;~Hanlin_Gu1;~Lixin_Fan1;~Xin_Yao1;~Qiang_Yang1", "gender": "M;M;M;M;;", "homepage": "https://dashangao.github.io/;https://scholar.google.com/citations?user=CKfbX4AAAAAJ&hl=en&oi=ao;;;http://www.cs.bham.ac.uk/~xin;", "dblp": "https://dblp.uni-trier.de/pid/37/2326-2.html;;236/6661;36/3111;;", "google_scholar": "7WVd9ZwAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en;;", "orcid": ";;0000-0001-8266-4561;;;", "linkedin": ";;;;;", "or_profile": "~Dashan_Gao4;~Sheng_Wan2;~Hanlin_Gu1;~Lixin_Fan1;~Xin_Yao1;~Qiang_Yang1", "aff": "Department of Computer Science and Engineering, Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;webank;WeBank;;", "aff_domain": "cse.ust.hk;hkust.edu;webank.com;webank.com;;", "position": "PhD student;PhD student;Researcher;Principal Researcher;;", "bibtex": "@misc{\ngao2024label,\ntitle={Label Privacy Source Coding in Vertical Federated Learning},\nauthor={Dashan Gao and Sheng Wan and Hanlin Gu and Lixin Fan and Xin Yao and Qiang Yang},\nyear={2024},\nurl={https://openreview.net/forum?id=2O2FOO8pl4}\n}", "github": "", "project": "", "reviewers": "wj8U;pUxt;WYNH;3hcA", "site": "https://openreview.net/forum?id=2O2FOO8pl4", "pdf_size": 1590599, "rating": "5;5;5;6", "confidence": "4;5;4;3", "soundness": "3;3;2;3", "contribution": "3;3;2;3", "presentation": "1;2;3;3", "wc_summary": "135;64;71;115", "wc_strengths": "35;51;12;36", "wc_weaknesses": "140;222;127;21", "wc_questions": "89;55;3;125", "wc_review": "399;392;213;297", "wc_reply_reviewers": "95;151;0;11", "wc_reply_authors": "1997;1721;987;1193", "reply_reviewers": "1;1;0;1", "reply_authors": "5;5;4;3", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 96.25, 29.710057219736214 ], "wc_strengths_avg": [ 33.5, 13.937359864766353 ], "wc_weaknesses_avg": [ 127.5, 71.46502641152524 ], "wc_questions_avg": [ 68.0, 44.955533585978046 ], "wc_review_avg": [ 325.25, 76.3098126062435 ], "wc_reply_reviewers_avg": [ 64.25, 62.118334652500145 ], "wc_reply_authors_avg": [ 1474.5, 403.3196623027447 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 4.25, 0.82915619758885 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:LVFcbT-J5L4J:scholar.google.com/&scioq=Label+Privacy+Source+Coding+in+Vertical+Federated+Learning&hl=en&as_sdt=0,5", "gs_version_total": 4, "aff_unique_index": "0;0;1;1", "aff_unique_norm": "Hong Kong University of Science and Technology;WeBank", "aff_unique_dep": "Department of Computer Science and Engineering;", "aff_unique_url": "https://www.ust.hk;https://www.webank.com", "aff_unique_abbr": "HKUST;WeBank", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0", 
"aff_country_unique": "China" }, { "title": "PerceptionCLIP: Visual Classification by Inferring and Conditioning on Contexts", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19552", "id": "2Oiee202rd", "author_site": "Bang An, Sicheng Zhu, Michael-Andrei Panaitescu-Liess, Chaithanya Kumar Mummadi, Furong Huang", "tldr": "", "abstract": "Vision-language models like CLIP are widely used in zero-shot image classification due to their ability to understand various visual concepts and natural language descriptions. However, how to fully leverage CLIP's unprecedented human-like understanding capabilities to achieve better performance is still an open question. This paper draws inspiration from the human visual perception process: when classifying an object, humans first infer contextual attributes (e.g., background and orientation) which help separate the foreground object from the background, and then classify the object based on this information. Inspired by it, we observe that providing CLIP with contextual attributes improves zero-shot image classification and mitigates reliance on spurious features. We also observe that CLIP itself can reasonably infer the attributes from an image. With these observations, we propose a training-free, two-step zero-shot classification method PerceptionCLIP. Given an image, it first infers contextual attributes (e.g., background) and then performs object classification conditioning on them. Our experiments show that PerceptionCLIP achieves better generalization, group robustness, and interpretability.", "keywords": "vision-language model;CLIP;zero-shot;image classification;human perception;contexts;contextual attributes;spurious feature", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Bang An;Sicheng Zhu;Michael-Andrei Panaitescu-Liess;Chaithanya Kumar Mummadi;Furong Huang", "authorids": "~Bang_An1;~Sicheng_Zhu1;~Michael-Andrei_Panaitescu-Liess1;~Chaithanya_Kumar_Mummadi1;~Furong_Huang1", "gender": ";M;M;M;F", "homepage": "https://bangann.github.io/;https://schzhu.github.io/;;;https://furong-huang.com", "dblp": "188/0741;;263/6954.html;208/6386;72/8513", "google_scholar": "3ce6z_sAAAAJ;;MOP6lhkAAAAJ;XJLtaG4AAAAJ;13yyuCcAAAAJ", "orcid": ";;;0000-0002-1173-2720;", "linkedin": ";;michael-panaitescu-0849a3182/;;", "or_profile": "~Bang_An1;~Sicheng_Zhu1;~Michael-Andrei_Panaitescu-Liess1;~Chaithanya_Kumar_Mummadi1;~Furong_Huang1", "aff": "University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park;Bosch Center for Artificial Intelligence;University of Maryland", "aff_domain": "umd.edu;umd.edu;umd.edu;bosch.com;cs.umd.edu", "position": "PhD student;PhD student;PhD student;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nan2024perceptionclip,\ntitle={Perception{CLIP}: Visual Classification by Inferring and Conditioning on Contexts},\nauthor={Bang An and Sicheng Zhu and Michael-Andrei Panaitescu-Liess and Chaithanya Kumar Mummadi and Furong Huang},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=2Oiee202rd}\n}", "github": "", "project": "", "reviewers": "vx4m;U13E;k2xX;VSta", "pdf_size": 10417446, "rating": "5;5;6;8", "confidence": "4;4;4;3", "soundness": "2;2;3;3", "contribution": "3;3;3;3", "presentation": "4;3;3;4", "wc_summary": "31;69;100;37", "wc_strengths": "72;33;44;64", "wc_weaknesses": 
"312;246;263;17", "wc_questions": "5;13;77;44", "wc_review": "420;361;484;162", "wc_reply_reviewers": "214;15;0;0", "wc_reply_authors": "1015;1569;974;195", "reply_reviewers": "1;1;0;0", "reply_authors": "2;5;2;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 59.25, 27.60774347895894 ], "wc_strengths_avg": [ 53.25, 15.514106484100203 ], "wc_weaknesses_avg": [ 209.5, 113.75082417283842 ], "wc_questions_avg": [ 34.75, 28.411045387313717 ], "wc_review_avg": [ 356.75, 120.55989175509407 ], "wc_reply_reviewers_avg": [ 57.25, 90.70660119307745 ], "wc_reply_authors_avg": [ 938.25, 489.2429738892527 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.5, 1.5 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.9428090415820632, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3534252523528312436&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=2Oiee202rd", "pdf": "https://openreview.net/pdf?id=2Oiee202rd", "email": "umd.edu;umd.edu;umd.edu;bosch.com;cs.umd.edu", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "University of Maryland;Bosch Center for Artificial Intelligence", "aff_unique_dep": ";Center for Artificial Intelligence", "aff_unique_url": "https://www/umd.edu;https://www.bosch-ai.com", "aff_unique_abbr": "UMD;BCAI", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "United States;Germany" }, { "id": "2OwSqvxjP2", "title": "Boosting Semi-Supervised Learning via Variational Confidence Calibration and Unlabeled Sample Elimination", "track": "main", "status": "Reject", "tldr": "", "abstract": "Despite the recent progress of Semi-supervised Learning (SSL), we argue that the existing methods may not employ unlabeled examples effectively and efficiently. Many pseudo-label-based methods select unlabeled examples into the training stage based on the inaccurate confidence scores provided by the output layer of the classifier network. Additionally, most prior work typically adpots all the available unlabeled examples without data pruning, which is incapable of learning from massive unlabeled data. To address these issues, this paper proposes two methods called VCC (Variational Confidence Calibration) and INFUSE (INfluence-Function-based Unlabeled Sample Elimination). VCC is a general-purpose plugin of confidence calibration for SSL. By approximating the calibrated confidence through three types of consistency scores, a variational autoencoder is leveraged to reconstruct the confidence score for selecting more accurate pseudo-labels. Based on the influence function, INFUSE is a data pruning method for constructing a core dataset of unlabeled examples. The effectiveness of our methods is demonstrated through experiments on multiple datasets and in various settings. For example, on the CIFAR-100 dataset with 400 labeled examples, VCC reduces the classification error rate of FixMatch from 46.47\\% to 43.31\\% (with improvement of 3.16\\%). On the SVHN dataset with 250 labeled examples, INFUSE achieves 2.61\\% error rate using only 10\\% unlabeled data, which is better than RETRIEVE (2.90\\%) and the baseline with full unlabeled data (3.80\\%). 
Putting all the pieces together, the combined VCC-INFUSE plugins can reduce the error rate of FlexMatch from 26.49\\% to 25.41\\% on the CIFAR100 dataset (with improvement of 1.08\\%) while saving nearly half of the original training time (from 223.96 GPU hours to 115.47 GPU hours).", "keywords": "Semi-Supervised Learning;Calibration;Sample Elimination", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Qianhan Feng;Shijie Fang;Tong Lin", "authorids": "~Qianhan_Feng1;~Shijie_Fang1;~Tong_Lin1", "gender": "M;M;M", "homepage": "https://fqhank.github.io/fengqianhan.github.io/;;https://sai.pku.edu.cn/szdw/zzjs/lt.htm", "dblp": "300/5376;;74/5719-2", "google_scholar": "HtigWVwAAAAJ;;", "orcid": "0009-0009-4552-0046;;0000-0002-0000-834X", "linkedin": ";%E4%BB%95%E6%9D%B0-%E6%96%B9-35259817a/;", "or_profile": "~Qianhan_Feng1;~Shijie_Fang1;~Tong_Lin1", "aff": "Peking University;;Peking University", "aff_domain": "stu.pku.edu.cn;;pku.edu.cn", "position": "MS student;;Associate Professor", "bibtex": "@misc{\nfeng2024boosting,\ntitle={Boosting Semi-Supervised Learning via Variational Confidence Calibration and Unlabeled Sample Elimination},\nauthor={Qianhan Feng and Shijie Fang and Tong Lin},\nyear={2024},\nurl={https://openreview.net/forum?id=2OwSqvxjP2}\n}", "github": "", "project": "", "reviewers": "iR75;LFY9;5hwM;o4va", "site": "https://openreview.net/forum?id=2OwSqvxjP2", "pdf_size": 1387865, "rating": "5;5;6;6", "confidence": "5;4;4;3", "soundness": "2;2;3;4", "contribution": "2;2;3;3", "presentation": "3;3;2;4", "wc_summary": "71;111;43;160", "wc_strengths": "67;43;145;82", "wc_weaknesses": "77;144;198;37", "wc_questions": "113;5;89;39", "wc_review": "328;303;475;318", "wc_reply_reviewers": "37;109;0;0", "wc_reply_authors": "481;390;764;391", "reply_reviewers": "1;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 96.25, 44.030529181466804 ], "wc_strengths_avg": [ 84.25, 37.7317836843158 ], "wc_weaknesses_avg": [ 114.0, 61.75354240851289 ], "wc_questions_avg": [ 61.5, 42.1515124283815 ], "wc_review_avg": [ 356.0, 69.27842376959799 ], "wc_reply_reviewers_avg": [ 36.5, 44.5 ], "wc_reply_authors_avg": [ 506.5, 153.19024120354402 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.7071067811865475, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:N2y3YCLFpJsJ:scholar.google.com/&scioq=Boosting+Semi-Supervised+Learning+via+Variational+Confidence+Calibration+and+Unlabeled+Sample+Elimination&hl=en&as_sdt=0,10", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "2PKZtPMyvI", "title": "An Intrinsic Dimension Perspective of Transformers for Sequential Modeling", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Transformers have become immensely popular for sequential modeling, particularly in domains like natural language processing (NLP).\nRecent innovations have introduced various 
architectures based on the Transformer framework, resulting in significant advancements in applications.\nHowever, the underlying mechanics of these architectures are still somewhat enigmatic.\nIn this study, we explore the geometrical characteristics of data representations learned by Transformers using a mathematical metric known as intrinsic dimension (ID). This can be conceptualized as the minimum parameter count needed for effective modeling.\nA sequence of experiments, predominantly centered on text classification, support the ensuing empirical observations regarding the correlation between embedding dimension, layer depth, individual layer ID, and task performance.\nInterestingly, we note that a higher terminal feature ID, when obtained from Transformers, generally correlates with a lower classification error rate. \nThis stands in contrast to the behavior observed in CNNs (and other models) during image classification tasks. Furthermore, our data suggests that the ID for each layer tends to diminish as layer depth increases, with this decline being notably steeper in more intricate architectures.\nWe also present numerical evidence highlighting the geometrical constructs of data representations as interpreted by Transformers, indicating that only nonlinear dimension reduction is achievable.\nLastly, we delve into how varying sequence lengths impact both ID and task performance, confirming the efficacy of data reduction during training.\nOur ambition is for these insights to offer direction in the choice of hyper-parameters and the application of dimension/data reduction when using Transformers for text classification and other prevalent NLP tasks.", "keywords": "Transformers;Intrinsic Dimension;Hyperparameter Optimization;Natural Language Processing", "primary_area": "visualization or interpretation of learned representations", "supplementary_material": "", "author": "Zeping Min;Qian Ge;Zhong Li", "authorids": "~Zeping_Min1;~Qian_Ge4;~Zhong_Li2", "gender": ";M;M", "homepage": ";;https://www.microsoft.com/en-us/research/people/lzhong/", "dblp": ";153/5844;", "google_scholar": ";;https://scholar.google.com/citations?view_op=list_works", "orcid": ";;", "linkedin": ";;", "or_profile": "~Zeping_Min1;~Qian_Ge4;~Zhong_Li2", "aff": ";Peking University;Microsoft Research Asia", "aff_domain": ";pku.edu.cn;microsoft.com", "position": ";MS student;Researcher", "bibtex": "@misc{\nmin2024an,\ntitle={An Intrinsic Dimension Perspective of Transformers for Sequential Modeling},\nauthor={Zeping Min and Qian Ge and Zhong Li},\nyear={2024},\nurl={https://openreview.net/forum?id=2PKZtPMyvI}\n}", "github": "", "project": "", "reviewers": "K7iN;LFkQ;rhBS;wsLx;h8wB", "site": "https://openreview.net/forum?id=2PKZtPMyvI", "pdf_size": 2383678, "rating": "1;3;3;3;5", "confidence": "4;5;4;4;4", "soundness": "2;2;1;2;2", "contribution": "1;2;2;2;2", "presentation": "2;3;1;3;3", "wc_summary": "41;84;74;35;99", "wc_strengths": "1;30;33;63;84", "wc_weaknesses": "41;116;47;428;151", "wc_questions": "2;228;488;26;122", "wc_review": "85;458;642;552;456", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "0;0;0;0;0", "rating_avg": [ 3.0, 1.2649110640673518 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 1.8, 0.4 ], "contribution_avg": [ 1.8, 0.4000000000000001 ], "presentation_avg": [ 2.4, 0.8 ], "wc_summary_avg": [ 66.6, 24.743483990739865 ], "wc_strengths_avg": [ 42.2, 28.67333255831976 ], "wc_weaknesses_avg": [ 156.6, 
141.92476880375744 ], "wc_questions_avg": [ 173.2, 176.46574738458455 ], "wc_review_avg": [ 438.6, 189.706721019578 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:hSxBdp0VlzoJ:scholar.google.com/&scioq=An+Intrinsic+Dimension+Perspective+of+Transformers+for+Sequential+Modeling&hl=en&as_sdt=0,5", "gs_version_total": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Peking University;Microsoft", "aff_unique_dep": ";Research", "aff_unique_url": "http://www.pku.edu.cn;https://www.microsoft.com/en-us/research/group/asia", "aff_unique_abbr": "Peking U;MSR Asia", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "2Pup7olzxj", "title": "Differentiable Optimization in Plane-Wave Density Functional Theory for Solid States", "track": "main", "status": "Reject", "tldr": "", "abstract": "Plane-wave density functional theory is a computational quantum mechanical modeling method used to investigate the electronic structure of solids. It employs plane-waves as the basis set for representing electronic wave functions and leverages density functional theory to compute the electronic structure properties of many-body systems. Traditionally, the Self-Consistent Field (SCF) method is predominantly adopted for optimization in current DFT computations. However, this method encounters notable convergence and computational challenges, and its iterative nature obstructs the incorporation of emergent deep learning enhancements. To address these challenges, we introduce a fully differentiable optimization method tailored to resolve the intrinsic challenges associated with the optimization of plane-wave density functional methods. This methodology includes a direct total energy minimization approach for solving Kohn-Sham equations in periodic crystalline systems, which is coherent with deep learning infrastructures. The efficacy of our approach is illustrated through its two applications in solid-state physics: electron band structure prediction and geometry optimization. Our enhancements potentially pave the way for various gradient-based applications within deep learning paradigms in solid-state physics, extending the boundaries of material innovation and design. We illustrate the utility and diverse applications of our method on real crystal structures and compare its effectiveness with several established SCF-based packages, demonstrating its accuracy and robust convergence property.", "keywords": "AI for Science;Quantum Chemisty;Density Functional Theory;Deep Learning;Kohn-Sham Equation;Solid-State Physics", "primary_area": "applications to physical sciences (physics, chemistry, biology, etc.)", "supplementary_material": "", "author": "Tianbo Li;Stephen Gregory Dale;Zekun Shi;Jingshu Li;Giovanni Vignale;A. H. Castro Neto;Kostya S. 
Novoselov;Min Lin", "authorids": "~Tianbo_Li1;~Stephen_Gregory_Dale1;~Zekun_Shi3;~Jingshu_Li1;~Giovanni_Vignale1;~A._H._Castro_Neto1;~Kostya_S._Novoselov1;~Min_Lin1", "gender": "M;Not Specified;M;M;M;M;M;M", "homepage": ";https://ifim.nus.edu.sg/people/stephen-dale/;https://jasonleejsl.github.io/;;https://graphene.nus.edu.sg/blog/peoples/antonio-castro-neto/;https://ifim.nus.edu.sg/;https://linmin.me;https://zekun-shi.github.io/", "dblp": "153/7013;;;;;;;", "google_scholar": ";gS3ihw0AAAAJ;;;4kNDbNEAAAAJ;;BGONmkIAAAAJ;X9vcv1oAAAAJ", "orcid": ";0000-0002-6867-711X;0009-0006-1576-8487;0000-0003-3851-5790;0000-0003-0613-4010;;;", "linkedin": ";;;;https://sg.linkedin.com/in/antonio-h-castro-neto-ba8187ab;;min-lin-08a3a422/;", "or_profile": "~Tianbo_Li1;~Stephen_Gregory_Dale1;~Jingshu_Li1;~Giovanni_Vignale1;~A._H._Castro_Neto1;~Kostya_S._Novoselov1;~Min_Lin1;~ZEKUN_SHI2", "aff": "Sea AI Lab;;National University of Singapore;National University of Singapore;;;Sea AI Lab;Sea AI Lab", "aff_domain": "sea.com;;u.nus.edu;nus.edu;;;sea.com;sea.com", "position": "Researcher;;PhD student;Researcher;;;Principal Researcher;Researcher", "bibtex": "@misc{\nli2024differentiable,\ntitle={Differentiable Optimization in Plane-Wave Density Functional Theory for Solid States},\nauthor={Tianbo Li and Stephen Gregory Dale and Zekun Shi and Jingshu Li and Giovanni Vignale and A. H. Castro Neto and Kostya S. Novoselov and Min Lin},\nyear={2024},\nurl={https://openreview.net/forum?id=2Pup7olzxj}\n}", "github": "", "project": "", "reviewers": "Arv3;Npa9;AFLR;TgFK", "site": "https://openreview.net/forum?id=2Pup7olzxj", "pdf_size": 2235674, "rating": "3;5;5;6", "confidence": "3;4;3;2", "soundness": "3;2;3;4", "contribution": "3;2;3;3", "presentation": "3;3;3;4", "wc_summary": "208;55;131;124", "wc_strengths": "22;97;179;113", "wc_weaknesses": "108;546;71;111", "wc_questions": "125;98;91;13", "wc_review": "463;796;472;361", "wc_reply_reviewers": "0;35;21;0", "wc_reply_authors": "1365;1081;992;411", "reply_reviewers": "0;1;1;0", "reply_authors": "2;2;3;1", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 129.5, 54.18717560456533 ], "wc_strengths_avg": [ 102.75, 55.840733340456765 ], "wc_weaknesses_avg": [ 209.0, 195.20373971827487 ], "wc_questions_avg": [ 81.75, 41.67358275934528 ], "wc_review_avg": [ 523.0, 163.53440005087614 ], "wc_reply_reviewers_avg": [ 14.0, 14.849242404917497 ], "wc_reply_authors_avg": [ 962.25, 346.796319905503 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.3244428422615251, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:To3QkK3nRmcJ:scholar.google.com/&scioq=Differentiable+Optimization+in+Plane-Wave+Density+Functional+Theory+for+Solid+States&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;1;0;0", "aff_unique_norm": "Sea AI Lab;National University of Singapore", "aff_unique_dep": ";", "aff_unique_url": ";https://www.nus.edu.sg", "aff_unique_abbr": ";NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1;1", "aff_country_unique": ";Singapore" }, { "title": "GOAt: Explaining Graph Neural Networks via Graph Output Attribution", "status": "Poster", "track": "main", "site": 
"https://iclr.cc/virtual/2024/poster/19551", "id": "2Q8TZWAHv4", "author_site": "Shengyao Lu, Keith G Mills, Jiao He, Bang Liu, Di Niu", "tldr": "", "abstract": "Understanding the decision-making process of Graph Neural Networks (GNNs) is crucial to their interpretability. Most existing methods for explaining GNNs typically rely on training auxiliary models, resulting in the explanations remain black-boxed. This paper introduces Graph Output Attribution (GOAt), a novel method to attribute graph outputs to input graph features, creating GNN explanations that are faithful, discriminative, as well as stable across similar samples. By expanding the GNN as a sum of scalar products involving node features, edge features and activation patterns, we propose an efficient analytical method to compute contribution of each node or edge feature to each scalar product and aggregate the contributions from all scalar products in the expansion form to derive the importance of each node and edge. Through extensive experiments on synthetic and real-world data, we show that our method not only outperforms various state-of-the-art GNN explainers in terms of the commonly used fidelity metric, but also exhibits stronger discriminability, and stability by a remarkable margin.", "keywords": "Graph Neural Networks;Explainability;Interpretability;Local-level explanation;Instance-level explanation", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/f5a45077f8cd166ffe99911b9a864127c65a66ea.zip", "author": "Shengyao Lu;Keith G. Mills;Jiao He;Bang Liu;Di Niu", "authorids": "~Shengyao_Lu1;~Keith_G._Mills1;~Jiao_He1;~Bang_Liu1;~Di_Niu1", "gender": "F;M;M;M;M", "homepage": "https://sluxsr.github.io/;https://kgmills.github.io/;https://github.com/JonHe878;http://www-labs.iro.umontreal.ca/~liubang/;https://www.ualberta.ca/~dniu", "dblp": "320/4184;299/5864;;;82/4953", "google_scholar": "https://scholar.google.ca/citations?user=MSsab9EAAAAJ;CBOD_ngAAAAJ;;lmfAnP4AAAAJ;https://scholar.google.ca/citations?user=3kC5OogAAAAJ", "orcid": ";0000-0001-6054-1798;;0000-0002-9483-8984;0000-0002-5250-7327", "linkedin": ";kgmills/;;bang-liu-12b66789/?originalSubdomain=ca;", "or_profile": "~Shengyao_Lu1;~Keith_G._Mills1;~Jiao_He1;~Bang_Liu1;~Di_Niu1", "aff": "University of Alberta;Huawei Technologies Ltd.;huawei;University of Montreal;University of Alberta", "aff_domain": "ualberta.ca;huawei.com;huawei.com;umontreal.ca;ualberta.ca", "position": "PhD student;Research Intern;Chief engineer;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nlu2024goat,\ntitle={{GOA}t: Explaining Graph Neural Networks via Graph Output Attribution},\nauthor={Shengyao Lu and Keith G. 
Mills and Jiao He and Bang Liu and Di Niu},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=2Q8TZWAHv4}\n}", "github": "", "project": "", "reviewers": "cqeJ;nBUo;PCGE;8TnY", "pdf_size": 2269280, "rating": "5;6;6;8", "confidence": "4;5;2;3", "soundness": "3;3;3;2", "contribution": "2;3;3;2", "presentation": "3;2;2;2", "wc_summary": "51;61;47;145", "wc_strengths": "33;72;29;61", "wc_weaknesses": "187;100;28;43", "wc_questions": "5;5;5;102", "wc_review": "276;238;109;351", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "671;750;309;319", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 76.0, 40.162171256046406 ], "wc_strengths_avg": [ 48.75, 18.226011631731172 ], "wc_weaknesses_avg": [ 89.5, 62.37186865887537 ], "wc_questions_avg": [ 29.25, 42.00223208354527 ], "wc_review_avg": [ 243.5, 87.65414993027997 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 512.25, 200.2390758568367 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.30779350562554625, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13711532416544159943&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=2Q8TZWAHv4", "pdf": "https://openreview.net/pdf?id=2Q8TZWAHv4", "email": "ualberta.ca;huawei.com;huawei.com;umontreal.ca;ualberta.ca", "author_num": 5, "aff_unique_index": "0;1;1;2;0", "aff_unique_norm": "University of Alberta;Huawei;University of Montreal", "aff_unique_dep": ";Huawei Technologies;", "aff_unique_url": "https://www.ualberta.ca;https://www.huawei.com;https://wwwumontreal.ca", "aff_unique_abbr": "UAlberta;Huawei;UM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;0", "aff_country_unique": "Canada;China" }, { "id": "2RGQwJEcAC", "title": "Visual Transformer with Differentiable Channel Selection: An Information Bottleneck Inspired Approach", "track": "main", "status": "Reject", "tldr": "", "abstract": "Self-attention and transformers have been widely used in deep learning. Recent efforts have been devoted to incorporating transformer blocks into different types of neural architectures, including those with convolutions, leading to various visual transformers for computer vision tasks. In this paper, we propose a novel and compact transformer block, Transformer with Differentiable Channel Selection, or DCS-Transformer. DCS-Transformer features channel selection in the computation of the attention weights and the input/output features of the MLP in the transformer block. Our DCS-Transformer is compatible with many popular and compact transformer networks, such as MobileViT and EfficientViT, and it reduces the FLOPs of the visual transformers while maintaining or even improving the prediction accuracy. In the experiments, we replace all the transformer blocks in MobileViT and EfficientViT with DCS-Transformer blocks, leading to DCS-Transformer networks with different backbones. 
The DCS-Transformer is motivated by reduction of Information Bottleneck, and a novel upper bound for the IB which can be optimized by SGD is derived and incorporated into the training loss of the network with DCS-Transformer. Extensive results on image classification and object detection evidence that DCS-Transformer renders compact and efficient visual transformers with comparable or much better prediction accuracy than the original visual transformers. The code of DCS-Transformer is available at \\url{https://anonymous.4open.science/r/IB-DCS-ViT-273C/}.", "keywords": "Differentiable Channel Selection;Information Bottleneck;Image Classification", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Yancheng Wang;Ping Li;Yingzhen Yang", "authorids": "~Yancheng_Wang2;~Ping_Li3;~Yingzhen_Yang1", "gender": "M;M;M", "homepage": ";http://www.stat.rutgers.edu/home/pingli/;http://yingzhenyang.com", "dblp": ";62/5860-1;66/3838.html", "google_scholar": "https://scholar.google.com/citations?hl=en;;", "orcid": ";;", "linkedin": ";;yingzhen-yang-9b869122", "or_profile": "~Yancheng_Wang2;~Ping_Li3;~Yingzhen_Yang1", "aff": "Arizona State University;LinkedIn;Arizona State University", "aff_domain": "asu.edu;linkedin.com;asu.edu", "position": "PhD student;Engineer;Assistant Professor", "bibtex": "@misc{\nwang2024visual,\ntitle={Visual Transformer with Differentiable Channel Selection: An Information Bottleneck Inspired Approach},\nauthor={Yancheng Wang and Ping Li and Yingzhen Yang},\nyear={2024},\nurl={https://openreview.net/forum?id=2RGQwJEcAC}\n}", "github": "", "project": "", "reviewers": "wQJ5;Q1sT;JKXw", "site": "https://openreview.net/forum?id=2RGQwJEcAC", "pdf_size": 7064627, "rating": "5;6;6", "confidence": "4;5;3", "soundness": "3;3;3", "contribution": "2;3;3", "presentation": "3;3;2", "wc_summary": "136;62;85", "wc_strengths": "17;63;71", "wc_weaknesses": "347;75;230", "wc_questions": "2;2;13", "wc_review": "502;202;399", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1381;532;1469", "reply_reviewers": "0;0;0", "reply_authors": "2;1;2", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 94.33333333333333, 30.922843048824312 ], "wc_strengths_avg": [ 50.333333333333336, 23.79542439676633 ], "wc_weaknesses_avg": [ 217.33333333333334, 111.40416908217075 ], "wc_questions_avg": [ 5.666666666666667, 5.185449728701348 ], "wc_review_avg": [ 367.6666666666667, 124.4623994983581 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1127.3333333333333, 422.49444440791825 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:r-UPoEdjLugJ:scholar.google.com/&scioq=Visual+Transformer+with+Differentiable+Channel+Selection:+An+Information+Bottleneck+Inspired+Approach&hl=en&as_sdt=0,5", "gs_version_total": 6, "aff_unique_index": "0;1;0", "aff_unique_norm": "Arizona State University;LinkedIn Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.asu.edu;https://www.linkedin.com", "aff_unique_abbr": "ASU;LinkedIn", "aff_campus_unique_index": "", 
"aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "2RJAzSphy9", "title": "Sample Efficient Reinforcement Learning from Human Feedback via Active Exploration", "track": "main", "status": "Reject", "tldr": "", "abstract": "Preference-based feedback is important for many applications in reinforcement learning where direct evaluation of a reward function is not feasible. A notable recent example arises in reinforcement learning from human feedback (RLHF) on large language models. For many applications of RLHF, the cost of acquiring the human feedback can be substantial. In this work, we take advantage of the fact that one can often choose contexts at which to obtain human feedback in order to most efficiently identify a good policy, and formalize this as an *offline contextual dueling bandit* problem. We give an upper-confidence-bound style algorithm for this problem and prove a polynomial worst-case regret bound. We then provide empirical confirmation in a synthetic setting that our approach outperforms existing methods. After, we extend the setting and methodology for practical use in RLHF training of large language models. Here, our method is able to reach better performance with fewer samples of human preferences than multiple baselines on three real-world datasets.", "keywords": "reinforcement learning;LLMs;contextual bandits;RLHF;exploration", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/3969dbbf0d8f0b1e2cf43c14be805bc3e043ae2e.zip", "author": "Viraj Mehta;Vikramjeet Das;Ojash Neopane;Yijia Dai;Ilija Bogunovic;Jeff Schneider;Willie Neiswanger", "authorids": "~Viraj_Mehta1;~Vikramjeet_Das1;~Ojash_Neopane1;~Yijia_Dai1;~Ilija_Bogunovic2;~Jeff_Schneider1;~Willie_Neiswanger2", "gender": "M;M;M;F;;M;M", "homepage": "http://virajm.com;;https://oneopane.github.io/;https://daiyijia02.github.io;https://www.cs.cmu.edu/~schneide;https://willieneis.github.io/;http://ilijabogunovic.com/", "dblp": "https://dblp.org/pers/m/Mehta:Viraj.html;323/0004;176/5399.html;;38/247;120/7593.html;142/2725", "google_scholar": "4pHjHBkAAAAJ;https://scholar.google.com/citations?hl=en;lmAQ1l8AAAAJ;https://scholar.google.com/citations?hl=en;3bSbb20AAAAJ;QwKHApEAAAAJ;xMvt3NEAAAAJ", "orcid": "0000-0002-2021-9718;0000-0001-8292-6752;;;0000-0002-5080-9073;;", "linkedin": "virajrmehta/;linkedin.com/in/vikramjeetd;;yijia-dai/;jeff-schneider-1593b322/;;", "or_profile": "~Viraj_Mehta1;~Vikramjeet_Das1;~Ojash_Neopane1;~Yijia_Dai1;~Jeff_Schneider1;~Willie_Neiswanger2;~Ilija_Bogunovic1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Cornell University;Carnegie Mellon University;University of Southern California;Swiss Federal Institute of Technology", "aff_domain": "cmu.edu;andrew.cmu.edu;cmu.edu;cornell.edu;cs.cmu.edu;usc.edu;ethz.ch", "position": "PhD student;MS student;PhD student;Undergrad student;Researcher;Assistant Professor;Postdoc", "bibtex": "@misc{\nmehta2024sample,\ntitle={Sample Efficient Reinforcement Learning from Human Feedback via Active Exploration},\nauthor={Viraj Mehta and Vikramjeet Das and Ojash Neopane and Yijia Dai and Ilija Bogunovic and Jeff Schneider and Willie Neiswanger},\nyear={2024},\nurl={https://openreview.net/forum?id=2RJAzSphy9}\n}", "github": "", "project": "", "reviewers": "aHAt;NrDs;vRQm;7uH1;DPyN", "site": "https://openreview.net/forum?id=2RJAzSphy9", "pdf_size": 2399934, "rating": "3;5;5;5;6", "confidence": "5;3;3;2;2", "soundness": "2;3;3;2;3", 
"contribution": "2;2;2;2;3", "presentation": "2;3;3;3;3", "wc_summary": "389;71;86;50;50", "wc_strengths": "2;30;37;33;20", "wc_weaknesses": "2;92;309;78;119", "wc_questions": "2;239;5;3;51", "wc_review": "395;432;437;164;240", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "909;528;486;538;351", "reply_reviewers": "0;0;0;0;0", "reply_authors": "2;1;1;1;1", "rating_avg": [ 4.8, 0.9797958971132712 ], "confidence_avg": [ 3.0, 1.0954451150103321 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "contribution_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 129.2, 130.60995367888316 ], "wc_strengths_avg": [ 24.4, 12.531560158256434 ], "wc_weaknesses_avg": [ 120.0, 102.17044582461212 ], "wc_questions_avg": [ 60.0, 91.3892772703669 ], "wc_review_avg": [ 333.6, 111.05782277714614 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 562.4, 185.7079427488227 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.9316949906249125, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6091476061033864192&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;1;0;2;3", "aff_unique_norm": "Carnegie Mellon University;Cornell University;University of Southern California;Swiss Federal Institute of Technology", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.cmu.edu;https://www.cornell.edu;https://www.usc.edu;https://www.ethz.ch", "aff_unique_abbr": "CMU;Cornell;USC;ETH Zurich", "aff_campus_unique_index": "1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;0;0;0;0;0;1", "aff_country_unique": "United States;Switzerland" }, { "title": "Time Travel in LLMs: Tracing Data Contamination in Large Language Models", "status": "Spotlight", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19550", "id": "2Rwq6c3tvr", "author_site": "Shahriar Golchin, Mihai Surdeanu", "tldr": "", "abstract": "Data contamination, i.e., the presence of test data from downstream tasks in the training data of large language models (LLMs), is a potential major issue in measuring LLMs' real effectiveness on other tasks. We propose a straightforward yet effective method for identifying data contamination within LLMs. At its core, our approach starts by identifying potential contamination at the instance level; using this information, our approach then assesses wider contamination at the partition level. To estimate contamination of individual instances, we employ \"guided instruction:\" a prompt consisting of the dataset name, partition type, and the random-length initial segment of a reference instance, asking the LLM to complete it. An instance is flagged as contaminated if the LLM's output either exactly or nearly matches the latter segment of the reference. To understand if an entire partition is contaminated, we propose two ideas. The first idea marks a dataset partition as contaminated if the average overlap score with the reference instances (as measured by ROUGE-L or BLEURT) is statistically significantly better with the completions from guided instruction compared to a \"general instruction\" that does not include the dataset and partition name. 
The second idea marks a dataset partition as contaminated if a classifier based on GPT-4 with few-shot in-context learning prompt marks multiple generated completions as exact/near-exact matches of the corresponding reference instances. Our best method achieves an accuracy between 92% and 100% in detecting if an LLM is contaminated with seven datasets, containing train and test/validation partitions, when contrasted with manual evaluation by human experts. Further, our findings indicate that GPT-4 is contaminated with AG News, WNLI, and XSum datasets.", "keywords": "Data Contamination;Large Language Models (LLMs);Guided Instruction;Memorization", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Shahriar Golchin;Mihai Surdeanu", "authorids": "~Shahriar_Golchin1;~Mihai_Surdeanu1", "gender": "M;", "homepage": ";http://surdeanu.info/mihai/", "dblp": ";18/3479", "google_scholar": "iBl-Yc8AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";", "linkedin": "shahriar-golchin;", "or_profile": "~Shahriar_Golchin1;~Mihai_Surdeanu1", "aff": "University of Arizona;University of Arizona", "aff_domain": "arizona.edu;arizona.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\ngolchin2024time,\ntitle={Time Travel in {LLM}s: Tracing Data Contamination in Large Language Models},\nauthor={Shahriar Golchin and Mihai Surdeanu},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=2Rwq6c3tvr}\n}", "github": "", "project": "", "reviewers": "NUVj;HqCE;KgsM;6REU", "pdf_size": 259469, "rating": "6;6;8;8", "confidence": "3;4;3;2", "soundness": "3;2;3;3", "contribution": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "63;94;93;120", "wc_strengths": "51;13;122;37", "wc_weaknesses": "88;284;19;110", "wc_questions": "101;2;38;94", "wc_review": "303;393;272;361", "wc_reply_reviewers": "0;32;0;168", "wc_reply_authors": "768;1196;368;955", "reply_reviewers": "0;1;0;2", "reply_authors": "1;2;1;2", "rating_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 92.5, 20.180436070610565 ], "wc_strengths_avg": [ 55.75, 40.59171713539598 ], "wc_weaknesses_avg": [ 125.25, 97.60987398823953 ], "wc_questions_avg": [ 58.75, 40.861809798392436 ], "wc_review_avg": [ 332.25, 47.441411235333206 ], "wc_reply_reviewers_avg": [ 50.0, 69.3685807840985 ], "wc_reply_authors_avg": [ 821.75, 302.7361681398508 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.7071067811865475, "gs_citation": 143, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3876183311628258637&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "openreview": "https://openreview.net/forum?id=2Rwq6c3tvr", "pdf": "https://openreview.net/pdf?id=2Rwq6c3tvr", "email": "arizona.edu;arizona.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Arizona", "aff_unique_dep": "", "aff_unique_url": "https://www.arizona.edu", "aff_unique_abbr": "UA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "2SuA42Mq1c", "title": "BMAD: Benchmarks for Medical Anomaly Detection", 
"track": "main", "status": "Reject", "tldr": "", "abstract": "Anomaly detection (AD) is a fundamental research problem in machine learning and computer vision, with practical applications in industrial inspection, video surveillance, and medical diagnosis. In medical imaging, AD is especially vital for identifying anomalies that may indicate rare diseases or conditions. Despite its significance, there is a lack of a universal and fair benchmark for evaluating AD methods on medical images, which hinders the development of more generalized and robust AD methods in this specific domain. To bridge this gap, we introduce a comprehensive evaluation benchmark for assessing AD methods on medical images. This benchmark encompasses six reorganized datasets from five medical domains (i.e. brain MRI, liver CT, retinal OCT, chest X-ray, and digital histopathology) and three key evaluation metrics, and includes a total of fifteen state-of-the-art AD algorithms. This standardized and well-curated medical benchmark with the well-structured codebase enables comprehensive comparisons among recently proposed anomaly detection methods. It will facilitate the community to conduct a fair comparison and advance the field of AD on medical imaging.", "keywords": "Anomaly detection;Medical benchmarks", "primary_area": "datasets and benchmarks", "supplementary_material": "", "author": "Jinan Bao;Hanshi Sun;Hanqiu Deng;Yinsheng He;Zhaoxiang Zhang;Xingyu Li", "authorids": "~Jinan_Bao1;~Hanshi_Sun1;~Hanqiu_Deng1;~Yinsheng_He1;~Zhaoxiang_Zhang4;~Xingyu_Li3", "gender": "F;M;M;M;M;", "homepage": "https://apps.ualberta.ca/directory/person/jbao1;https://preminstrel.com/;;;;https://apps.ualberta.ca/directory/person/xingyu", "dblp": ";314/7377.html;298/6509;;;", "google_scholar": ";BjQHEh8AAAAJ;nmNQjgIAAAAJ;;;V8OICzYAAAAJ", "orcid": ";0009-0005-4436-234X;;;;", "linkedin": ";hanshi-sun-5b74b8228/;;yinsheng-he-ab90b3183/?originalSubdomain=ca;zhaoxiangzhangnonozz/;", "or_profile": "~Jinan_Bao1;~Hanshi_Sun1;~Hanqiu_Deng1;~Yinsheng_He1;~Zhaoxiang_Zhang4;~Xingyu_Li3", "aff": "University of Alberta;Carnegie Mellon University;University of Alberta;University of Alberta;;University of Alberta", "aff_domain": "ualberta.ca;cmu.edu;ualberta.ca;ualberta.ca;;ualberta.ca", "position": "MS student;MS student;PhD student;PhD student;;Assistant Professor", "bibtex": "@misc{\nbao2024bmad,\ntitle={{BMAD}: Benchmarks for Medical Anomaly Detection},\nauthor={Jinan Bao and Hanshi Sun and Hanqiu Deng and Yinsheng He and Zhaoxiang Zhang and Xingyu Li},\nyear={2024},\nurl={https://openreview.net/forum?id=2SuA42Mq1c}\n}", "github": "", "project": "", "reviewers": "csZB;jY9s;Y82y;SMmX", "site": "https://openreview.net/forum?id=2SuA42Mq1c", "pdf_size": 28467196, "rating": "3;3;5;6", "confidence": "4;4;4;3", "soundness": "4;2;2;3", "contribution": "2;1;2;3", "presentation": "3;2;2;3", "wc_summary": "63;81;59;27", "wc_strengths": "23;27;205;20", "wc_weaknesses": "21;73;248;35", "wc_questions": "127;1;28;121", "wc_review": "234;182;540;203", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "339;261;194;116", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 57.5, 19.461500456028563 ], "wc_strengths_avg": [ 68.75, 78.7031606735079 ], "wc_weaknesses_avg": [ 94.25, 90.78374028426015 ], "wc_questions_avg": [ 69.25, 
55.616431924387236 ], "wc_review_avg": [ 289.75, 145.6612079450119 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 227.5, 82.32405480781422 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.7777777777777777, "gs_citation": 40, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10416728347596820430&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "University of Alberta;Carnegie Mellon University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ualberta.ca;https://www.cmu.edu", "aff_unique_abbr": "UAlberta;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "Canada;United States" }, { "title": "Towards Offline Opponent Modeling with In-context Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19549", "id": "2SwHngthig", "author_site": "Yuheng Jing, Kai Li, Bingyun Liu, Yifan Zang, Haobo Fu, QIANG FU, Junliang Xing, Jian Cheng", "tldr": "", "abstract": "Opponent modeling aims at learning the opponent's behaviors, goals, or beliefs to reduce the uncertainty of the competitive environment and assist decision-making. Existing work has mostly focused on learning opponent models online, which is impractical and inefficient in practical scenarios. To this end, we formalize an Offline Opponent Modeling (OOM) problem with the objective of utilizing pre-collected offline datasets to learn opponent models that characterize the opponent from the viewpoint of the controlled agent, which aids in adapting to the unknown fixed policies of the opponent. Drawing on the promises of the Transformers for decision-making, we introduce a general approach, Transformer Against Opponent (TAO), for OOM. Essentially, TAO tackles the problem by harnessing the full potential of the supervised pre-trained Transformers' in-context learning capabilities. The foundation of TAO lies in three stages: an innovative offline policy embedding learning stage, an offline opponent-aware response policy training stage, and a deployment stage for opponent adaptation with in-context learning. Theoretical analysis establishes TAO's equivalence to Bayesian posterior sampling in opponent modeling and guarantees TAO's convergence in opponent policy recognition. Extensive experiments and ablation studies on competitive environments with sparse and dense rewards demonstrate the impressive performance of TAO. 
Our approach manifests remarkable prowess for fast adaptation, especially in the face of unseen opponent policies, confirming its in-context learning potency.", "keywords": "Opponent Modeling;Offline;Transformer", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/ea211856fbe4c2b8d7cb871c578d530ac4bcec5c.zip", "author": "Yuheng Jing;Kai Li;Bingyun Liu;Yifan Zang;Haobo Fu;QIANG FU;Junliang Xing;Jian Cheng", "authorids": "~Yuheng_Jing1;~Kai_Li2;~Bingyun_Liu1;~Yifan_Zang1;~Haobo_Fu2;~QIANG_FU8;~Junliang_Xing1;~Jian_Cheng7", "gender": "M;M;M;M;M;M;M;", "homepage": ";;;;;http://people.ucas.ac.cn/~jlxing?language=en;https://people.ucas.ac.cn/~chengjian?language=en;https://github.com/liuby26", "dblp": "382/3906;181/2853;269/4608;85/8571;;43/7659.html;14/6145-1;", "google_scholar": ";_cY_PXgAAAAJ;;LFdJXNcAAAAJ;gANaxT0AAAAJ;jSwNd3MAAAAJ;ZGCIUJ8AAAAJ;", "orcid": ";;;;;0000-0001-6801-0510;0000-0003-1289-2758;", "linkedin": "jingyuheng;;;haobo-fu-382b0784/;;https://www.linkedin.cn/incareer/in/ACoAAAvlU14B40ZWH1pxg5JJDtQ6LlgMYkp0e5s;;", "or_profile": "~Yuheng_Jing1;~Kai_Li2;~Yifan_Zang1;~Haobo_Fu2;~QIANG_FU8;~Junliang_Xing1;~Jian_Cheng7;~Liu_Bingyun1", "aff": "Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;University of Chinese Academy of Sciences;Tencent AI Lab;Tencent AI Lab;Tsinghua University;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "ia.ac.cn;ia.ac.cn;ucas.ac.cn;tencent.com;tencent.com;tsinghua.edu.cn;ia.ac.cn;ia.ac.cn", "position": "PhD student;Associate Professor;PhD student;Principal Researcher;Principal Researcher;Full Professor;Full Professor;MS student", "bibtex": "@inproceedings{\njing2024towards,\ntitle={Towards Offline Opponent Modeling with In-context Learning},\nauthor={Yuheng Jing and Kai Li and Bingyun Liu and Yifan Zang and Haobo Fu and QIANG FU and Junliang Xing and Jian Cheng},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=2SwHngthig}\n}", "github": "", "project": "", "reviewers": "fLaD;F2KT;Z2nF;QAav", "pdf_size": 1562635, "rating": "5;6;6;6", "confidence": "3;3;4;4", "soundness": "1;2;3;3", "contribution": "2;2;2;3", "presentation": "1;2;3;2", "wc_summary": "120;83;114;98", "wc_strengths": "78;90;23;68", "wc_weaknesses": "267;252;61;188", "wc_questions": "17;805;274;45", "wc_review": "482;1230;472;399", "wc_reply_reviewers": "48;503;191;0", "wc_reply_authors": "1711;4570;2118;1140", "reply_reviewers": "1;1;2;0", "reply_authors": "4;8;6;4", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 103.75, 14.428704030508076 ], "wc_strengths_avg": [ 64.75, 25.33155147242269 ], "wc_weaknesses_avg": [ 192.0, 81.24346127535433 ], "wc_questions_avg": [ 285.25, 316.20592578255076 ], "wc_review_avg": [ 645.75, 338.83504467513393 ], "wc_reply_reviewers_avg": [ 185.5, 196.31161453159106 ], "wc_reply_authors_avg": [ 2384.75, 1308.6075376139327 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 5.5, 1.6583123951777 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 5, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=9282177930204355332&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "openreview": "https://openreview.net/forum?id=2SwHngthig", "pdf": "https://openreview.net/pdf?id=2SwHngthig", "email": "ia.ac.cn;ia.ac.cn;ucas.ac.cn;tencent.com;tencent.com;tsinghua.edu.cn;ia.ac.cn;ia.ac.cn", "author_num": 8, "aff_unique_index": "0;0;1;2;2;3;0;0", "aff_unique_norm": "Chinese Academy of Sciences;University of Chinese Academy of Sciences;Tencent;Tsinghua University", "aff_unique_dep": "Institute of Automation;;Tencent AI Lab;", "aff_unique_url": "http://www.ia.cas.cn;http://www.ucas.ac.cn;https://ai.tencent.com;https://www.tsinghua.edu.cn", "aff_unique_abbr": "CAS;UCAS;Tencent AI Lab;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "2TFfLiTGBS", "title": "DART: A Principled Approach to Adversarially Robust Unsupervised Domain Adaptation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Distribution shifts and adversarial examples are two major challenges for deploying machine learning models. While these challenges have been studied individually, their combination is an important topic that remains relatively under-explored. In this work, we study the problem of adversarial robustness under a common setting of distribution shift \u2013 unsupervised domain adaptation (UDA). Specifically, given a labeled source domain $\\mathcal{D}_S$ and an unlabeled target domain $\\mathcal{D}_T$ with related but different distributions, the goal is to obtain an adversarially robust model for $\\mathcal{D}_T$. The absence of target domain labels poses a unique challenge, as conventional adversarial robustness defenses cannot be directly applied to $\\mathcal{D}_T$. To address this challenge, we first establish a generalization bound for the adversarial target loss, which consists of (i) terms related to the loss on the data, and (ii) a measure of worst-case domain divergence. Motivated by this bound, we develop a novel unified defense framework called *Divergence Aware adveRsarial Training* (DART), which can be used in conjunction with a variety of standard UDA methods; e.g., DANN (Ganin & Lempitsky, 2015). DART is applicable to general threat models, including the popular $\\ell_p$-norm model, and does not require heuristic regularizers or architectural changes. We also release DomainRobust: a testbed for evaluating robustness of UDA models to adversarial attacks. DomainRobust consists of 4 multi-domain benchmark datasets (with 46 source-target pairs) and 7 meta-algorithms with a total of 11 variants. Our large-scale experiments demonstrate that on average, DART significantly enhances model robustness on all benchmarks compared to the state of the art, while maintaining competitive standard accuracy. 
The relative improvement in robustness from DART reaches up to 29.2% on the source-target domain pairs considered.", "keywords": "Unsupervised Domain Adaptation;Adversarial Robustness", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "/attachment/2b121ce5b467f20c612b9cac2cd9bc67cb561fcd.pdf", "author": "Yunjuan Wang;Hussein Hazimeh;Natalia Ponomareva;Alexey Kurakin;Ibrahim Hammoud;Raman Arora", "authorids": "~Yunjuan_Wang1;~Hussein_Hazimeh1;~Natalia_Ponomareva1;~Alexey_Kurakin1;~Ibrahim_Hammoud1;~Raman_Arora1", "gender": "F;;F;M;M;M", "homepage": "https://yunjuanwang.github.io/;http://www.mit.edu/~hazimeh;;http://kurakin.me;https://ibrahmd.github.io/;http://www.cs.jhu.edu/~raman/Home.html", "dblp": "31/560;165/0820-1;71/6768-1;56/9834;;", "google_scholar": "t_VSEEwAAAAJ;;eIdQR5oAAAAJ;nCh4qyMAAAAJ;KVsJ87cAAAAJ;Spe0xdkAAAAJ", "orcid": ";0000-0003-4501-0678;0009-0005-6761-1468;;;", "linkedin": "yunjuan-wang-12ab85169/;;;;;", "or_profile": "~Yunjuan_Wang1;~Hussein_Hazimeh1;~Natalia_Ponomareva1;~Alexey_Kurakin1;~Ibrahim_Hammoud1;~Raman_Arora1", "aff": "Johns Hopkins University;Google;Google;Research, Google;Google;Johns Hopkins University", "aff_domain": "jhu.edu;google.com;google.com;research.google.com;google.com;jhu.edu", "position": "PhD student;Research Scientist;Software Engineer in Research;Research Software Engineer;Researcher;Associate Professor", "bibtex": "@misc{\nwang2024dart,\ntitle={{DART}: A Principled Approach to Adversarially Robust Unsupervised Domain Adaptation},\nauthor={Yunjuan Wang and Hussein Hazimeh and Natalia Ponomareva and Alexey Kurakin and Ibrahim Hammoud and Raman Arora},\nyear={2024},\nurl={https://openreview.net/forum?id=2TFfLiTGBS}\n}", "github": "", "project": "", "reviewers": "vW45;ag53;d4Ff;qNjr", "site": "https://openreview.net/forum?id=2TFfLiTGBS", "pdf_size": 686187, "rating": "3;3;6;6", "confidence": "4;5;3;3", "soundness": "1;2;2;3", "contribution": "2;1;2;3", "presentation": "2;2;3;3", "wc_summary": "74;59;49;65", "wc_strengths": "55;29;65;63", "wc_weaknesses": "307;145;84;105", "wc_questions": "29;1;5;32", "wc_review": "465;234;203;265", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1342;317;1382;1072", "reply_reviewers": "0;0;0;0", "reply_authors": "3;2;5;4", "rating_avg": [ 4.5, 1.5 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.0, 0.7071067811865476 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 61.75, 9.093266739736606 ], "wc_strengths_avg": [ 53.0, 14.352700094407323 ], "wc_weaknesses_avg": [ 160.25, 87.51392746300442 ], "wc_questions_avg": [ 16.75, 13.863170633011771 ], "wc_review_avg": [ 291.75, 102.39964599548183 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1028.25, 427.6004998827761 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 3.5, 1.118033988749895 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4090061530743553470&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;1;1;1;0", "aff_unique_norm": "Johns Hopkins University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.jhu.edu;https://www.google.com", "aff_unique_abbr": "JHU;Google", "aff_campus_unique_index": "1;1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, 
{ "id": "2TOcJivjpt", "title": "Poor Teaching: Explore and Question Knowledge Distillation under Distribution Shift", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Knowledge distillation techniques transfer knowledge from a complex or large learning model into a small model, and have made remarkable achievements in recent decades. However, few studies has investigated and explored the mechanism of the knowledge distillation against distribution shifts in real scenarios. In this paper, we reconsider the knowledge distillation paradigm under the shift situations, by reformulating the objectives of distillation with multiple domains. Under the novel paradigm, we propose a unified and systematic evaluation framework to benchmark knowledge distillation against two general distributional shifts including diversity and correlation shift. \nThe evaluation benchmark covers more than 20 methods from algorithmic, data-driven, and optimization perspectives for five benchmark datasets. Extensive experiments are constructed and some constructive findings are summarized to explain when and how the existing knowledge distillation methods work against distribution shifts.", "keywords": "Knowledge Distillation;Distribution Shift", "primary_area": "datasets and benchmarks", "supplementary_material": "/attachment/6693baad034702ac4a71dc4dfc60ffe0ea4ce3c3.zip", "author": "Songming Zhang;Ziyu Lyu;Xiaofeng Chen", "authorids": "~Songming_Zhang2;~Ziyu_Lyu1;~Xiaofeng_Chen5", "gender": "M;;M", "homepage": ";;", "dblp": "315/4171-2;;c/XiaofengChen9", "google_scholar": "ITQNuIIAAAAJ;;", "orcid": "0000-0001-7695-5880;;0000-0003-4062-4515", "linkedin": "songming-zhang-987589321/;;", "or_profile": "~Songming_Zhang2;~Ziyu_Lyu1;~Xiaofeng_Chen5", "aff": "Chongqing Jiaotong Universiity;;Chongqing Jiaotong Universiity", "aff_domain": "cqjtu.edu.cn;;cqjtu.edu.cn", "position": "MS student;;Full Professor", "bibtex": "@misc{\nzhang2024poor,\ntitle={Poor Teaching: Explore and Question Knowledge Distillation under Distribution Shift},\nauthor={Songming Zhang and Ziyu Lyu and Xiaofeng Chen},\nyear={2024},\nurl={https://openreview.net/forum?id=2TOcJivjpt}\n}", "github": "", "project": "", "reviewers": "Uh6z;ditG;Ywf2;m1kT", "site": "https://openreview.net/forum?id=2TOcJivjpt", "pdf_size": 750173, "rating": "1;3;3;5", "confidence": "5;5;4;3", "soundness": "2;2;1;3", "contribution": "1;2;2;2", "presentation": "1;3;2;3", "wc_summary": "100;52;58;69", "wc_strengths": "62;26;38;57", "wc_weaknesses": "442;229;287;233", "wc_questions": "3;3;2;6", "wc_review": "607;310;385;365", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 3.0, 1.4142135623730951 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.0, 0.7071067811865476 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 69.75, 18.498310733685926 ], "wc_strengths_avg": [ 45.75, 14.49784466739798 ], "wc_weaknesses_avg": [ 297.75, 86.37527134544933 ], "wc_questions_avg": [ 3.5, 1.5 ], "wc_review_avg": [ 416.75, 113.22185080628209 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8528028654224418, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:Ou7d6aYqByEJ:scholar.google.com/&scioq=Poor+Teaching:+Explore+and+Question+Knowledge+Distillation+under+Distribution+Shift&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Chongqing Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "http://www.cqjtu.edu.cn", "aff_unique_abbr": "CQJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "2UlfvGU6rL", "title": "Equivariant Graph Neural Operator for Modeling 3D Dynamics", "track": "main", "status": "Reject", "tldr": "", "abstract": "Modeling the complex three-dimensional (3D) dynamics of relational systems is an important problem in the natural sciences, with applications ranging from molecular simulations to particle mechanics. Machine learning methods have achieved good success by learning graph neural networks to model spatial interactions. However, these approaches do not faithfully capture temporal correlations since they only model next-step predictions. In this work, we propose Equivariant Graph Neural Operator (EGNO), a novel and principled method that directly models dynamics as trajectories instead of just as next-step prediction. Different from existing methods, EGNO explicitly learns the temporal evolution of 3D dynamics where we formulate the dynamics as a function over time and learn neural operators to approximate it. To capture the temporal correlations while keeping the intrinsic SE(3)-equivariance, we develop equivariant temporal convolutions parameterized in the Fourier space and build EGNO by stacking the Fourier layers over equivariant networks. Comprehensive experiments in multiple domains, including particle simulations, human motion capture, and molecular dynamics, demonstrate the significantly superior performance of EGNO against existing methods, thanks to the equivariant temporal modeling.", "keywords": "Equivariant Graph Neural Network;Neural Operator;3D Dynamics", "primary_area": "applications to physical sciences (physics, chemistry, biology, etc.)", "supplementary_material": "/attachment/32ad867085ddb16eb5fc67e72aca0bd98bf9f8d1.zip", "author": "Minkai Xu;Jiaqi Han;Aaron Lou;Kamyar Azizzadenesheli;Stefano Ermon;Anima Anandkumar", "authorids": "~Minkai_Xu1;~Jiaqi_Han2;~Aaron_Lou1;~Kamyar_Azizzadenesheli1;~Stefano_Ermon1;~Anima_Anandkumar1", "gender": "M;M;M;M;M;F", "homepage": "https://minkaixu.com;https://hanjq17.github.io;https://aaronlou.com;https://kamyar.page/;http://cs.stanford.edu/~ermon/;http://tensorlab.cms.caltech.edu/users/anima/", "dblp": "257/3355;235/0412;232/3858;176/5584;47/8135;", "google_scholar": "https://scholar.google.com/citations?hl=en;AKppgMAAAAAJ;;CxAS4SQAAAAJ;;bEcLezcAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;anima-anandkumar-35171b1/", "or_profile": "~Minkai_Xu1;~Jiaqi_Han2;~Aaron_Lou1;~Kamyar_Azizzadenesheli1;~Stefano_Ermon1;~anima_anandkumar1", "aff": "Stanford University;Computer Science Department, Stanford University;Stanford University;NVIDIA;Stanford University;California Institute of Technology", "aff_domain": "stanford.edu;cs.stanford.edu;stanford.edu;nvidia.com;stanford.edu;caltech.edu", "position": "PhD student;PhD student;PhD student;Researcher;Associate Professor;Full Professor", "bibtex": "@misc{\nxu2024equivariant,\ntitle={Equivariant Graph Neural Operator for Modeling 3D Dynamics},\nauthor={Minkai Xu and Jiaqi Han and Aaron Lou and Kamyar Azizzadenesheli and Stefano Ermon and Anima 
Anandkumar},\nyear={2024},\nurl={https://openreview.net/forum?id=2UlfvGU6rL}\n}", "github": "", "project": "", "reviewers": "kxN5;vFqH;mHw8;AtkK", "site": "https://openreview.net/forum?id=2UlfvGU6rL", "pdf_size": 3142918, "rating": "5;5;6;8", "confidence": "3;4;3;3", "soundness": "2;3;3;3", "contribution": "2;4;3;3", "presentation": "2;3;3;3", "wc_summary": "57;153;47;76", "wc_strengths": "51;43;81;118", "wc_weaknesses": "92;84;126;3", "wc_questions": "120;76;90;22", "wc_review": "320;356;344;219", "wc_reply_reviewers": "0;107;18;0", "wc_reply_authors": "1240;1312;944;101", "reply_reviewers": "0;1;1;0", "reply_authors": "3;3;3;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 83.25, 41.595522595587134 ], "wc_strengths_avg": [ 73.25, 29.46502163583119 ], "wc_weaknesses_avg": [ 76.25, 45.135213525583325 ], "wc_questions_avg": [ 77.0, 35.510561809129406 ], "wc_review_avg": [ 309.75, 53.97395205096621 ], "wc_reply_reviewers_avg": [ 31.25, 44.347350540928595 ], "wc_reply_authors_avg": [ 899.25, 481.06100184903784 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.4714045207910316, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10943342957164133565&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 10, "aff_unique_index": "0;0;0;1;0;2", "aff_unique_norm": "Stanford University;NVIDIA;California Institute of Technology", "aff_unique_dep": ";NVIDIA Corporation;", "aff_unique_url": "https://www.stanford.edu;https://www.nvidia.com;https://www.caltech.edu", "aff_unique_abbr": "Stanford;NVIDIA;Caltech", "aff_campus_unique_index": "0;0;0;0;2", "aff_campus_unique": "Stanford;;Pasadena", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Unbalancedness in Neural Monge Maps Improves Unpaired Domain Translation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19548", "id": "2UnCj3jeao", "author_site": "Luca Eyring, Dominik Klein, Th\u00e9o Uscidda, Giovanni Palla, Niki Kilbertus, Zeynep Akata, Fabian Theis", "tldr": "", "abstract": "In optimal transport (OT), a Monge map is known as a mapping that transports a source distribution to a target distribution in the most cost-efficient way. Recently, multiple neural estimators for Monge maps have been developed and applied in diverse unpaired domain translation tasks, e.g. in single-cell biology and computer vision. However, the classic OT framework enforces mass conservation, which\nmakes it prone to outliers and limits its applicability in real-world scenarios. The latter can be particularly harmful in OT domain translation tasks, where the relative position of a sample within a distribution is explicitly taken into account. While unbalanced OT tackles this challenge in the discrete setting, its integration into neural Monge map estimators has received limited attention. We propose a theoretically\ngrounded method to incorporate unbalancedness into any Monge map estimator. We improve existing estimators to model cell trajectories over time and to predict cellular responses to perturbations. Moreover, our approach seamlessly integrates with the OT flow matching (OT-FM) framework. 
While we show that OT-FM performs competitively in image translation, we further improve performance by\nincorporating unbalancedness (UOT-FM), which better preserves relevant features. We hence establish UOT-FM as a principled method for unpaired image translation.", "keywords": "optimal transport;domain translation;image translation;flow matching", "primary_area": "generative models", "supplementary_material": "", "author": "Luca Eyring;Dominik Klein;Th\u00e9o Uscidda;Giovanni Palla;Niki Kilbertus;Zeynep Akata;Fabian J Theis", "authorids": "~Luca_Eyring1;~Dominik_Klein1;~Th\u00e9o_Uscidda1;~Giovanni_Palla1;~Niki_Kilbertus1;~Zeynep_Akata1;~Fabian_J_Theis1", "gender": "M;M;M;;F;;M", "homepage": "https://scholar.google.com/citations?user=Zs1w-ukAAAAJ&hl=en&oi=ao;https://theouscidda6.github.io/;https://giovannipalla.com/;;https://eml-unitue.de/people/zeynep-akata;https://www.helmholtz-munich.de/en/icb/pi/fabian-theis;https://lucaeyring.com/", "dblp": ";;;202/1966;117/4838;t/FabianJTheis;361/7132", "google_scholar": "Zs1w-ukAAAAJ;xnQZonMAAAAJ;20uwxzkAAAAJ;uQZjTq4AAAAJ;jQl9RtkAAAAJ;sqWpn2AAAAAJ;", "orcid": ";;0000-0002-8004-4462;;0000-0002-1432-7747;0000-0002-2419-1943;", "linkedin": ";th\u00e9o-uscidda-926335174/;;;zeynep-akata-36182045/?ppe=1;;luca-eyring-74abb01b2/", "or_profile": "~Dominik_Klein1;~Th\u00e9o_Uscidda1;~Giovanni_Palla1;~Niki_Kilbertus1;~Zeynep_Akata1;~Fabian_J._Theis1;~Luca_Vincent_Eyring1", "aff": ";Ecole Nationale de la Statistique et de l'Administration Economique;Technische Universit\u00e4t M\u00fcnchen;Helmholtz AI;Helmholtz Munich;Technical University Munich;Eberhard-Karls-Universit\u00e4t T\u00fcbingen", "aff_domain": ";ensae.fr;tum.de;helmholtz-muenchen.de;helmholtz-munich.de;tum.de;uni-tuebingen.de", "position": ";PhD student;PhD student;Group Leader;Researcher;Full Professor;PhD student", "bibtex": "@inproceedings{\neyring2024unbalancedness,\ntitle={Unbalancedness in Neural Monge Maps Improves Unpaired Domain Translation},\nauthor={Luca Eyring and Dominik Klein and Th{\\'e}o Uscidda and Giovanni Palla and Niki Kilbertus and Zeynep Akata and Fabian J Theis},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=2UnCj3jeao}\n}", "github": "", "project": "", "reviewers": "1iBx;Rczx;qjsW;LDyN", "pdf_size": 10584155, "rating": "6;6;6;6", "confidence": "3;3;3;4", "soundness": "2;3;3;3", "contribution": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "60;120;117;33", "wc_strengths": "46;64;131;54", "wc_weaknesses": "41;37;78;144", "wc_questions": "162;288;40;6", "wc_review": "309;509;366;237", "wc_reply_reviewers": "42;48;0;0", "wc_reply_authors": "993;943;561;439", "reply_reviewers": "1;1;0;0", "reply_authors": "2;2;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 82.5, 37.259227045122664 ], "wc_strengths_avg": [ 73.75, 33.662850443775554 ], "wc_weaknesses_avg": [ 75.0, 42.924352062669506 ], "wc_questions_avg": [ 124.0, 111.04053313993049 ], "wc_review_avg": [ 355.25, 99.84581864054198 ], "wc_reply_reviewers_avg": [ 22.5, 22.599778759979046 ], "wc_reply_authors_avg": [ 734.0, 238.5979882563975 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 12, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=16466905825496249577&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=2UnCj3jeao", "pdf": "https://openreview.net/pdf?id=2UnCj3jeao", "email": ";ensae.fr;tum.de;helmholtz-muenchen.de;helmholtz-munich.de;tum.de;uni-tuebingen.de", "author_num": 7, "aff_unique_index": "0;1;2;3;4;5", "aff_unique_norm": "Ecole Nationale de la Statistique et de l'Administration Economique;Technische Universit\u00e4t M\u00fcnchen;Helmholtz Association of German Research Centres;Helmholtz Zentrum M\u00fcnchen;Technical University of Munich;Eberhard Karls University of T\u00fcbingen", "aff_unique_dep": ";;Helmholtz AI;;;", "aff_unique_url": "https://ensae.fr;https://www.tum.de;https://www.helmholtz-ai.de;https://www.helmholtz-muenchen.de;https://www.tum.de;https://www.uni-tuebingen.de/", "aff_unique_abbr": "ENSAE;TUM;Helmholtz AI;HMGU;TUM;Uni T\u00fcbingen", "aff_campus_unique_index": "1", "aff_campus_unique": ";T\u00fcbingen", "aff_country_unique_index": "0;1;1;1;1;1", "aff_country_unique": "France;Germany" }, { "id": "2UxSXuzrap", "title": "Learning the Unlearnable: Adversarial Augmentations Suppress Unlearnable Example Attacks", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Unlearnable example attacks are data poisoning techniques that can be used to safeguard public data against unauthorized use for training deep learning models. These methods add stealthy perturbations to the original image, thereby making it difficult for deep learning models to learn from these training data effectively. Current research suggests that adversarial training can, to a certain degree, mitigate the impact of unlearnable example attacks, while common data augmentation methods are not effective against such poisons. Adversarial training, however, demands considerable computational resources and can result in non-trivial accuracy loss. In this paper, we introduce the UEraser method, which outperforms current defenses against different types of state-of-the-art unlearnable example attacks through a combination of effective data augmentation policies and loss-maximizing adversarial augmentations. In stark contrast to the current SOTA adversarial training methods, UEraser uses adversarial augmentations, which extends beyond the confines of $\\ell_p$ perturbation budget assumed by current unlearning attacks and defenses. It also helps to improve the model's generalization ability, thus protecting against accuracy loss. UEraser wipes out the unlearning effect with loss-maximizing adversarial augmentations, thus restoring trained model accuracies. Interestingly, UEraser-Lite, a fast variant without adversarial augmentations, is also highly effective in preserving clean accuracies. On challenging unlearnable CIFAR-10, CIFAR-100, SVHN, and ImageNet-subset datasets produced with various attacks, it achieves results that are comparable to those obtained during clean training. We also demonstrate its efficacy against possible adaptive attacks. 
Our code is open source and available to the deep learning community.", "keywords": "Availability Attacks;Unlearnable Examples;Adversarial Machine Learning", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/cb66b6bd43a3dc69768c49799caf6defcb95f2d5.zip", "author": "Tianrui Qin;Xitong Gao;Juanjuan Zhao;Kejiang Ye;Cheng-zhong Xu", "authorids": "~Tianrui_Qin1;~Xitong_Gao1;~Juanjuan_Zhao1;~Kejiang_Ye1;~Cheng-zhong_Xu1", "gender": "M;M;F;;", "homepage": "https://github.com/Tianyue818;https://github.com/admk;https://people.ucas.ac.cn/~zhaojuanjuan?language=cn;;", "dblp": "294/4992;140/2071;;;", "google_scholar": "_bOMXMkAAAAJ;-YIUCL8AAAAJ;;;", "orcid": "0009-0002-8386-2003;0000-0002-2063-2051;;;", "linkedin": ";;;;", "or_profile": "~Tianrui_Qin1;~Xitong_Gao1;~Juanjuan_Zhao1;~Kejiang_Ye1;~Cheng-zhong_Xu1", "aff": "Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Chinese Academy of Sciences;Shenzhen Institute of Advanced Technology, Chinese Academy of Sciences;Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Chinese Academy of Sciences;;", "aff_domain": "siat.ac.cn;siat.ac.cn;siat.ac.cn;;", "position": "MS student;Researcher;Associate Professor;;", "bibtex": "@misc{\nqin2024learning,\ntitle={Learning the Unlearnable: Adversarial Augmentations Suppress Unlearnable Example Attacks},\nauthor={Tianrui Qin and Xitong Gao and Juanjuan Zhao and Kejiang Ye and Cheng-zhong Xu},\nyear={2024},\nurl={https://openreview.net/forum?id=2UxSXuzrap}\n}", "github": "", "project": "", "reviewers": "iWnX;3r2o;E5wZ;6kgo", "site": "https://openreview.net/forum?id=2UxSXuzrap", "pdf_size": 3562476, "rating": "1;3;6;6", "confidence": "5;4;2;3", "soundness": "2;2;3;3", "contribution": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "181;77;72;72", "wc_strengths": "29;55;50;109", "wc_weaknesses": "547;365;112;103", "wc_questions": "3;210;11;2", "wc_review": "760;707;245;286", "wc_reply_reviewers": "402;0;0;0", "wc_reply_authors": "808;657;191;210", "reply_reviewers": "1;0;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 4.0, 2.1213203435596424 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 100.5, 46.521500405726385 ], "wc_strengths_avg": [ 60.75, 29.51588555337617 ], "wc_weaknesses_avg": [ 281.75, 185.77859806769993 ], "wc_questions_avg": [ 56.5, 88.69188237939254 ], "wc_review_avg": [ 499.5, 235.19619469710815 ], "wc_reply_reviewers_avg": [ 100.5, 174.07110616067217 ], "wc_reply_authors_avg": [ 466.5, 271.38763789089586 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.9486832980505139, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12527428757192423846&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Chinese Academy of Sciences;Shenzhen Institute of Advanced Technology", "aff_unique_dep": "Shenzhen Institutes of Advanced Technology;", "aff_unique_url": "http://www.cas.cn;http://www.siat.cas.cn", "aff_unique_abbr": "CAS;SIAT", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Shenzhen", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "On the Over-Memorization During Natural, 
Robust and Catastrophic Overfitting", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19547", "id": "2V1Z0Jdmss", "author_site": "Runqi Lin, Chaojian Yu, Bo Han, Tongliang Liu", "tldr": "", "abstract": "Overfitting negatively impacts the generalization ability of deep neural networks (DNNs) in both natural and adversarial training. Existing methods struggle to consistently address different types of overfitting, typically designing strategies that focus separately on either natural or adversarial patterns. In this work, we adopt a unified perspective by solely focusing on natural patterns to explore different types of overfitting. Specifically, we examine the memorization effect in DNNs and reveal a shared behaviour termed over-memorization, which impairs their generalization capacity. This behaviour manifests as DNNs suddenly becoming high-confidence in predicting certain training patterns and retaining a persistent memory for them. Furthermore, when DNNs over-memorize an adversarial pattern, they tend to simultaneously exhibit high-confidence prediction for the corresponding natural pattern. These findings motivate us to holistically mitigate different types of overfitting by hindering the DNNs from over-memorization training patterns. To this end, we propose a general framework, $\\textit{Distraction Over-Memorization}$ (DOM), which explicitly prevents over-memorization by either removing or augmenting the high-confidence natural patterns. Extensive experiments demonstrate the effectiveness of our proposed method in mitigating overfitting across various training paradigms.", "keywords": "overfitting;natural overfitting;robust overfitting;catastrophic overfitting", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Runqi Lin;Chaojian Yu;Bo Han;Tongliang Liu", "authorids": "~Runqi_Lin1;~Chaojian_Yu1;~Bo_Han1;~Tongliang_Liu1", "gender": "M;M;M;M", "homepage": "https://runqilin.github.io;;https://tongliang-liu.github.io/;https://bhanml.github.io/", "dblp": "359/1108;223/9872;150/6667;241/0472-3", "google_scholar": "Zg7PKbcAAAAJ;b3ltuG8AAAAJ;https://scholar.google.com.au/citations?user=EiLdZ_YAAAAJ;nTNjqHwAAAAJ", "orcid": "0009-0000-6607-7754;;;", "linkedin": ";;;", "or_profile": "~Runqi_Lin1;~Chaojian_Yu1;~Tongliang_Liu1;~bo_han2", "aff": "University of Sydney;The University of Sydney;Mohamed bin Zayed University of Artificial Intelligence;MBZUAI", "aff_domain": "usyd.edu.au;uni.sydney.edu.au;mbzuai.ac.ae;mbzuai.ac.ae", "position": "PhD student;PhD student;Affiliated Associate Professor;Researcher", "bibtex": "@inproceedings{\nlin2024on,\ntitle={On the Over-Memorization During Natural, Robust and Catastrophic Overfitting},\nauthor={Runqi Lin and Chaojian Yu and Bo Han and Tongliang Liu},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=2V1Z0Jdmss}\n}", "github": "", "project": "", "reviewers": "uFwf;mcnh;3ew1;2dX7", "pdf_size": 12791991, "rating": "5;6;6;8", "confidence": "4;4;4;4", "soundness": "2;3;3;3", "contribution": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "162;44;121;79", "wc_strengths": "107;40;105;42", "wc_weaknesses": "253;128;140;126", "wc_questions": "114;5;87;2", "wc_review": "636;217;453;249", "wc_reply_reviewers": "348;0;0;0", "wc_reply_authors": "1246;1087;575;709", "reply_reviewers": "2;0;0;0", "reply_authors": "6;4;3;3", "rating_avg": [ 6.25, 
1.0897247358851685 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 101.5, 44.30857704779064 ], "wc_strengths_avg": [ 73.5, 32.515380975778214 ], "wc_weaknesses_avg": [ 161.75, 52.954579594214515 ], "wc_questions_avg": [ 52.0, 49.441885077330944 ], "wc_review_avg": [ 388.75, 169.03309587178484 ], "wc_reply_reviewers_avg": [ 87.0, 150.68842025849233 ], "wc_reply_authors_avg": [ 904.25, 272.35948211876155 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 4.0, 1.224744871391589 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14175200537487355709&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=2V1Z0Jdmss", "pdf": "https://openreview.net/pdf?id=2V1Z0Jdmss", "email": "usyd.edu.au;uni.sydney.edu.au;mbzuai.ac.ae;mbzuai.ac.ae", "author_num": 4, "aff_unique_index": "0;0;1;1", "aff_unique_norm": "University of Sydney;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "https://www.sydney.edu.au;https://mbzuai.ac.ae", "aff_unique_abbr": "USYD;MBZUAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1", "aff_country_unique": "Australia;United Arab Emirates" }, { "id": "2VAi5F9BOJ", "title": "PLPP: PROMPT LEARNING WITH PERPLEXITY FOR VISION-LANGUAGE MODELS", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Pre-trained vision-language (VL) models such as CLIP have demonstrated their excellent performance across numerous downstream tasks. A recent method, called Context Optimization (CoOp), further improves the performance of CLIP on downstream tasks by introducing prompt learning. CoOp optimizes a set of learnable vectors, aka prompt and freezes the whole CLIP model, instead of using manually crafted templates (e.g., a template ``a photo of a \\{category\\}'') to fine-tune the CLIP model. Nonetheless, we observed that the resulting prompts are always incomprehensible, which is counter-intuitive, and existing CoOp-based methods overlook this issue. As the first work aiming at learning comprehensible prompts, this paper proposes to use Perplexity to supervise the process of prompt learning in the CoOp framework. Perplexity is a metric to evaluate the quality of a language model (LM) in Natural Language Processing field, and we design a two-step operation to compute the perplexity for prompts. The first step is a calculation of cosine similarity to obtain the labels of vectors, and the second step is a training-free LM Head to output word probability distribution. Our proposed method, i.e., \\textbf{P}rompt \\textbf{L}earning with \\textbf{P}er\\textbf{P}lexity (PLPP), can be integrated in any CoOp-based method and the experiments show that the learned prompts are much more comprehensible compared with the original and an improved CoOp methods, without sacrificing model accuracy. 
Codes are available at \\href{https://github.com}{https://github.com}.", "keywords": "Vision-Language Models;Prompt Learning;Perplexity.", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/d0440dfa3c5eaa5e0e5608f7abe900e2d2e07894.pdf", "author": "Biao Liu;Wenyi Fang;Xiaoyu Wu;YANG ZHENG;zheng hu;Bo Yuan", "authorids": "~Biao_Liu2;~Wenyi_Fang2;~Xiaoyu_Wu3;~YANG_ZHENG5;~zheng_hu3;~Bo_Yuan14", "gender": "M;M;F;M;;M", "homepage": ";https://www.researchgate.net/profile/Wenyi-Fang-2;;;http://www.none.com;http://cse.sustech.edu.cn/faculty/~yuanb", "dblp": ";;;;;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;bNn7VkEAAAAJ;Vi5OPpoAAAAJ;;", "orcid": ";;;0000-0002-3775-6739;;", "linkedin": ";;;;;", "or_profile": "~Biao_Liu2;~Wenyi_Fang2;~Xiaoyu_Wu3;~YANG_ZHENG5;~zheng_hu3;~Bo_Yuan14", "aff": "Southern University of Science and Technology;;;Huawei Technologies Ltd.;Huawei Technologies Ltd.;", "aff_domain": "sustech.edu.cn;;;huawei.com;huawei.com;", "position": "MS student;;;Researcher;Principal Researcher;", "bibtex": "@misc{\nliu2024plpp,\ntitle={{PLPP}: {PROMPT} {LEARNING} {WITH} {PERPLEXITY} {FOR} {VISION}-{LANGUAGE} {MODELS}},\nauthor={Biao Liu and Wenyi Fang and Xiaoyu Wu and YANG ZHENG and zheng hu and Bo Yuan},\nyear={2024},\nurl={https://openreview.net/forum?id=2VAi5F9BOJ}\n}", "github": "", "project": "", "reviewers": "6oUS;NG2C;42yP;hPSs", "site": "https://openreview.net/forum?id=2VAi5F9BOJ", "pdf_size": 1268300, "rating": "1;3;3;3", "confidence": "4;5;5;5", "soundness": "1;2;2;2", "contribution": "1;2;1;2", "presentation": "2;3;2;2", "wc_summary": "58;67;45;84", "wc_strengths": "7;30;31;118", "wc_weaknesses": "263;305;224;426", "wc_questions": "86;5;5;250", "wc_review": "414;407;305;878", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "392;338;447;171", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 2.5, 0.8660254037844386 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 1.75, 0.4330127018922193 ], "contribution_avg": [ 1.5, 0.5 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 63.5, 14.186260959111108 ], "wc_strengths_avg": [ 46.5, 42.38218965556169 ], "wc_weaknesses_avg": [ 304.5, 75.77103668289091 ], "wc_questions_avg": [ 86.5, 100.02124774266716 ], "wc_review_avg": [ 501.0, 221.89524555519435 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 337.0, 103.29811227704019 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:w-vWvHN_dSEJ:scholar.google.com/&scioq=PLPP:+PROMPT+LEARNING+WITH+PERPLEXITY+FOR+VISION-LANGUAGE+MODELS&hl=en&as_sdt=0,47", "gs_version_total": 0, "aff_unique_index": "0;1;1", "aff_unique_norm": "Southern University of Science and Technology;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "https://www.sustech.edu.cn;https://www.huawei.com", "aff_unique_abbr": "SUSTech;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "sRGB Real Noise Modeling via Noise-Aware Sampling with Normalizing Flows", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19546", "id": "2XBBumBGeP", "author_site": "Dongjin Kim, Donggoo Jung, Sungyong Baik, Tae Hyun Kim", 
"tldr": "", "abstract": "Noise poses a widespread challenge in signal processing, particularly when it comes to denoising images. Although convolutional neural networks (CNNs) have exhibited remarkable success in this field, they are predicated upon the belief that noise follows established distributions, which restricts their practicality when dealing with real-world noise. To overcome this limitation, several efforts have been taken to collect noisy image datasets from the real world. Generative methods, employing techniques such as generative adversarial networks (GANs) and normalizing flows (NFs), have emerged as a solution for generating realistic noisy images. Recent works model noise using camera metadata, however requiring metadata even for sampling phase. In contrast, in this work, we aim to estimate the underlying camera settings, enabling us to improve noise modeling and generate diverse noise distributions. To this end, we introduce a new NF framework that allows us to both classify noise based on camera settings and generate various noisy images. Through experimental results, our model demonstrates exceptional noise quality and leads in denoising performance on benchmark datasets.", "keywords": "sRGB real noise modeling;Normalizing flow;Low-level vision", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Dongjin Kim;Donggoo Jung;Sungyong Baik;Tae Hyun Kim", "authorids": "~Dongjin_Kim3;~Donggoo_Jung1;~Sungyong_Baik1;~Tae_Hyun_Kim2", "gender": ";M;M;M", "homepage": "https://sites.google.com/view/lliger9/;https://donggoo-jung.github.io;https://dsybaik-hy.github.io/;https://sites.google.com/view/lliger9/", "dblp": "16/9611-4;;243/2775;43/11343-6", "google_scholar": "https://scholar.google.co.kr/citations?user=6I9aJxYAAAAJ;https://scholar.google.co.kr/citations?user=yXJ05SwAAAAJ;lQ4gotkAAAAJ;https://scholar.google.co.kr/citations?user=8soccsoAAAAJ", "orcid": ";;;0000-0002-7995-3984", "linkedin": ";;;", "or_profile": "~Dongjin_Kim3;~Donggoo_Jung1;~Sungyong_Baik1;~Tae_Hyun_Kim2", "aff": "Hanyang University;Hanyang University;Hanyang University;Hanyang University", "aff_domain": "hanyang.ac.kr;hanyang.ac.kr;hanyang.ac.kr;hanyang.ac.kr", "position": "PhD student;PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nkim2024srgb,\ntitle={s{RGB} Real Noise Modeling via Noise-Aware Sampling with Normalizing Flows},\nauthor={Dongjin Kim and Donggoo Jung and Sungyong Baik and Tae Hyun Kim},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=2XBBumBGeP}\n}", "github": "", "project": "", "reviewers": "VvGJ;EqxC;YHPu;s3bG", "pdf_size": 2920526, "rating": "6;6;6;8", "confidence": "5;4;4;5", "soundness": "3;3;3;3", "contribution": "3;3;3;3", "presentation": "3;3;4;3", "wc_summary": "67;67;92;46", "wc_strengths": "57;45;98;78", "wc_weaknesses": "32;69;213;329", "wc_questions": "61;2;5;20", "wc_review": "217;183;408;473", "wc_reply_reviewers": "10;16;0;0", "wc_reply_authors": "339;264;304;1629", "reply_reviewers": "1;1;0;0", "reply_authors": "2;2;1;3", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 68.0, 16.294170736800325 ], "wc_strengths_avg": [ 69.5, 20.254629100529094 ], "wc_weaknesses_avg": [ 160.75, 118.3561891072875 ], 
"wc_questions_avg": [ 22.0, 23.526580712037184 ], "wc_review_avg": [ 320.25, 123.01498892411445 ], "wc_reply_reviewers_avg": [ 6.5, 6.837397165588672 ], "wc_reply_authors_avg": [ 634.0, 575.0760819230791 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:pp5KzMwhRq8J:scholar.google.com/&scioq=sRGB+Real+Noise+Modeling+via+Noise-Aware+Sampling+with+Normalizing+Flows&hl=en&as_sdt=0,5", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=2XBBumBGeP", "pdf": "https://openreview.net/pdf?id=2XBBumBGeP", "email": "hanyang.ac.kr;hanyang.ac.kr;hanyang.ac.kr;hanyang.ac.kr", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Hanyang University", "aff_unique_dep": "", "aff_unique_url": "https://www.hanyang.ac.kr", "aff_unique_abbr": "HYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Efficient and Scalable Graph Generation through Iterative Local Expansion", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19545", "id": "2XkTz7gdpc", "author_site": "Andreas Bergmeister, Karolis Martinkus, Nathana\u00ebl Perraudin, Roger Wattenhofer", "tldr": "", "abstract": "In the realm of generative models for graphs, extensive research has been conducted. However, most existing methods struggle with large graphs due to the complexity of representing the entire joint distribution across all node pairs and capturing both global and local graph structures simultaneously.\nTo overcome these issues, we introduce a method that generates a graph by progressively expanding a single node to a target graph. In each step, nodes and edges are added in a localized manner through denoising diffusion, building first the global structure, and then refining the local details. The local generation avoids modeling the entire joint distribution over all node pairs, achieving substantial computational savings with subquadratic runtime relative to node count while maintaining high expressivity through multiscale generation.\nOur experiments show that our model achieves state-of-the-art performance on well-established benchmark datasets while successfully scaling to graphs with at least 5000 nodes. 
Our method is also the first to successfully extrapolate to graphs outside of the training distribution, showcasing a much better generalization capability over existing methods.", "keywords": "Graph Generation;Denoising Diffusion;Spectral Graph Theory", "primary_area": "generative models", "supplementary_material": "/attachment/0ee2694b3749856225c65d6a4b2b7ca91de6a422.zip", "author": "Andreas Bergmeister;Karolis Martinkus;Nathana\u00ebl Perraudin;Roger Wattenhofer", "authorids": "~Andreas_Bergmeister1;~Karolis_Martinkus1;~Nathana\u00ebl_Perraudin1;~Roger_Wattenhofer1", "gender": "M;M;M;Not Specified", "homepage": "https://bergmeister.ai/;https://disco.ethz.ch/members/mkarolis;;https://disco.ethz.ch/members/wroger", "dblp": ";276/5531;139/7579;w/RogerWattenhofer", "google_scholar": "CvdjBd8AAAAJ;https://scholar.google.ch/citations?user=Sr6ho54AAAAJ;;https://scholar.google.ch/citations?user=EG3VPm4AAAAJ", "orcid": "0009-0000-8039-3577;0000-0002-5344-4321;;", "linkedin": "andreas-bergmeister;;;roger-wattenhofer-4466731/", "or_profile": "~Andreas_Bergmeister1;~Karolis_Martinkus1;~Nathana\u00ebl_Perraudin1;~Roger_Wattenhofer1", "aff": "Technische Universit\u00e4t M\u00fcnchen;Prescient Design / Genentech / Roche;ETHZ - ETH Zurich;Swiss Federal Institute of Technology", "aff_domain": "tum.de;roche.com;ethz.ch;ethz.ch", "position": "PhD student;Researcher;Researcher;Full Professor", "bibtex": "@inproceedings{\nbergmeister2024efficient,\ntitle={Efficient and Scalable Graph Generation through Iterative Local Expansion},\nauthor={Andreas Bergmeister and Karolis Martinkus and Nathana{\\\"e}l Perraudin and Roger Wattenhofer},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=2XkTz7gdpc}\n}", "github": "", "project": "", "reviewers": "HbqV;sNQQ;38QF;mLXw", "pdf_size": 2488113, "rating": "5;5;6;8", "confidence": "4;4;2;3", "soundness": "3;2;3;3", "contribution": "3;2;2;3", "presentation": "2;2;2;2", "wc_summary": "156;65;39;134", "wc_strengths": "174;68;25;62", "wc_weaknesses": "314;699;209;178", "wc_questions": "205;318;24;152", "wc_review": "849;1150;297;526", "wc_reply_reviewers": "41;256;221;21", "wc_reply_authors": "1472;1611;652;350", "reply_reviewers": "1;1;1;1", "reply_authors": "3;3;2;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 98.5, 48.03384223648989 ], "wc_strengths_avg": [ 82.25, 55.472403048723244 ], "wc_weaknesses_avg": [ 350.0, 207.70291283465428 ], "wc_questions_avg": [ 174.75, 105.68674230952527 ], "wc_review_avg": [ 705.5, 322.98026255485024 ], "wc_reply_reviewers_avg": [ 134.75, 104.72434053265745 ], "wc_reply_authors_avg": [ 1021.25, 533.3626228936557 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.4923659639173309, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1056667947435683956&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "openreview": "https://openreview.net/forum?id=2XkTz7gdpc", "pdf": "https://openreview.net/pdf?id=2XkTz7gdpc", "email": "tum.de;roche.com;ethz.ch;ethz.ch", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Technische Universit\u00e4t M\u00fcnchen;Roche;ETH Zurich;Swiss Federal Institute of Technology", "aff_unique_dep": 
";;;", "aff_unique_url": "https://www.tum.de;https://www.roche.com;https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "TUM;Roche;ETHZ;ETH Zurich", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "Germany;Switzerland" }, { "id": "2XwBIcywWM", "title": "Learning Variational Neighbor Labels for Test-Time Domain Generalization", "track": "main", "status": "Reject", "tldr": "", "abstract": "This paper strives for domain generalization, where models are trained exclusively on source domains before being deployed on unseen target domains. We follow the strict separation of source training and target testing, but exploit the value of the unlabeled target data itself during inference. We make three contributions. First, we propose probabilistic pseudo-labeling of target samples to generalize the source-trained model to the target domain at test time. We formulate the generalization at test time as a variational inference problem, by modeling pseudo labels as distributions, to consider the uncertainty during generalization and alleviate the misleading signal of inaccurate pseudo labels. Second, we learn variational neighbor labels that incorporate the information of neighboring target samples to generate more robust pseudo labels. Third, to learn the ability to incorporate more representative target information and generate more precise and robust variational neighbor labels, we introduce a meta-generalization stage during training to simulate the generalization procedure. Experiments on seven widely-used datasets demonstrate the benefits, abilities, and effectiveness of our proposal.", "keywords": "test-time generalization;domain generalization;variational inference", "primary_area": "probabilistic methods (Bayesian methods, variational inference, sampling, UQ, etc.)", "supplementary_material": "/attachment/b10340de65b51f95d799e346fae7e3c996a05b02.zip", "author": "Sameer Ambekar;Zehao Xiao;Jiayi Shen;Xiantong Zhen;Cees G. M. Snoek", "authorids": "~Sameer_Ambekar1;~Zehao_Xiao1;~Jiayi_Shen3;~Xiantong_Zhen1;~Cees_G._M._Snoek1", "gender": "M;M;F;M;M", "homepage": "https://ambekarsameer.com;https://zzzx1224.github.io/;https://autumn9999.github.io/;;http://www.ceessnoek.info", "dblp": "267/5374;225/5426;;78/10651;s/CeesSnoek", "google_scholar": "bf8p2wMAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.ca/citations?user=DnBb3e0AAAAJ;https://scholar.google.nl/citations?user=0uKdbscAAAAJ", "orcid": "0000-0002-8650-3180;;;;0000-0001-9092-1556", "linkedin": "ambekarsameer/;;;;cgmsnoek/", "or_profile": "~Sameer_Ambekar1;~Zehao_Xiao1;~Jiayi_Shen3;~Xiantong_Zhen1;~Cees_Snoek1", "aff": "Technische Universit\u00e4t M\u00fcnchen;University of Amsterdam;University of Amsterdam;United Imaging Healthcare, Co., Ltd.;University of Amsterdam", "aff_domain": "tum.de;uva.nl;uva.nl;cri-united-imaging.com;uva.nl", "position": "PhD student;PhD student;PhD student;Principal Researcher;Full Professor", "bibtex": "@misc{\nambekar2024learning,\ntitle={Learning Variational Neighbor Labels for Test-Time Domain Generalization},\nauthor={Sameer Ambekar and Zehao Xiao and Jiayi Shen and Xiantong Zhen and Cees G. M. 
Snoek},\nyear={2024},\nurl={https://openreview.net/forum?id=2XwBIcywWM}\n}", "github": "", "project": "", "reviewers": "4tVC;CA6h;JCc1;Hsmr", "site": "https://openreview.net/forum?id=2XwBIcywWM", "pdf_size": 1947754, "rating": "5;5;5;5", "confidence": "4;3;3;3", "soundness": "2;3;2;3", "contribution": "2;2;2;3", "presentation": "1;2;2;3", "wc_summary": "76;199;50;106", "wc_strengths": "24;29;21;65", "wc_weaknesses": "237;210;112;27", "wc_questions": "4;525;5;2", "wc_review": "341;963;188;200", "wc_reply_reviewers": "0;26;0;0", "wc_reply_authors": "662;666;580;117", "reply_reviewers": "0;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 107.75, 56.286654723833074 ], "wc_strengths_avg": [ 34.75, 17.69710428290459 ], "wc_weaknesses_avg": [ 146.5, 83.20606949015198 ], "wc_questions_avg": [ 134.0, 225.74653928687368 ], "wc_review_avg": [ 423.0, 317.52086545611456 ], "wc_reply_reviewers_avg": [ 6.5, 11.258330249197702 ], "wc_reply_authors_avg": [ 506.25, 227.33936636667218 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17881603264637544125&as_sdt=8005&sciodt=0,7&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;1;2;1", "aff_unique_norm": "Technische Universit\u00e4t M\u00fcnchen;University of Amsterdam;United Imaging Healthcare", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tum.de;https://www.uva.nl;https://www.united-imaging.com", "aff_unique_abbr": "TUM;UvA;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;2;1", "aff_country_unique": "Germany;Netherlands;China" }, { "id": "2Y5Gseybzp", "title": "Imprecise Label Learning: A Unified Framework for Learning with Various Imprecise Label Configurations", "track": "main", "status": "Reject", "tldr": "", "abstract": "Learning with reduced labeling standards, such as noisy label, partial label, and multiple label candidates, which we generically refer to as imprecise labels, is a commonplace challenge in machine learning tasks. Previous methods tend to propose specific designs for every emerging imprecise label configuration, which is usually unsustainable when multiple configurations of imprecision coexist. In this paper, we introduce imprecise label learning (ILL), a framework towards the unification of learning with various imprecise label configurations.\nILL leverages expectation-maximization (EM) for modeling the imprecise label information, treating the precise labels as latent variables.\nInstead of approximating the correct labels for training, it considers the entire distribution of all possible labeling entailed by the imprecise information. We demonstrate that ILL can seamlessly adapt to partial label learning, semi-supervised learning, noisy label learning, and more importantly, a mixture of these settings. Notably, ILL surpasses the existing specified techniques for handling imprecise labels, marking the first unified framework with robust and effective performance across various challenging settings. 
We hope our work will inspire further research on this topic, unleashing the full potential of ILL in wider scenarios where precise labels are expensive and complicated to obtain.", "keywords": "Imprecise Label Learning; Partial Label Learning; Noisy Label Learning; Semi-Supervised Learning; Expectation-Maximization", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/e25f7b08b1c6086eea0ac0095c71559ffe465b91.zip", "author": "Hao Chen;Ankit Shah;Jindong Wang;Ran Tao;Yidong Wang;Xing Xie;Masashi Sugiyama;Rita Singh;Bhiksha Raj", "authorids": "~Hao_Chen15;~Ankit_Shah1;~Jindong_Wang1;~Ran_Tao2;~Yidong_Wang1;~Xing_Xie3;~Masashi_Sugiyama1;~Rita_Singh1;~Bhiksha_Raj1", "gender": "M;M;F;M;M;M;F;M;M", "homepage": "https://hhhhhhao.github.io/;https://ankitshah009.github.io/;;https://qianlanwyd.github.io/;http://research.microsoft.com/en-us/people/xingx/;http://www.ms.k.u-tokyo.ac.jp/sugi/;http://mlsp.cs.cmu.edu/people/rsingh/index.html;https://www.cs.cmu.edu/directory/bhikshar/;https://jd92.wang/", "dblp": ";04/1935-1.html;99/955;59/6759.html;08/6809-1;35/1228;;60/3996;19/2969-1", "google_scholar": "tktqkhwAAAAJ;https://scholar.google.co.in/citations?user=TqG1H4cAAAAJ;7xW2y6EAAAAJ;;5EQfAFIAAAAJ;https://scholar.google.co.jp/citations?user=GkYIrlIAAAAJ;;;hBZ_tKsAAAAJ", "orcid": ";0000-0002-8838-5421;;;0000-0002-8608-8482;0000-0001-6658-6743;;;0000-0002-4833-0880", "linkedin": "haochen97/;ankpsh01/;;;xingx/;;;;jindong-wang/", "or_profile": "~Hao_Chen15;~Ankit_Shah1;~Ran_Tao2;~Yidong_Wang1;~Xing_Xie3;~Masashi_Sugiyama1;~Rita_Singh1;~Bhiksha_Raj1;~Jindong_Wang4", "aff": "Carnegie Mellon University;Accenture;;Peking University;Microsoft Research Asia;The University of Tokyo;School of Computer Science, Carnegie Mellon University;Mohamed bin Zayed University of Artificial Intelligence;Microsoft Research", "aff_domain": "andrew.cmu.edu;accenture.com;;pku.edu.cn;microsoft.com;u-tokyo.ac.jp;cs.cmu.edu;mbzuai.ac.ae;microsoft.com", "position": "PhD student;Principal Researcher;;PhD student;Senior Principal Researcher;Full Professor;Research Professor;Full Professor;Researcher", "bibtex": "@misc{\nchen2024imprecise,\ntitle={Imprecise Label Learning: A Unified Framework for Learning with Various Imprecise Label Configurations},\nauthor={Hao Chen and Ankit Shah and Jindong Wang and Ran Tao and Yidong Wang and Xing Xie and Masashi Sugiyama and Rita Singh and Bhiksha Raj},\nyear={2024},\nurl={https://openreview.net/forum?id=2Y5Gseybzp}\n}", "github": "", "project": "", "reviewers": "yf7f;AiQR;torK;QeBS", "site": "https://openreview.net/forum?id=2Y5Gseybzp", "pdf_size": 825344, "rating": "5;5;6;8", "confidence": "4;2;4;4", "soundness": "2;3;3;3", "contribution": "2;3;3;1", "presentation": "2;2;2;3", "wc_summary": "108;138;155;53", "wc_strengths": "42;68;181;66", "wc_weaknesses": "367;144;140;144", "wc_questions": "80;71;181;2", "wc_review": "597;421;657;265", "wc_reply_reviewers": "77;229;0;0", "wc_reply_authors": "1513;1513;1249;511", "reply_reviewers": "1;1;0;0", "reply_authors": "4;3;3;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 113.5, 38.771768079364136 ], "wc_strengths_avg": [ 89.25, 53.9507877606991 ], "wc_weaknesses_avg": [ 198.75, 97.15290783090335 ], "wc_questions_avg": [ 83.5, 63.869006568131304 ], 
"wc_review_avg": [ 485.0, 153.80507143784303 ], "wc_reply_reviewers_avg": [ 76.5, 93.4893042010689 ], "wc_reply_authors_avg": [ 1196.5, 410.18623818943513 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.4714045207910316, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11537090440769158708&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;2;3;4;0;5;3", "aff_unique_norm": "Carnegie Mellon University;Accenture;Peking University;Microsoft;University of Tokyo;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";;;Research;;", "aff_unique_url": "https://www.cmu.edu;https://www.accenture.com;http://www.pku.edu.cn;https://www.microsoft.com/en-us/research/group/asia;https://www.u-tokyo.ac.jp;https://mbzuai.ac.ae", "aff_unique_abbr": "CMU;Accenture;Peking U;MSR Asia;UTokyo;MBZUAI", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Asia;Pittsburgh", "aff_country_unique_index": "0;0;1;1;2;0;3;0", "aff_country_unique": "United States;China;Japan;United Arab Emirates" }, { "title": "MEND: Meta Demonstration Distillation for Efficient and Effective In-Context Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19544", "id": "2Y5kBPtU0o", "author_site": "Yichuan Li, Xiyao Ma, Sixing Lu, Kyumin Lee, Xiaohu Liu, Chenlei Guo", "tldr": "", "abstract": "Large Language models (LLMs) have demonstrated impressive in-context learning (ICL) capabilities, \nwhere a LLM makes predictions for a given test input together with a few input-output pairs (demonstrations).\nNevertheless, the inclusion of demonstrations poses a challenge, leading to a quadratic increase in the computational overhead of the self-attention mechanism.\nExisting solutions attempt to condense lengthy demonstrations into compact vectors. \nHowever, they often require task-specific retraining or compromise LLM's in-context learning performance. \nTo mitigate these challenges, we present Meta Demonstration Distillation (MEND), where a language model learns to distill any lengthy demonstrations into vectors without retraining for a new downstream task. \nWe exploit the knowledge distillation to enhance alignment between MEND and MEND, achieving both efficiency and effectiveness concurrently. \nMEND is endowed with the meta-knowledge of distilling demonstrations through a two-stage training process, which includes meta-distillation pretraining and fine-tuning.\nComprehensive evaluations across seven diverse ICL settings using decoder-only (GPT-2) and encoder-decoder (T5) attest to MEND's prowess.\nIt not only matches but often outperforms the Vanilla ICL as well as other state-of-the-art distillation models, while significantly reducing the computational demands. 
\nThis innovation promises enhanced scalability and efficiency for the practical deployment of large language models.", "keywords": "in-context learning;language modeling;data distillation;knowledge distillation", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/f315aa78126a99535a58cf174e1f3f45b6db588c.zip", "author": "Yichuan Li;Xiyao Ma;Sixing Lu;Kyumin Lee;Xiaohu Liu;Chenlei Guo", "authorids": "~Yichuan_Li3;~Xiyao_Ma1;~Sixing_Lu1;~Kyumin_Lee1;~Xiaohu_Liu1;~Chenlei_Guo1", "gender": ";;;M;;M", "homepage": ";;https://www.linkedin.com/in/sixinglu/;https://web.cs.wpi.edu/~kmlee/;;", "dblp": "216/7478-1.html;;;https://dblp.uni-trier.de/pid/22/8024.html;18/2453;03/5480", "google_scholar": "lLvYmOwAAAAJ;;https://scholar.google.com/citations?hl=en;zQKRsSEAAAAJ;;gRFsEm4AAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;clguo/", "or_profile": "~Yichuan_Li3;~Xiyao_Ma1;~Sixing_Lu1;~Kyumin_Lee1;~Xiaohu_Liu1;~Chenlei_Guo1", "aff": "Worcester Polytechnic Institute;;University of Arizona;Worcester Polytechnic Institute;;", "aff_domain": "wpi.edu;;arizona.edu;wpi.edu;;", "position": "PhD student;;PhD student;Associate Professor;;", "bibtex": "@inproceedings{\nli2024mend,\ntitle={{MEND}: Meta Demonstration Distillation for Efficient and Effective In-Context Learning},\nauthor={Yichuan Li and Xiyao Ma and Sixing Lu and Kyumin Lee and Xiaohu Liu and Chenlei Guo},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=2Y5kBPtU0o}\n}", "github": "", "project": "", "reviewers": "RDfP;RPCQ;Dw9E;Uu3b", "pdf_size": 744151, "rating": "5;6;6;8", "confidence": "5;4;4;4", "soundness": "2;3;3;3", "contribution": "3;2;3;3", "presentation": "3;2;2;3", "wc_summary": "51;54;82;172", "wc_strengths": "60;20;40;79", "wc_weaknesses": "139;45;188;134", "wc_questions": "2;169;50;146", "wc_review": "252;288;360;531", "wc_reply_reviewers": "0;0;0;44", "wc_reply_authors": "442;922;735;545", "reply_reviewers": "0;0;0;1", "reply_authors": "1;2;2;1", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 89.75, 49.00191322795468 ], "wc_strengths_avg": [ 49.75, 22.02697210240209 ], "wc_weaknesses_avg": [ 126.5, 51.56791638218477 ], "wc_questions_avg": [ 91.75, 68.38996636934398 ], "wc_review_avg": [ 357.75, 107.31816015940639 ], "wc_reply_reviewers_avg": [ 11.0, 19.05255888325765 ], "wc_reply_authors_avg": [ 661.0, 183.72125625523032 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.6622661785325219, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11600534499645052239&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 6, "openreview": "https://openreview.net/forum?id=2Y5kBPtU0o", "pdf": "https://openreview.net/pdf?id=2Y5kBPtU0o", "email": "wpi.edu;;arizona.edu;wpi.edu;;", "author_num": 6, "aff_unique_index": "0;1;0", "aff_unique_norm": "Worcester Polytechnic Institute;University of Arizona", "aff_unique_dep": ";", "aff_unique_url": "https://www.wpi.edu;https://www.arizona.edu", "aff_unique_abbr": "WPI;UA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" 
}, { "id": "2aebB2mf0q", "title": "SemiAugIR: Semi-supervised Infrared Small Target Detection via Thermodynamics-Inspired Data Augmentation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Convolutional neural networks have shown promising results in single-frame infrared small target detection (SIRST) through supervised learning. Nevertheless, this approach requires a substantial number of accurate manual annotations on a per-pixel basis, incurring significant labor costs. To mitigate this, we pioneer the integration of semi-supervised learning into SIRST by exploiting the consistency of paired training samples obtained from data augmentation. Unlike prevalent data augmentation techniques that often rely on standard image processing pipelines designed for visible light natural images, we introduce a novel Thermodynamics-inspired data augmentation technique tailored for infrared images. It enhances infrared images by simulating energy distribution using the thermodynamic radiation pattern of infrared imaging and employing unlabeled images as references. Additionally, to replicate spatial distortions caused by variations in angle and distance during infrared imaging, we design a non-uniform mapping in positional space. This introduces non-uniform offsets in chromaticity and position, inducing desired changes in chromaticity and target configuration. This approach substantially diversifies the training samples, enabling the network to extract more robust features. We also devise an adaptive exponentially weighted loss function to address the challenge of training collapse due to imbalanced and inaccurately labeled samples. Integrating them together, we present SemiAugIR, which delivers promising results on two widely used benchmarks, e.g., with only 1/8 of the labeled samples, it achieves over 94\\% performance of the state-of-the-art fully supervised learning method. 
The source code will be released.", "keywords": "single-frame infrared small target detection;semi-supervised learning;non-uniform data augmentation;adaptive exponentially weighted loss function", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Mingjin Zhang;Wenteng Shang;Haichen Bai;Yunsong Li;Xinbo Gao;Jing Zhang", "authorids": "~Mingjin_Zhang2;~Wenteng_Shang1;~Haichen_Bai1;~Yunsong_Li1;~Xinbo_Gao3;~Jing_Zhang17", "gender": "F;M;M;M;M;M", "homepage": "https://web.xidian.edu.cn/mjinzhang/index.html;;;https://web.xidian.edu.cn/ysli/;http://see.xidian.edu.cn/faculty/xbgao/;", "dblp": "136/8003;;;;;05/3499-37.html", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;q8eamah50lwC;aY_2RzkAAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-1473-9784;0009-0004-9174-4132;0000-0002-5547-9958;;0000-0003-1443-0776;0000-0001-6595-7661", "linkedin": ";;;;;", "or_profile": "~Mingjin_Zhang2;~Wenteng_Shang1;~Haichen_Bai1;~Yunsong_Li1;~Xinbo_Gao3;~Jing_Zhang17", "aff": "Xidian University;Xi'an University of Electronic Science and Technology;Xi'an University of Electronic Science and Technology;Xidian University ;Xidian University;The University of Sydney", "aff_domain": "xidian.edu.cn;xidian.edu.cn;xidian.edu.cn;xidian.edu.cn;xidian.edu.cn;sydney.edu.au", "position": "Full Professor;MS student;MS student;Full Professor;Full Professor;Research Fellow", "bibtex": "@misc{\nzhang2024semiaugir,\ntitle={SemiAug{IR}: Semi-supervised Infrared Small Target Detection via Thermodynamics-Inspired Data Augmentation},\nauthor={Mingjin Zhang and Wenteng Shang and Haichen Bai and Yunsong Li and Xinbo Gao and Jing Zhang},\nyear={2024},\nurl={https://openreview.net/forum?id=2aebB2mf0q}\n}", "github": "", "project": "", "reviewers": "ARSd;5Vyd;kGGx", "site": "https://openreview.net/forum?id=2aebB2mf0q", "pdf_size": 1374918, "rating": "3;5;10", "confidence": "2;3;5", "soundness": "1;3;4", "contribution": "1;4;4", "presentation": "2;2;3", "wc_summary": "50;43;98", "wc_strengths": "30;48;85", "wc_weaknesses": "340;15;251", "wc_questions": "1;7;62", "wc_review": "421;113;496", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1850;772;1500", "reply_reviewers": "0;0;0", "reply_authors": "3;1;3", "rating_avg": [ 6.0, 2.943920288775949 ], "confidence_avg": [ 3.3333333333333335, 1.247219128924647 ], "soundness_avg": [ 2.6666666666666665, 1.247219128924647 ], "contribution_avg": [ 3.0, 1.4142135623730951 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 63.666666666666664, 24.44494948973214 ], "wc_strengths_avg": [ 54.333333333333336, 22.89589968143253 ], "wc_weaknesses_avg": [ 202.0, 137.1301085344377 ], "wc_questions_avg": [ 23.333333333333332, 27.450966386551052 ], "wc_review_avg": [ 343.3333333333333, 165.72333034977973 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1374.0, 449.0196729171971 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.3333333333333335, 0.9428090415820634 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.9986254289035241, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10782864233969299252&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;1;0;0;2", "aff_unique_norm": "Xidian University;Xi'an University of Electronic Science and Technology;University of Sydney", "aff_unique_dep": ";;", "aff_unique_url": 
"http://www.xidian.edu.cn/;http://www.xidian.edu.cn/;https://www.sydney.edu.au", "aff_unique_abbr": "Xidian;Xidian University;USYD", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Xi'an", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "China;Australia" }, { "id": "2bF381xEke", "title": "MapSelect: Sparse & Interpretable Graph Attention Networks", "track": "main", "status": "Reject", "tldr": "", "abstract": "Graph Attention Networks (GATs) have shown remarkable performance in capturing complex graph structures by assigning dense attention weights over all neighbours of a node. Attention weights can act as an inherent explanation for the model output, by highlighting the most important neighbours for a given input graph. However, the dense nature of the attention layer causes a lack of focus as all edges receive some probability mass. To overcome this, we introduce MapSelect, a new method providing a fully differentiable sparse attention mechanism. Through user-defined constraints, MapSelect enables precise control over the attention density, acting as a continuous relaxation of the popular top-k operator. We propose two distinct variants of MapSelect: a local approach maintaining a fixed degree per node, and a global approach preserving a percentage of the full graph. Upon conducting a comprehensive evaluation of five sparse GATs in terms of sparsity, performance, and interpretability, we provide insights on the sparsity-accuracy and sparsity-interpretability trade-offs. Our results show that MapSelect outperforms robust baselines in terms of interpretability, especially in the local context, while also leading to competitive task performance on real-world datasets.", "keywords": "Graph attention networks;interpretability;sparsity;self-interpretable methods", "primary_area": "visualization or interpretation of learned representations", "supplementary_material": "", "author": "Titus Naber;Marcos Vinicius Treviso;Andre Martins;Elvin Isufi", "authorids": "~Titus_Naber1;~Marcos_Vinicius_Treviso1;~Andre_Martins1;~Elvin_Isufi1", "gender": "M;M;M;M", "homepage": ";;https://andre-martins.github.io/;https://sites.google.com/site/elvinisufihp/", "dblp": ";188/5938;m/AndreFTMartins;156/9608", "google_scholar": ";puR_FskAAAAJ;https://scholar.google.pt/citations?user=mT7ppvwAAAAJ;wvywFdwAAAAJ", "orcid": ";;;", "linkedin": "titus-naber/;;;", "or_profile": "~Titus_Naber1;~Marcos_Vinicius_Treviso1;~Andre_Martins1;~Elvin_Isufi1", "aff": ";Instituto de Telecomunica\u00e7\u00f5es, Portugal;Unbabel;Delft University of Technology", "aff_domain": ";it.pt;unbabel.com;tudelft.nl", "position": ";Postdoc;Research Scientist;Associate Professor", "bibtex": "@misc{\nnaber2024mapselect,\ntitle={MapSelect: Sparse \\& Interpretable Graph Attention Networks},\nauthor={Titus Naber and Marcos Vinicius Treviso and Andre Martins and Elvin Isufi},\nyear={2024},\nurl={https://openreview.net/forum?id=2bF381xEke}\n}", "github": "", "project": "", "reviewers": "b971;EqGL;iHh7;WU6w", "site": "https://openreview.net/forum?id=2bF381xEke", "pdf_size": 849635, "rating": "3;3;3;3", "confidence": "4;3;5;4", "soundness": "2;2;3;2", "contribution": "2;1;2;2", "presentation": "2;2;3;4", "wc_summary": "166;97;76;73", "wc_strengths": "84;17;34;34", "wc_weaknesses": "103;104;202;272", "wc_questions": "89;8;3;75", "wc_review": "442;226;315;454", "wc_reply_reviewers": "0;0;0;123", "wc_reply_authors": "328;305;330;478", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 3.0, 0.0 ], 
"confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 103.0, 37.52998800959041 ], "wc_strengths_avg": [ 42.25, 25.083610186733488 ], "wc_weaknesses_avg": [ 170.25, 71.19120381058323 ], "wc_questions_avg": [ 43.75, 38.6094224251024 ], "wc_review_avg": [ 359.25, 94.25862029544035 ], "wc_reply_reviewers_avg": [ 30.75, 53.26056233274298 ], "wc_reply_authors_avg": [ 360.25, 68.68906390394325 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:lAsUqR-ONNAJ:scholar.google.com/&scioq=MapSelect:+Sparse+%26+Interpretable+Graph+Attention+Networks&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "Instituto de Telecomunica\u00e7\u00f5es;Unbabel;Delft University of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.it.pt;https://www.unbabel.com;https://www.tudelft.nl", "aff_unique_abbr": ";;TU Delft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Portugal;Netherlands" }, { "id": "2boLXjsHsB", "title": "Multi-Objective Reinforcement Learning for Forward-Backward Markov Decision Processes", "track": "main", "status": "Reject", "tldr": "", "abstract": "This work introduces the notion of Forward-Backward Markov Decision Process (FB-MDP)\nfor multi-task control problems. In this context, we devise a novel approach called Forward-Backward Multi-Objective Reinforcement Learning (FB-MORL).\nSpecifically, we analytically characterize its convergence towards a Pareto-optimal solution and also empirically evaluate its effectiveness.\nFor the latter, we consider a use case in wireless caching and perform several experiments to characterize performance in that context. 
Finally, an ablation study demonstrates that FB-MDP is instrumental to optimize rewards for systems with forward-backward dynamics.\nThe outcomes of this work pave the way for further understanding of multi-objective RL algorithms for FB-MDPs.", "keywords": "Forward-Backward Markov Decision Process;Multi-Objective Reinforcement Learning Algorithm", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Mohsen Amidzadeh;Mario Di Francesco", "authorids": "~Mohsen_Amidzadeh1;~Mario_Di_Francesco1", "gender": "M;", "homepage": "https://research.aalto.fi/en/persons/mohsen-amidzade;", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Mohsen_Amidzadeh1;~Mario_Di_Francesco1", "aff": "Aalto University;", "aff_domain": "aalto.fi;", "position": "Postdoc;", "bibtex": "@misc{\namidzadeh2024multiobjective,\ntitle={Multi-Objective Reinforcement Learning for Forward-Backward Markov Decision Processes},\nauthor={Mohsen Amidzadeh and Mario Di Francesco},\nyear={2024},\nurl={https://openreview.net/forum?id=2boLXjsHsB}\n}", "github": "", "project": "", "reviewers": "EeKJ;g9Ad;AwDv", "site": "https://openreview.net/forum?id=2boLXjsHsB", "pdf_size": 1878236, "rating": "5;5;5", "confidence": "4;4;3", "soundness": "3;3;3", "contribution": "2;2;2", "presentation": "2;2;3", "wc_summary": "44;101;132", "wc_strengths": "23;50;111", "wc_weaknesses": "26;139;504", "wc_questions": "256;108;138", "wc_review": "349;398;885", "wc_reply_reviewers": "23;325;0", "wc_reply_authors": "766;1146;750", "reply_reviewers": "1;2;0", "reply_authors": "1;2;1", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 92.33333333333333, 36.44478319625763 ], "wc_strengths_avg": [ 61.333333333333336, 36.80881536926839 ], "wc_weaknesses_avg": [ 223.0, 203.98202535190856 ], "wc_questions_avg": [ 167.33333333333334, 63.88183535942662 ], "wc_review_avg": [ 544.0, 241.9517858307036 ], "wc_reply_reviewers_avg": [ 116.0, 148.0833098855731 ], "wc_reply_authors_avg": [ 887.3333333333334, 183.0215530720054 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:n52GeLK86McJ:scholar.google.com/&scioq=Multi-Objective+Reinforcement+Learning+for+Forward-Backward+Markov+Decision+Processes&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Aalto University", "aff_unique_dep": "", "aff_unique_url": "https://www.aalto.fi", "aff_unique_abbr": "Aalto", "aff_country_unique_index": "0", "aff_country_unique": "Finland" }, { "title": "Beyond Reverse KL: Generalizing Direct Preference Optimization with Diverse Divergence Constraints", "status": "Spotlight", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19543", "id": "2cRzmWXK9N", "author_site": "Chaoqi Wang, Yibo Jiang, Chenghao Yang, Han Liu, Yuxin Chen", "tldr": "", "abstract": "The increasing capabilities of large language models (LLMs) raise opportunities for artificial general intelligence but concurrently amplify safety concerns, such as potential misuse of AI systems, necessitating effective AI alignment. 
Reinforcement Learning from Human Feedback (RLHF) has emerged as a promising pathway towards AI alignment but brings forth challenges due to its complexity and dependence on a separate reward model. Direct Preference Optimization (DPO) has been proposed as an alternative; and it remains equivalent to RLHF under the reverse KL regularization constraint. This paper presents $f$-DPO, a generalized approach to DPO by incorporating diverse divergence constraints. We show that under certain $f$-divergences, including Jensen-Shannon divergence, forward KL divergences and $\\alpha$-divergences, the complex relationship between the reward and optimal policy can also be simplified by addressing the Karush\u2013Kuhn\u2013Tucker conditions. This eliminates the need for estimating the normalizing constant in the Bradley-Terry model and enables a tractable mapping between the reward function and the optimal policy. Our approach optimizes LLMs to align with human preferences in a more efficient and supervised manner under a broad set of divergence constraints. Empirically, adopting these divergences ensures a balance between alignment performance and generation diversity. Importantly, our $f$-DPO outperforms PPO-based methods in divergence efficiency, and divergence constraints directly influence expected calibration error (ECE).", "keywords": "Large language models;Preference optimization;AI Alignment", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Chaoqi Wang;Yibo Jiang;Chenghao Yang;Han Liu;Yuxin Chen", "authorids": "~Chaoqi_Wang1;~Yibo_Jiang2;~Chenghao_Yang1;~Han_Liu12;~Yuxin_Chen1", "gender": "M;M;M;M;", "homepage": "https://alecwangcq.github.io;;https://yangalan123.github.io/;https://hanliuai.github.io;http://yuxinchen.org/", "dblp": "210/1073;54/2193;229/4179;35/2899;11/5123-1", "google_scholar": "https://scholar.google.ca/citations?user=yN2iRpwAAAAJ;hvQo2gQAAAAJ;B28fiOAAAAAJ;stdJOHwAAAAJ;-k1N7HAAAAAJ", "orcid": ";;;0009-0001-0434-9141;", "linkedin": ";;chenghao-yang-857b51178/;han-liu-347270ba/;", "or_profile": "~Chaoqi_Wang1;~Yibo_Jiang2;~Chenghao_Yang1;~Han_Liu12;~Yuxin_Chen1", "aff": "University of Chicago;University of Chicago;Google;University of Chicago;University of Chicago", "aff_domain": "uchicago.edu;uchicago.edu;google.com;uchicago.edu;uchicago.edu", "position": "PhD student;PhD student;Student Researcher;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nwang2024beyond,\ntitle={Beyond Reverse {KL}: Generalizing Direct Preference Optimization with Diverse Divergence Constraints},\nauthor={Chaoqi Wang and Yibo Jiang and Chenghao Yang and Han Liu and Yuxin Chen},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=2cRzmWXK9N}\n}", "github": "", "project": "", "reviewers": "sLFR;AH62;GUwk;Lpy5", "pdf_size": 691534, "rating": "5;8;8;8", "confidence": "3;2;4;3", "soundness": "3;3;3;4", "contribution": "2;3;3;4", "presentation": "2;4;3;3", "wc_summary": "90;103;50;361", "wc_strengths": "122;73;32;50", "wc_weaknesses": "108;27;35;54", "wc_questions": "37;130;130;40", "wc_review": "357;333;247;505", "wc_reply_reviewers": "0;9;17;0", "wc_reply_authors": "905;414;550;88", "reply_reviewers": "0;1;1;0", "reply_authors": "2;1;1;1", "rating_avg": [ 7.25, 1.299038105676658 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 
0.7071067811865476 ], "wc_summary_avg": [ 151.0, 122.80675877165719 ], "wc_strengths_avg": [ 69.25, 33.74444398712179 ], "wc_weaknesses_avg": [ 56.0, 31.583223394707513 ], "wc_questions_avg": [ 84.25, 45.7622934302904 ], "wc_review_avg": [ 360.5, 92.9125933337349 ], "wc_reply_reviewers_avg": [ 6.5, 7.088723439378913 ], "wc_reply_authors_avg": [ 489.25, 292.91754385833565 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 79, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18160314555054778595&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=2cRzmWXK9N", "pdf": "https://openreview.net/pdf?id=2cRzmWXK9N", "email": "uchicago.edu;uchicago.edu;google.com;uchicago.edu;uchicago.edu", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "University of Chicago;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.uchicago.edu;https://www.google.com", "aff_unique_abbr": "UChicago;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "2dHmhoWweE", "title": "Lookbehind Optimizer: k steps back, 1 step forward", "track": "main", "status": "Reject", "tldr": "", "abstract": "Sharpness-aware minimization (SAM) methods have gained increasing popularity by formulating the problem of minimizing both loss value and loss sharpness as a minimax objective. In this work, we increase the efficiency of the maximization and minimization parts of SAM's objective to achieve a better loss-sharpness trade-off. By taking inspiration from the Lookahead optimizer, which uses multiple descent steps ahead, we propose Lookbehind, which performs multiple ascent steps behind to enhance the maximization step of SAM and find a worst-case perturbation with higher loss. Then, to mitigate the variance in the descent step arising from the gathered gradients across the multiple ascent steps, we employ linear interpolation to refine the minimization step. Lookbehind leads to a myriad of benefits across a variety of tasks. 
Particularly, we show increased generalization performance, greater robustness against noisy weights, as well as improved learning and less catastrophic forgetting in lifelong learning settings.", "keywords": "Optimization;sharpness-aware minimization", "primary_area": "optimization", "supplementary_material": "", "author": "Goncalo Mordido;Pranshu Malviya;Aristide Baratin;Sarath Chandar", "authorids": "~Goncalo_Mordido1;~Pranshu_Malviya1;~Aristide_Baratin1;~Sarath_Chandar1", "gender": ";M;;M", "homepage": ";https://pranshu28.github.io/about/;;http://sarathchandar.in/", "dblp": ";;;45/8542", "google_scholar": ";;;https://scholar.google.co.in/citations?user=yxWtZLAAAAAJ", "orcid": ";;;", "linkedin": ";pranshumalviya2/;;", "or_profile": "~Goncalo_Mordido1;~Pranshu_Malviya1;~Aristide_Baratin1;~Sarath_Chandar1", "aff": ";\u00c9cole Polytechnique de Montr\u00e9al, Universit\u00e9 de Montr\u00e9al;;\u00c9cole Polytechnique de Montr\u00e9al", "aff_domain": ";polymtl.ca;;polymtl.ca", "position": ";PhD student;;Assistant Professor", "bibtex": "@misc{\nmordido2024lookbehind,\ntitle={Lookbehind Optimizer: k steps back, 1 step forward},\nauthor={Goncalo Mordido and Pranshu Malviya and Aristide Baratin and Sarath Chandar},\nyear={2024},\nurl={https://openreview.net/forum?id=2dHmhoWweE}\n}", "github": "", "project": "", "reviewers": "1aPq;aoaQ;tnTm;sgRK", "site": "https://openreview.net/forum?id=2dHmhoWweE", "pdf_size": 846970, "rating": "3;3;5;8", "confidence": "4;4;4;4", "soundness": "3;2;3;4", "contribution": "2;2;2;3", "presentation": "3;2;3;4", "wc_summary": "25;57;68;62", "wc_strengths": "11;50;144;77", "wc_weaknesses": "83;396;133;36", "wc_questions": "1;2;96;126", "wc_review": "120;505;441;301", "wc_reply_reviewers": "10;50;0;11", "wc_reply_authors": "294;784;441;331", "reply_reviewers": "1;1;0;1", "reply_authors": "1;2;1;1", "rating_avg": [ 4.75, 2.0463381929681126 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 53.0, 16.62828914831589 ], "wc_strengths_avg": [ 70.5, 48.48968962573384 ], "wc_weaknesses_avg": [ 162.0, 139.38615426217913 ], "wc_questions_avg": [ 56.25, 55.76905503951094 ], "wc_review_avg": [ 341.75, 147.76226683426322 ], "wc_reply_reviewers_avg": [ 17.75, 19.109879643786353 ], "wc_reply_authors_avg": [ 462.5, 193.33196838598627 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18091060575552247786&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "\u00c9cole Polytechnique de Montr\u00e9al", "aff_unique_dep": "", "aff_unique_url": "https://www.polymtl.ca", "aff_unique_abbr": "Polytechnique Montr\u00e9al", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Montr\u00e9al", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "id": "2dLMPOY0HW", "title": "When Do MLPs Excel in Node Classification? An Information-Theoretic Perspective", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recent research has shed light on the competitiveness of MLP-structured methods in node-level tasks. Nevertheless, there remains a gap in our understanding regarding why MLPs perform well and how their performance varies across different datasets. 
This paper addresses this lacuna by emphasizing mutual information\u2019s pivotal role in MLPs vs. GNNs performance variations. We first introduce a\ntractable metric to quantify the mutual information between node features and graph structure, based on which we observe different characteristics of various datasets, aligning with empirical results. Subsequently, we present InfoMLP, which optimizes node embeddings\u2019 mutual information with the graph\u2019s structure, i.e., the adjacency matrix. Our info-max objective comprises two sub-objectives: the first focuses on non-parametric reprocessing to identify the optimal graph-augmented node feature matrix that encapsulates the most graph-related information. The second sub-objective aims to enhance mutual information between node embeddings derived from the original node features and those from the graph-augmented features. This integration of message-passing during preprocessing maintains the efficiency of InfoMLP, ensuring it remains as efficient as a standard MLP during both training and testing. We validate the effectiveness of our approach through experiments on real-world datasets of varying scales supplemented by comprehensive ablation studies. Our results affirm our analysis and underscore the success of our innovative approach.", "keywords": "Node Representation Learning;Node Classification;Graph Neural Networks", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/5342fd19e23fbefff38906d822e4ce2323047093.zip", "author": "Hengrui Zhang;Qitian Wu;Chenxiao Yang;Junchi Yan;Philip S. Yu", "authorids": "~Hengrui_Zhang1;~Qitian_Wu1;~Chenxiao_Yang1;~Junchi_Yan2;~Philip_S._Yu1", "gender": "M;;;;M", "homepage": "https://hengruizhang98.github.io;;;;https://cs.uic.edu/profiles/philip-yu/", "dblp": ";;;;y/PhilipSYu", "google_scholar": "iwffiD0AAAAJ;;;;D0lL1r0AAAAJ", "orcid": "0009-0006-1330-0899;;;;0000-0002-3491-5968", "linkedin": ";;;;", "or_profile": "~Hengrui_Zhang1;~Qitian_Wu1;~Chenxiao_Yang1;~Junchi_Yan2;~Philip_S._Yu1", "aff": "University of Illinois, Chicago;;;;University of Illinois Chicago", "aff_domain": "uic.edu;;;;uic.edu", "position": "PhD student;;;;Full Professor", "bibtex": "@misc{\nzhang2024when,\ntitle={When Do {MLP}s Excel in Node Classification? An Information-Theoretic Perspective},\nauthor={Hengrui Zhang and Qitian Wu and Chenxiao Yang and Junchi Yan and Philip S. 
Yu},\nyear={2024},\nurl={https://openreview.net/forum?id=2dLMPOY0HW}\n}", "github": "", "project": "", "reviewers": "zabj;YPT6;o93U", "site": "https://openreview.net/forum?id=2dLMPOY0HW", "pdf_size": 4695346, "rating": "3;3;8", "confidence": "4;4;3", "soundness": "1;2;4", "contribution": "2;2;4", "presentation": "2;3;3", "wc_summary": "125;272;95", "wc_strengths": "117;12;101", "wc_weaknesses": "752;35;126", "wc_questions": "126;44;5", "wc_review": "1120;363;327", "wc_reply_reviewers": "203;0;78", "wc_reply_authors": "2207;1540;430", "reply_reviewers": "1;0;1", "reply_authors": "4;3;1", "rating_avg": [ 4.666666666666667, 2.357022603955158 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 1.247219128924647 ], "contribution_avg": [ 2.6666666666666665, 0.9428090415820634 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 164.0, 77.34339015067803 ], "wc_strengths_avg": [ 76.66666666666667, 46.19042709864843 ], "wc_weaknesses_avg": [ 304.3333333333333, 318.72070671496834 ], "wc_questions_avg": [ 58.333333333333336, 50.427065043376146 ], "wc_review_avg": [ 603.3333333333334, 365.6340003640556 ], "wc_reply_reviewers_avg": [ 93.66666666666667, 83.61153562092308 ], "wc_reply_authors_avg": [ 1392.3333333333333, 732.933072585 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 1.247219128924647 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.9999999999999998, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:UC-sMQy_S6wJ:scholar.google.com/&scioq=When+Do+MLPs+Excel+in+Node+Classification%3F+An+Information-Theoretic+Perspective&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "University of Illinois at Chicago", "aff_unique_dep": "", "aff_unique_url": "https://www.uic.edu", "aff_unique_abbr": "UIC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Chicago", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "2dYAbdXgpu", "title": "SimSCOOD: Systematic Analysis of Out-of-Distribution Generalization in Fine-tuned Source Code Models", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Large code datasets have become increasingly accessible for pre-training source code models. However, for the fine-tuning phase, obtaining representative training data that fully covers the code distribution for specific downstream tasks remains challenging due to the task-specific nature and limited labeling resources. Moreover, fine-tuning pretrained models can result in forgetting previously acquired pre-training knowledge. These lead to out-of-distribution (OOD) generalization issues with unexpected model inference behaviors that have not been systematically studied yet.\nIn this paper, we contribute the first systematic approach that simulates various OOD scenarios along different dimensions of source code data properties and study the fine-tuned model behaviors in such scenarios. We investigate the behaviors of models under different fine-tuning methodologies, including full fine-tuning and Low-Rank Adaptation (LoRA) fine-tuning methods. Our comprehensive analysis, conducted on four state-of-the-art pretrained models and applied to two code generation tasks, exposes multiple failure modes attributed to OOD generalization issues. 
Additionally, our analysis uncovers that LoRA fine-tuning consistently exhibits significantly better OOD generalization performance than full fine-tuning across various scenarios.", "keywords": "LLM for code generation;Out-of-distribution generalization;Pre-trained code models;Fine-tuned codeLLM", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "", "author": "Hossein Hajipour;Ning Yu;Cristian-Alexandru Staicu;Mario Fritz", "authorids": "~Hossein_Hajipour1;~Ning_Yu2;~Cristian-Alexandru_Staicu2;~Mario_Fritz1", "gender": "M;;M;M", "homepage": "https://cispa.de/en/people/hossein.hajipour;;http://www.staicu.org/;https://cispa.saarland/group/fritz/", "dblp": "125/1613;;;", "google_scholar": ";;JmpDeRQAAAAJ;https://scholar.google.de/citations?user=4V1nNm4AAAAJ", "orcid": ";;0000-0002-6542-2226;", "linkedin": ";;crstaicu/;", "or_profile": "~Hossein_Hajipour1;~Ning_Yu2;~Cristian-Alexandru_Staicu2;~Mario_Fritz1", "aff": "CISPA Helmholtz Center for Information Security;;CISPA Helmholtz Center for Information Security;Saarland University", "aff_domain": "cispa.saarland;;cispa.saarland;uni-saarland.de", "position": "PhD student;;Researcher;Full Professor", "bibtex": "@misc{\nhajipour2024simscood,\ntitle={Sim{SCOOD}: Systematic Analysis of Out-of-Distribution Generalization in Fine-tuned Source Code Models},\nauthor={Hossein Hajipour and Ning Yu and Cristian-Alexandru Staicu and Mario Fritz},\nyear={2024},\nurl={https://openreview.net/forum?id=2dYAbdXgpu}\n}", "github": "", "project": "", "reviewers": "fJQ1;9kMw;cCst;HtnZ", "site": "https://openreview.net/forum?id=2dYAbdXgpu", "pdf_size": 1056307, "rating": "5;5;5;5", "confidence": "4;4;3;3", "soundness": "3;2;2;3", "contribution": "2;1;2;2", "presentation": "3;3;3;3", "wc_summary": "53;33;117;96", "wc_strengths": "64;18;66;63", "wc_weaknesses": "191;88;160;262", "wc_questions": "4;25;7;21", "wc_review": "312;164;350;442", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "929;387;269;821", "reply_reviewers": "0;0;0;0", "reply_authors": "3;1;1;2", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 74.75, 33.36446462930284 ], "wc_strengths_avg": [ 52.75, 20.09197601033806 ], "wc_weaknesses_avg": [ 175.25, 62.48749874974994 ], "wc_questions_avg": [ 14.25, 8.926785535678562 ], "wc_review_avg": [ 317.0, 100.18482919085105 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 601.5, 279.28614358754 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8603687437464787116&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff_unique_index": "0;0;1", "aff_unique_norm": "CISPA Helmholtz Center for Information Security;Saarland University", "aff_unique_dep": ";", "aff_unique_url": "https://www.cispa.de/;https://www.uni-saarland.de", "aff_unique_abbr": "CISPA;UdS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "title": "Function-space Parameterization of Neural Networks for Sequential Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19542", "id": "2dhxxIKhqz", "author_site": "Aidan Scannell, Riccardo Mereu, Paul Chang, Ella Tamir, Joni 
Pajarinen, Arno Solin", "tldr": "", "abstract": "Sequential learning paradigms pose challenges for gradient-based deep learning due to difficulties incorporating new data and retaining prior knowledge. While Gaussian processes elegantly tackle these problems, they struggle with scalability and handling rich inputs, such as images. To address these issues, we introduce a technique that converts neural networks from weight space to function space, through a dual parameterization. Our parameterization offers: (*i*) a way to scale function-space methods to large data sets via sparsification, (*ii*) retention of prior knowledge when access to past data is limited, and (*iii*) a mechanism to incorporate new data without retraining. Our experiments demonstrate that we can retain knowledge in continual learning and incorporate new data efficiently. We further show its strengths in uncertainty quantification and guiding exploration in model-based RL. Further information and code is available on the project website.", "keywords": "Neural networks;Bayesian deep learning;deep learning;Gaussian processes;Laplace approximation;sequential learning", "primary_area": "probabilistic methods (Bayesian methods, variational inference, sampling, UQ, etc.)", "supplementary_material": "", "author": "Aidan Scannell;Riccardo Mereu;Paul Edmund Chang;Ella Tamir;Joni Pajarinen;Arno Solin", "authorids": "~Aidan_Scannell1;~Riccardo_Mereu1;~Paul_Edmund_Chang1;~Ella_Tamir1;~Joni_Pajarinen2;~Arno_Solin1", "gender": "M;M;M;;;", "homepage": "https://www.aidanscannell.com/;;https://research.aalto.fi/en/persons/paul-chang;;;http://arno.solin.fi", "dblp": "304/4471;318/1146;270/0387;;23/8355;98/11225", "google_scholar": "https://scholar.google.co.uk/citations?user=piA0zS4AAAAJ;UVziXI0AAAAJ;CLzK5SkAAAAJ;;https://scholar.google.fi/citations?user=-2fJStwAAAAJ;U_fJCnAAAAAJ", "orcid": ";0000-0002-8932-9341;;;0000-0003-4469-8191;0000-0002-0958-7886", "linkedin": "aidan-scannell-82522789/;riccardo-mereu-050248a1/;;;;asolin/", "or_profile": "~Aidan_Scannell1;~Riccardo_Mereu1;~Paul_Edmund_Chang1;~Ella_Tamir1;~Joni_Pajarinen2;~Arno_Solin1", "aff": "Aalto University;Aalto University;;;Aalto University;Aalto University", "aff_domain": "aalto.fi;aalto.fi;;;aalto.fi;aalto.fi", "position": "Postdoc;PhD student;;;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nscannell2024functionspace,\ntitle={Function-space Parameterization of Neural Networks for Sequential Learning},\nauthor={Aidan Scannell and Riccardo Mereu and Paul Edmund Chang and Ella Tamir and Joni Pajarinen and Arno Solin},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=2dhxxIKhqz}\n}", "github": "", "project": "", "reviewers": "C2Qu;7JfH;V1B4", "pdf_size": 2126191, "rating": "6;6;8", "confidence": "3;3;2", "soundness": "3;2;4", "contribution": "3;2;3", "presentation": "3;1;3", "wc_summary": "208;38;104", "wc_strengths": "77;20;83", "wc_weaknesses": "370;224;89", "wc_questions": "551;8;57", "wc_review": "1206;290;333", "wc_reply_reviewers": "54;85;0", "wc_reply_authors": "1289;824;336", "reply_reviewers": "1;1;0", "reply_authors": "3;2;1", "rating_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 116.66666666666667, 
69.97777424932069 ], "wc_strengths_avg": [ 60.0, 28.39013913315678 ], "wc_weaknesses_avg": [ 227.66666666666666, 114.74706483779396 ], "wc_questions_avg": [ 205.33333333333334, 245.2404715557546 ], "wc_review_avg": [ 609.6666666666666, 422.0365966227205 ], "wc_reply_reviewers_avg": [ 46.333333333333336, 35.122009560324926 ], "wc_reply_authors_avg": [ 816.3333333333334, 389.09838801459404 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.9999999999999998, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11704751970787674703&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "openreview": "https://openreview.net/forum?id=2dhxxIKhqz", "pdf": "https://openreview.net/pdf?id=2dhxxIKhqz", "email": "aalto.fi;aalto.fi;;;aalto.fi;aalto.fi", "author_num": 6, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Aalto University", "aff_unique_dep": "", "aff_unique_url": "https://www.aalto.fi", "aff_unique_abbr": "Aalto", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Finland" }, { "title": "Vision Transformers Need Registers", "status": "Oral", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19541", "id": "2dnO3LLiJ1", "author_site": "Timoth\u00e9e Darcet, Maxime Oquab, Julien Mairal, Piotr Bojanowski", "tldr": "", "abstract": "Transformers have recently emerged as a powerful tool for learning visual representations. In this paper, we identify and characterize artifacts in feature maps of both supervised and self-supervised ViT networks. The artifacts correspond to high-norm tokens appearing during inference primarily in low-informative background areas of images, that are repurposed for internal computations. We propose a simple yet effective solution based on providing additional tokens to the input sequence of the Vision Transformer to fill that role. 
We show that this solution fixes that problem entirely for both supervised and self-supervised models, sets a new state of the art for self-supervised visual models on dense visual prediction tasks, enables object discovery methods with larger models, and most importantly leads to smoother feature maps and attention maps for downstream visual processing.", "keywords": "representation;vision;transformer;register;SSL;CLIP;attention;attention map;interpretability;DINO;DINOv2", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Timoth\u00e9e Darcet;Maxime Oquab;Julien Mairal;Piotr Bojanowski", "authorids": "~Timoth\u00e9e_Darcet1;~Maxime_Oquab1;~Julien_Mairal1;~Piotr_Bojanowski1", "gender": "M;;;M", "homepage": "https://tim.darcet.fr;;http://julien.mairal.org;", "dblp": "344/5814;151/8880;49/6555;142/2542", "google_scholar": "G4qOJQEAAAAJ;https://scholar.google.fr/citations?user=5vteYV8AAAAJ;https://scholar.google.fr/citations?user=Bx9WGD6lBFEC;https://scholar.google.fr/citations?user=lJ_oh2EAAAAJ", "orcid": ";;;", "linkedin": "timdarcet/;;;piotr-bojanowski-9a94402a", "or_profile": "~Timoth\u00e9e_Darcet1;~Maxime_Oquab1;~Julien_Mairal1;~Piotr_Bojanowski1", "aff": "Meta;Meta;Inria;Meta", "aff_domain": "meta.com;meta.com;inria.fr;meta.com", "position": "PhD student;Research Scientist;Research Scientist;Researcher", "bibtex": "@inproceedings{\ndarcet2024vision,\ntitle={Vision Transformers Need Registers},\nauthor={Timoth{\\'e}e Darcet and Maxime Oquab and Julien Mairal and Piotr Bojanowski},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=2dnO3LLiJ1}\n}", "github": "", "project": "", "reviewers": "mTMB;KSLu;GGy7;eoPK", "pdf_size": 5953026, "rating": "8;8;8;8", "confidence": "4;4;4;3", "soundness": "3;3;3;3", "contribution": "4;3;4;3", "presentation": "4;4;4;3", "wc_summary": "169;91;160;118", "wc_strengths": "88;67;208;69", "wc_weaknesses": "179;257;127;56", "wc_questions": "393;3;83;42", "wc_review": "829;418;578;285", "wc_reply_reviewers": "591;0;0;0", "wc_reply_authors": "1164;658;780;316", "reply_reviewers": "1;0;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 8.0, 0.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 134.5, 31.64253466459348 ], "wc_strengths_avg": [ 108.0, 58.31380625546578 ], "wc_weaknesses_avg": [ 154.75, 73.42470633240558 ], "wc_questions_avg": [ 130.25, 154.31360115038467 ], "wc_review_avg": [ 527.5, 202.63822442964704 ], "wc_reply_reviewers_avg": [ 147.75, 255.91050681830163 ], "wc_reply_authors_avg": [ 729.5, 303.082084590957 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 360, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7001609497646764111&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 11, "openreview": "https://openreview.net/forum?id=2dnO3LLiJ1", "pdf": "https://openreview.net/pdf?id=2dnO3LLiJ1", "email": "meta.com;meta.com;inria.fr;meta.com", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Meta;INRIA", "aff_unique_dep": "Meta Platforms, Inc.;", "aff_unique_url": "https://meta.com;https://www.inria.fr", "aff_unique_abbr": "Meta;Inria", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;France" }, { "id": "2drC319yHQ", "title": "RepoFusion: Training Code Models to Understand Your Repository", "track": "main", "status": "Reject", "tldr": "", "abstract": "Despite the huge success of Large Language Models (LLMs) in coding assistants like GitHub Copilot, these models struggle to understand the context present in the repository (e.g., imports, parent classes, files with similar names, etc.), thereby producing inaccurate code completions. This effect is more pronounced when using these assistants for repositories that the model has not seen during training, such as proprietary software or work-in-progress code projects. Recent work has shown the promise of using context from the repository during inference. In this work, we extend this idea and propose \\emph{RepoFusion}, a framework to train models to incorporate relevant repository context. Experiments on single-line code completion show that our models trained with repository context significantly outperform much larger code models as CodeGen-16B-multi ($\\sim73\\times$ larger) and closely match the performance of the $\\sim 70\\times$ larger StarCoderBase model that was trained with the Fill-in-the-Middle objective. We find these results to be a novel and compelling demonstration of the gains that training with repository context can bring. We carry out extensive ablation studies to investigate the impact of design choices such as context type, number of contexts, context length, and initialization within our framework. Lastly, we release a dataset for code completion with repository context to facilitate further research in this domain.", "keywords": "large language models of code;long context;repository;code completion;source code;LLM;retrieval", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/6de0d88c748b9c572e7a27a6f1dd2b4ba9b4d32d.pdf", "author": "Disha Shrivastava;Denis Kocetkov;Harm de Vries;Dzmitry Bahdanau;Torsten Scholak", "authorids": "~Disha_Shrivastava1;~Denis_Kocetkov1;~Harm_de_Vries1;~Dzmitry_Bahdanau1;~Torsten_Scholak1", "gender": "F;M;M;M;M", "homepage": "https://shrivastavadisha.github.io/;;;https://tscholak.github.com;https://uk.linkedin.com/in/denis-kochetkov-14290213", "dblp": "203/9100;;151/6504;277/0957;", "google_scholar": "https://scholar.google.co.in/citations?user=7R8dnlUAAAAJ;LWrdpCsAAAAJ;https://scholar.google.ca/citations?user=Nq0dVMcAAAAJ;https://scholar.google.ca/citations?user=BgkjtKgAAAAJ;", "orcid": ";;;;", "linkedin": "disha-shrivastava-8398a212/;;;tscholak;", "or_profile": "~Disha_Shrivastava1;~Harm_de_Vries1;~Dzmitry_Bahdanau1;~Torsten_Scholak1;~Denis_Kochetkov1", "aff": "Google;ServiceNow Research;ServiceNow Research;ServiceNow Research;ServiceNow Research", "aff_domain": "deepmind.com;elementai.com;servicenow.com;servicenow.com;servicenow.com", "position": "Researcher;Researcher;Research Scientist;Researcher;Software engineer", "bibtex": "@misc{\nshrivastava2024repofusion,\ntitle={RepoFusion: Training Code Models to Understand Your Repository},\nauthor={Disha Shrivastava and Denis Kocetkov and Harm de Vries and Dzmitry Bahdanau and Torsten Scholak},\nyear={2024},\nurl={https://openreview.net/forum?id=2drC319yHQ}\n}", "github": "", "project": "", "reviewers": "U4U4;hziX;Ar6w", "site": "https://openreview.net/forum?id=2drC319yHQ", "pdf_size": 632352, "rating": 
"3;3;6", "confidence": "3;4;4", "soundness": "1;3;3", "contribution": "2;1;3", "presentation": "4;3;3", "wc_summary": "257;204;107", "wc_strengths": "83;48;77", "wc_weaknesses": "194;120;268", "wc_questions": "65;150;9", "wc_review": "599;522;461", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 4.0, 1.4142135623730951 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.9428090415820634 ], "contribution_avg": [ 2.0, 0.816496580927726 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 189.33333333333334, 62.10922278982048 ], "wc_strengths_avg": [ 69.33333333333333, 15.2825245151302 ], "wc_weaknesses_avg": [ 194.0, 60.42074698865172 ], "wc_questions_avg": [ 74.66666666666667, 57.96742380184082 ], "wc_review_avg": [ 527.3333333333334, 56.46434469842205 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14668583810301525263&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "Google;ServiceNow", "aff_unique_dep": "Google;Research", "aff_unique_url": "https://www.google.com;https://www.servicenow.com", "aff_unique_abbr": "Google;ServiceNow", "aff_campus_unique_index": "0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "2eBx1b9C4y", "title": "Detecting Deepfakes Without Seeing Any", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Deepfake attacks, malicious manipulation of media containing people, are a serious concern for society. Conventional deepfake detection methods train supervised classifiers to distinguish real media from previously encountered deepfakes. Such techniques can only detect deepfakes similar to those previously seen, but not zero-day (previously unseen) attack types. As current deepfake generation techniques are changing at a breathtaking pace, new attack types are proposed frequently, making this a major issue. Our main observations are that: i) in many effective deepfake attacks, the fake media must be accompanied by false facts i.e. claims about the identity, speech, motion, or appearance of the person. For instance, when impersonating Obama, the attacker explicitly or implicitly claims that the fake media show Obama; ii) current generative techniques cannot perfectly synthesize the false facts claimed by the attacker. We therefore introduce the concept of \u201cfact checking\u201d, adapted from fake news detection, for detecting zero-day deepfake attacks. Fact checking verifies that the claimed facts (e.g. identity is Obama), agree with the observed media (e.g. is the face really Obama\u2019s?), and thus can differentiate between real and fake media. Consequently, we introduce FACTOR, a practical recipe for deepfake fact checking and demonstrate its power in critical attack settings: face swapping and audio-visual synthesis. 
Although it is training-free, relies exclusively on off-the-shelf features, is very easy to implement, and does not see any deepfakes, it achieves better than state-of-the-art accuracy.", "keywords": "deepfake detection", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/9d0f343ff294b32e2d6bbeef144963c75cc0ad29.zip", "author": "Tal Reiss;Bar Cavia;Yedid Hoshen", "authorids": "~Tal_Reiss1;~Bar_Cavia1;~Yedid_Hoshen3", "gender": "M;M;M", "homepage": ";;https://www.cs.huji.ac.il/~ydidh/", "dblp": "276/6114;;136/0280", "google_scholar": "sgMIT6EAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.co.il/citations?user=6y1-qS4AAAAJ", "orcid": ";;", "linkedin": ";bar-cavia;", "or_profile": "~Tal_Reiss1;~Bar_Cavia1;~Yedid_Hoshen3", "aff": "Hebrew University of Jerusalem;Hebrew University, Hebrew University of Jerusalem;Google", "aff_domain": "huji.ac.il;cs.huji.ac.il;google.com", "position": "PhD student;Undergrad student;Researcher", "bibtex": "@misc{\nreiss2024detecting,\ntitle={Detecting Deepfakes Without Seeing Any},\nauthor={Tal Reiss and Bar Cavia and Yedid Hoshen},\nyear={2024},\nurl={https://openreview.net/forum?id=2eBx1b9C4y}\n}", "github": "", "project": "", "reviewers": "y1v2;pkUE;cJQS", "site": "https://openreview.net/forum?id=2eBx1b9C4y", "pdf_size": 3296304, "rating": "3;5;8", "confidence": "4;4;5", "soundness": "1;2;3", "contribution": "2;2;3", "presentation": "1;3;3", "wc_summary": "88;149;149", "wc_strengths": "16;143;107", "wc_weaknesses": "329;455;119", "wc_questions": "6;154;101", "wc_review": "439;901;476", "wc_reply_reviewers": "553;0;0", "wc_reply_authors": "1150;980;415", "reply_reviewers": "1;0;0", "reply_authors": "4;3;1", "rating_avg": [ 5.333333333333333, 2.0548046676563256 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.0, 0.816496580927726 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 128.66666666666666, 28.75567576825293 ], "wc_strengths_avg": [ 88.66666666666667, 53.443635937520405 ], "wc_weaknesses_avg": [ 301.0, 138.59292911256333 ], "wc_questions_avg": [ 87.0, 61.22635598062869 ], "wc_review_avg": [ 605.3333333333334, 209.612870682016 ], "wc_reply_reviewers_avg": [ 184.33333333333334, 260.6866999974405 ], "wc_reply_authors_avg": [ 848.3333333333334, 314.1744030877683 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 1.247219128924647 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.9176629354822472, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2813304515057723632&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Hebrew University of Jerusalem;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.huji.ac.il;https://www.google.com", "aff_unique_abbr": "HUJI;Google", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Jerusalem;;Mountain View", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Israel;United States" }, { "id": "2eG9w9CeSY", "title": "EventCLIP: Adapting CLIP for Event-based Object Recognition", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Recent advances in zero-shot and few-shot classification heavily rely on the success of pre-trained vision-language models (VLMs) such 
as CLIP.\nDue to a shortage of large-scale datasets, training such models for event camera data remains infeasible.\nThus, adapting existing models across modalities is an important research challenge.\nIn this work, we introduce EventCLIP, a novel approach that utilizes CLIP for zero-shot and few-shot event-based object recognition.\nWe first generalize CLIP's image encoder to event data by converting raw events to 2D grid-based representations.\nTo further enhance performance, we propose a feature adapter to aggregate temporal information over event frames and refine text embeddings to better align with the visual inputs.\nWe evaluate EventCLIP on N-Caltech, N-Cars, and N-ImageNet datasets, achieving state-of-the-art few-shot performance.\nWhen fine-tuned on the entire dataset, our method outperforms all existing event classifiers.\nMoreover, we explore practical applications of EventCLIP including robust event classification and label-free event recognition, where our approach surpasses previous baselines designed specifically for these tasks.", "keywords": "event-based vision;CLIP;few-shot learning", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "", "author": "Ziyi Wu;Xudong Liu;Igor Gilitschenski", "authorids": "~Ziyi_Wu1;~Xudong_Liu10;~Igor_Gilitschenski1", "gender": "M;M;M", "homepage": "https://wuziyi616.github.io/;;https://www.gilitschenski.org/igor", "dblp": "217/8678;;129/1281", "google_scholar": "iopH6wIAAAAJ;VK2CEbgAAAAJ;Nuw1Y4oAAAAJ", "orcid": "0000-0002-8247-5872;0000-0002-5286-4826;", "linkedin": ";xudong-frank-liu-566513198/;igorgilitschenski/", "or_profile": "~Ziyi_Wu1;~Xudong_Liu10;~Igor_Gilitschenski1", "aff": "Google;University of Toronto;University of Toronto", "aff_domain": "google.com;utoronto.ca;toronto.edu", "position": "Intern;MS student;Assistant Professor", "bibtex": "@misc{\nwu2024eventclip,\ntitle={Event{CLIP}: Adapting {CLIP} for Event-based Object Recognition},\nauthor={Ziyi Wu and Xudong Liu and Igor Gilitschenski},\nyear={2024},\nurl={https://openreview.net/forum?id=2eG9w9CeSY}\n}", "github": "", "project": "", "reviewers": "rcqs;KXch;rCwv;RBgA;FLiL;4a2f", "site": "https://openreview.net/forum?id=2eG9w9CeSY", "pdf_size": 3998324, "rating": "3;3;3;3;5;6", "confidence": "4;4;5;3;2;4", "soundness": "3;2;3;3;3;3", "contribution": "2;1;2;3;2;3", "presentation": "3;1;2;3;3;3", "wc_summary": "160;74;49;66;67;42", "wc_strengths": "43;31;44;28;55;85", "wc_weaknesses": "123;129;163;47;174;55", "wc_questions": "70;268;16;13;80;46", "wc_review": "396;502;272;154;376;228", "wc_reply_reviewers": "0;0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0;0", "reply_reviewers": "0;0;0;0;0;0", "reply_authors": "0;0;0;0;0;0", "rating_avg": [ 3.8333333333333335, 1.2133516482134197 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 2.8333333333333335, 0.3726779962499649 ], "contribution_avg": [ 2.1666666666666665, 0.6871842709362768 ], "presentation_avg": [ 2.5, 0.7637626158259734 ], "wc_summary_avg": [ 76.33333333333333, 38.99857547255911 ], "wc_strengths_avg": [ 47.666666666666664, 18.91795149821695 ], "wc_weaknesses_avg": [ 115.16666666666667, 48.76616541095772 ], "wc_questions_avg": [ 82.16666666666667, 86.75332206255212 ], "wc_review_avg": [ 321.3333333333333, 115.72188883506679 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 
-0.33995005182504245, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8551387232150929775&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Google;University of Toronto", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.utoronto.ca", "aff_unique_abbr": "Google;U of T", "aff_campus_unique_index": "0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;Canada" }, { "id": "2eIembMRQJ", "title": "Active Teacher Selection for Reinforcement Learning from Human Feedback", "track": "main", "status": "Reject", "tldr": "", "abstract": "Reinforcement learning from human feedback (RLHF) enables machine learning systems to learn objectives from human feedback. A core limitation of these systems is their assumption that all feedback comes from a single human teacher, despite querying a range of distinct teachers. We propose the Hidden Utility Bandit (HUB) framework to model differences in teacher rationality, expertise, and costliness, formalizing the problem of learning from multiple teachers. We develop a variety of solution algorithms and apply them to two real-world domains: paper recommendation systems and COVID-19 vaccine testing. We find that the Active Teacher Selection (ATS) algorithm outperforms baseline algorithms by actively selecting when and which teacher to query. The HUB framework and ATS algorithm demonstrate the importance of leveraging differences between teachers to learn accurate reward models, facilitating future research on active teacher selection for robust reward modeling.", "keywords": "reward modeling;preference learning;active learning", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/6fe8d4fb1d66fb183f9cec01091d7878bf5e8524.pdf", "author": "Rachel Freedman;Justin Svegliato;Kyle Hollins Wray;Stuart Russell", "authorids": "~Rachel_Freedman1;~Justin_Svegliato2;~Kyle_Hollins_Wray1;~Stuart_Russell1", "gender": "F;M;;M", "homepage": "https://rachelfreedman.github.io/;https://www.justinsvegliato.com/;;https://people.eecs.berkeley.edu/~russell/", "dblp": "218/7198;133/5067.html;;", "google_scholar": "Mj1fmhsAAAAJ;3Orv6wUAAAAJ;;https://scholar.google.com.tw/citations?user=KJGrjCAAAAAJ", "orcid": "0000-0003-3299-4313;;;", "linkedin": "rachelalexfreedman/;;;", "or_profile": "~Rachel_Freedman1;~Justin_Svegliato2;~Kyle_Hollins_Wray1;~Stuart_Russell1", "aff": "University of California, Berkeley;Microsoft;;University of California, Berkeley", "aff_domain": "berkeley.edu;microsoft.com;;berkeley.edu", "position": "PhD student;Senior Research Scientist;;Full Professor", "bibtex": "@misc{\nfreedman2024active,\ntitle={Active Teacher Selection for Reinforcement Learning from Human Feedback},\nauthor={Rachel Freedman and Justin Svegliato and Kyle Hollins Wray and Stuart Russell},\nyear={2024},\nurl={https://openreview.net/forum?id=2eIembMRQJ}\n}", "github": "", "project": "", "reviewers": "eqU7;9GWq;szYk;WRYj", "site": "https://openreview.net/forum?id=2eIembMRQJ", "pdf_size": 4152774, "rating": "5;5;5;6", "confidence": "4;4;5;3", "soundness": "3;2;3;3", "contribution": "2;2;2;3", "presentation": "3;2;3;3", "wc_summary": "84;71;79;70", "wc_strengths": "81;107;42;186", "wc_weaknesses": "560;102;183;271", "wc_questions": "42;6;61;49", "wc_review": "767;286;365;576", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "697;238;378;278", 
"reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 76.0, 5.787918451395113 ], "wc_strengths_avg": [ 104.0, 52.69250421075089 ], "wc_weaknesses_avg": [ 279.0, 172.8944764878277 ], "wc_questions_avg": [ 39.5, 20.5 ], "wc_review_avg": [ 498.5, 187.80109158362205 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 397.75, 180.13935577768675 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7957453272223164376&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of California, Berkeley;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.berkeley.edu;https://www.microsoft.com", "aff_unique_abbr": "UC Berkeley;Microsoft", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "2fSyBPBfBs", "title": "Bilevel Optimization without Lower-Level Strong Convexity from the Hyper-Objective Perspective", "track": "main", "status": "Reject", "tldr": "", "abstract": "Bilevel optimization reveals the inner structure of otherwise oblique optimization problems, such as hyperparameter tuning, neural architecture search, and meta-learning. A common goal in bilevel optimization is to find stationary points of the hyper-objective function. \nAlthough this hyper-objective approach is widely used, its theoretical properties have not been thoroughly investigated in cases where the lower-level functions lack strong convexity. \nThis work takes a step forward when the typical lower-level strong convexity assumption is absent.\nOur hardness results show that bilevel optimization for general convex lower-level functions is intractable to solve. 
\nWe then identify several regularity conditions of the lower-level\nproblems that can provably confer tractability.\nUnder these conditions, we propose the Inexact Gradient-Free Method (IGFM), which uses the Switching Gradient Method (SGM) as an efficient sub-routine, to find an approximate stationary point of the hyper-objective in polynomial time.", "keywords": "Bilevel Optimization", "primary_area": "learning theory", "supplementary_material": "/attachment/c4595614af34254d30a971187804019f05351e45.pdf", "author": "Lesi Chen;Jing Xu;Jingzhao Zhang", "authorids": "~Lesi_Chen1;~Jing_Xu4;~Jingzhao_Zhang2", "gender": "M;M;M", "homepage": "https://truenobility303.github.io/;https://jingxuthu.github.io;https://sites.google.com/view/jingzhao/home", "dblp": "326/5433;07/1951-27;220/5559", "google_scholar": "ynGzhugAAAAJ;jlrroGQAAAAJ;8NudxYsAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Lesi_Chen1;~Jing_Xu4;~Jingzhao_Zhang2", "aff": "Tsinghua Univeristy;Tsinghua University;Tsinghua University", "aff_domain": "mails.tsinghua.edu.cn;thu.edu.cn;mail.tsinghua.edu.cn", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@misc{\nchen2024bilevel,\ntitle={Bilevel Optimization without Lower-Level Strong Convexity from the Hyper-Objective Perspective},\nauthor={Lesi Chen and Jing Xu and Jingzhao Zhang},\nyear={2024},\nurl={https://openreview.net/forum?id=2fSyBPBfBs}\n}", "github": "", "project": "", "reviewers": "f4Tk;y92G;ShQk;V1pU;9BLf;Z4XB", "site": "https://openreview.net/forum?id=2fSyBPBfBs", "pdf_size": 291045, "rating": "3;3;3;5;5;6", "confidence": "2;3;3;3;3;3", "soundness": "2;2;2;3;3;2", "contribution": "2;2;2;2;3;3", "presentation": "2;3;2;3;3;3", "wc_summary": "16;80;44;103;48;60", "wc_strengths": "23;69;22;169;32;56", "wc_weaknesses": "83;217;63;315;135;201", "wc_questions": "7;370;813;4;37;33", "wc_review": "129;736;942;591;252;350", "wc_reply_reviewers": "0;0;57;0;0;0", "wc_reply_authors": "203;349;323;162;276;87", "reply_reviewers": "0;0;1;0;0;0", "reply_authors": "1;1;1;1;1;1", "rating_avg": [ 4.166666666666667, 1.2133516482134197 ], "confidence_avg": [ 2.8333333333333335, 0.3726779962499649 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 58.5, 27.602838018339103 ], "wc_strengths_avg": [ 61.833333333333336, 50.91632569443932 ], "wc_weaknesses_avg": [ 169.0, 86.0542464572977 ], "wc_questions_avg": [ 210.66666666666666, 298.3592167542713 ], "wc_review_avg": [ 500.0, 283.1330664781726 ], "wc_reply_reviewers_avg": [ 9.5, 21.242645786248 ], "wc_reply_authors_avg": [ 233.33333333333334, 91.88882171165085 ], "reply_reviewers_avg": [ 0.16666666666666666, 0.372677996249965 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.43000658178376594, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5733103613312067356&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "2gMwe9Duc4", "title": "Neuroexplicit Diffusion Models for Inpainting of Optical Flow Fields", "track": "main", "status": "Withdraw", "tldr": "", "abstract": 
"Deep learning has revolutionized the field of computer vision by introducing large scale neural networks with millions of parameters. \nTraining these networks requires massive datasets and leads to intransparent models that can fail to generalize.\nAt the other extreme, models designed from partial differential equations (PDEs) embed specialized domain knowledge into mathematical equations and usually rely on few manually chosen hyperparameters.\nThis makes them transparent by construction and if designed and calibrated carefully, they can generalize well to unseen scenarios. In this paper, we show how to bring model- and data-driven approaches together by combining the explicit PDE-based approaches with convolutional neural networks to obtain the best of both worlds. \nWe illustrate a joint architecture for the task of inpainting optical flow fields and show that the combination of model- and data-driven modeling leads to an effective architecture.\nOur model outperforms both fully explicit and fully data-driven baselines in terms of reconstruction quality, robustness and amount of required training data. \nAveraging the endpoint error across different mask densities, our method outperforms the explicit baseline by $27.12$%, the GAN baseline by $46.72$% and the Probabilisitic Diffusion baseline by $42.38$%.", "keywords": "Deep Learning;Diffusion;Inpainting;Optical Flow;Neuroexplicit", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Tom Fischer;Pascal Peter;Joachim Weickert;Eddy Ilg", "authorids": "~Tom_Fischer1;~Pascal_Peter1;~Joachim_Weickert1;~Eddy_Ilg3", "gender": "M;M;M;M", "homepage": "https://cvmp.cs.uni-saarland.de/people/#tom-fischer;https://www.mia.uni-saarland.de/peter/index.shtml;https://www.mia.uni-saarland.de/weickert/index.shtml;https://www.utn.de/departments/department-engineering/cvmp-lab/", "dblp": ";134/3090;w/JoachimWeickert.html;151/9307", "google_scholar": "idj3nF4AAAAJ;y4TcrHUAAAAJ;IWwCuGAAAAAJ;MYvSvGsAAAAJ", "orcid": "0009-0009-6776-2767;;;", "linkedin": "https://linkedin.com/in/tom-fischer-6209a2239;pascal-peter/;;eddy-ilg/", "or_profile": "~Tom_Fischer1;~Pascal_Peter1;~Joachim_Weickert1;~Eddy_Ilg3", "aff": "Universit\u00e4t des Saarlandes;Universit\u00e4t des Saarlandes;Universit\u00e4t des Saarlandes;Universit\u00e4t des Saarlandes", "aff_domain": "uni-saarland.de;uni-saarland.de;uni-saarland.de;uni-saarland.de", "position": "PhD student;Lecturer;Professor;Associate Professor", "bibtex": "@misc{\nanonymous2024neuroexplicit,\ntitle={Neuroexplicit Diffusion Models for Inpainting of Optical Flow Fields},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=2gMwe9Duc4}\n}", "github": "", "project": "", "reviewers": "cyeS;3rtj;BStA", "site": "https://openreview.net/forum?id=2gMwe9Duc4", "pdf_size": 6585006, "rating": "3;5;6", "confidence": "3;3;4", "soundness": "2;3;4", "contribution": "2;3;4", "presentation": "2;3;4", "wc_summary": "39;66;65", "wc_strengths": "51;27;113", "wc_weaknesses": "187;48;141", "wc_questions": "14;286;7", "wc_review": "291;427;326", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "621;1053;618", "reply_reviewers": "0;0;0", "reply_authors": "1;2;1", "rating_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "contribution_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 
56.666666666666664, 12.498888839501783 ], "wc_strengths_avg": [ 63.666666666666664, 36.23380864453651 ], "wc_weaknesses_avg": [ 125.33333333333333, 57.81772123569805 ], "wc_questions_avg": [ 102.33333333333333, 129.90338290009578 ], "wc_review_avg": [ 348.0, 57.659922534345 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 764.0, 204.35752983435674 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7559289460184545, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:UscZFzNJEpgJ:scholar.google.com/&scioq=Neuroexplicit+Diffusion+Models+for+Inpainting+of+Optical+Flow+Fields&hl=en&as_sdt=0,5", "gs_version_total": 6, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Universit\u00e4t des Saarlandes", "aff_unique_dep": "", "aff_unique_url": "https://www.uni-saarland.de", "aff_unique_abbr": "UDS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "id": "2gwo9cjOEz", "title": "Neural Tangent Kernels Motivate Graph Neural Networks with Cross-Covariance Graphs", "track": "main", "status": "Reject", "tldr": "", "abstract": "Neural tangent kernels (NTKs) provide a theoretical regime to analyze the learning and generalization behavior of over-parametrized neural networks. For a supervised learning task, the association between the eigenvectors of the NTK kernel and given data (a concept referred to as \\emph{alignment} in this paper) can govern the rate of convergence of gradient descent, as well as generalization to unseen data. Building upon this concept, we investigate NTKs and alignment in the context of graph neural networks (GNNs), where our analysis reveals that optimizing alignment translates to optimizing the graph representation or the graph shift operator in a GNN. Our results further establish the theoretical guarantees on the optimality of the alignment for a two-layer GNN and these guarantees are characterized by the graph shift operator being a function of the \\emph{cross-covariance} between the input and the output data. The theoretical insights drawn from the analysis of NTKs are validated by our experiments focused on a multi-variate time series prediction task for a publicly available dataset. 
Specifically, they demonstrate that GNNs with cross-covariance as the graph shift operator indeed outperform those that operate on the covariance matrix from only the input data.", "keywords": "Neural Tangent Kernel;Graph Neural Networks;Cross-covariance;Convergence;Generalization", "primary_area": "learning theory", "supplementary_material": "/attachment/29a863ab7cf611de377bbd77b52ecbccb44bd74a.zip", "author": "Shervin Khalafi;Saurabh Sihag;Alejandro Ribeiro", "authorids": "~Shervin_Khalafi1;~Saurabh_Sihag1;~Alejandro_Ribeiro1", "gender": "M;M;M", "homepage": "https://shervinkhalafi.github.io/;https://sihags.github.io/;https://alelab.seas.upenn.edu", "dblp": ";172/0928;32/15", "google_scholar": "rdfxlq8AAAAJ;T8D94-QAAAAJ;7mrPM4kAAAAJ", "orcid": ";;0000-0003-4230-9906", "linkedin": "shervin-khalafi-316b03221/;;", "or_profile": "~Shervin_Khalafi1;~Saurabh_Sihag1;~Alejandro_Ribeiro1", "aff": "University of Pennsylvania;University of Pennsylvania;University of Pennsylvania", "aff_domain": "upenn.edu;upenn.edu;upenn.edu", "position": "PhD student;Postdoc;Full Professor", "bibtex": "@misc{\nkhalafi2024neural,\ntitle={Neural Tangent Kernels Motivate Graph Neural Networks with Cross-Covariance Graphs},\nauthor={Shervin Khalafi and Saurabh Sihag and Alejandro Ribeiro},\nyear={2024},\nurl={https://openreview.net/forum?id=2gwo9cjOEz}\n}", "github": "", "project": "", "reviewers": "bUqG;TKzH;GUVA", "site": "https://openreview.net/forum?id=2gwo9cjOEz", "pdf_size": 1975007, "rating": "5;5;8", "confidence": "3;4;3", "soundness": "3;2;3", "contribution": "3;2;3", "presentation": "2;3;3", "wc_summary": "126;122;76", "wc_strengths": "81;50;64", "wc_weaknesses": "104;238;60", "wc_questions": "67;202;33", "wc_review": "378;612;233", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "510;944;524", "reply_reviewers": "0;0;0", "reply_authors": "1;2;1", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 108.0, 22.686266036231405 ], "wc_strengths_avg": [ 65.0, 12.675435561221029 ], "wc_weaknesses_avg": [ 134.0, 75.70116687784058 ], "wc_questions_avg": [ 100.66666666666667, 72.98553890250376 ], "wc_review_avg": [ 407.6666666666667, 156.141673560335 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 659.3333333333334, 201.37085743031992 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:2cSkFMEzNBsJ:scholar.google.com/&scioq=Neural+Tangent+Kernels+Motivate+Graph+Neural+Networks+with+Cross-Covariance+Graphs&hl=en&as_sdt=0,33", "gs_version_total": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Pennsylvania", "aff_unique_dep": "", "aff_unique_url": "https://www.upenn.edu", "aff_unique_abbr": "UPenn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "2h3m61LFWL", "title": "Value-Biased Maximum Likelihood Estimation for Model-based Reinforcement Learning in Discounted Linear MDPs", "track": "main", "status": "Reject", "tldr": "", "abstract": "We consider the infinite-horizon linear Markov Decision Processes (MDPs), where 
the transition probabilities of the dynamic model can be linearly parameterized with the help of a predefined low-dimensional feature mapping. While the existing regression-based approaches have been theoretically shown to achieve nearly-optimal regret, they are computationally rather inefficient due to the need for a large number of optimization runs in each time step, especially when the state and action spaces are large.\nTo address this issue, we propose to solve linear MDPs through the lens of Value-Biased Maximum Likelihood Estimation (VBMLE), which is a classic model-based exploration principle in the adaptive control literature for resolving the well-known closed-loop identification problem of Maximum Likelihood Estimation. We formally show that (i) VBMLE enjoys $\\widetilde{O}(d\\sqrt{T})$ regret, where $T$ is the time horizon and $d$ is the dimension of the model parameter, and (ii) VBMLE is computationally more efficient as it only requires solving one optimization problem in each time step. In our regret analysis, we offer a generic convergence result of MLE in linear MDPs through a novel supermartingale construct and uncover an interesting connection between linear MDPs and online learning, which could be of independent interest. Finally, the simulation results show that VBMLE significantly outperforms the benchmark method in terms of both empirical regret and computation time.", "keywords": "Reinforcement learning;model-based RL;regret analysis;linear MDPs", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/5c944c4277678c24783eb52fcb24426ed3a3196e.zip", "author": "Yu Heng Hung;Ping-Chun Hsieh;Akshay Mete;Panganamala Kumar", "authorids": "~Yu_Heng_Hung1;~Ping-Chun_Hsieh1;~Akshay_Mete1;~Panganamala_Kumar1", "gender": "M;M;M;M", "homepage": ";https://pinghsieh.github.io/;;https://cesg.tamu.edu/faculty/p-r-kumar/", "dblp": "276/1006.html;163/7352;228/0587;https://dblp.org/pers/k/Kumar:P=_R=.html", "google_scholar": "tiCRZiUAAAAJ;ix38JgoAAAAJ;evLF1akAAAAJ;qGUpTVwAAAAJ", "orcid": ";;;0000-0003-0389-5367", "linkedin": ";;;", "or_profile": "~Yu_Heng_Hung1;~Ping-Chun_Hsieh1;~Akshay_Mete1;~Panganamala_Kumar1", "aff": "National Yang-Ming Chiao Tung University;National Yang Ming Chiao Tung University;Texas A&M University - College Station;Texas A&M", "aff_domain": "nycu.edu.tw;nycu.edu.tw;tamu.edu;tamu.edu", "position": "PhD student;Associate Professor;PhD student;Full Professor", "bibtex": "@misc{\nhung2024valuebiased,\ntitle={Value-Biased Maximum Likelihood Estimation for Model-based Reinforcement Learning in Discounted Linear {MDP}s},\nauthor={Yu Heng Hung and Ping-Chun Hsieh and Akshay Mete and Panganamala Kumar},\nyear={2024},\nurl={https://openreview.net/forum?id=2h3m61LFWL}\n}", "github": "", "project": "", "reviewers": "p6f5;W2xc;WqoR;Jxj8", "site": "https://openreview.net/forum?id=2h3m61LFWL", "pdf_size": 408848, "rating": "3;3;5;6", "confidence": "4;5;4;3", "soundness": "3;3;3;3", "contribution": "2;3;2;3", "presentation": "1;3;3;3", "wc_summary": "63;72;47;69", "wc_strengths": "37;33;49;57", "wc_weaknesses": "441;481;340;172", "wc_questions": "60;3;53;179", "wc_review": "601;589;489;477", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "2005;1538;1470;1146", "reply_reviewers": "0;0;0;0", "reply_authors": "4;3;3;2", "rating_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 62.75, 
9.65336728815391 ], "wc_strengths_avg": [ 44.0, 9.539392014169456 ], "wc_weaknesses_avg": [ 358.5, 119.30737613408485 ], "wc_questions_avg": [ 73.75, 64.61955973232872 ], "wc_review_avg": [ 539.0, 56.32051136131489 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1539.75, 306.74286870276217 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:lK9xfbtOcZUJ:scholar.google.com/&scioq=Value-Biased+Maximum+Likelihood+Estimation+for+Model-based+Reinforcement+Learning+in+Discounted+Linear+MDPs&hl=en&as_sdt=0,5", "gs_version_total": 3, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "National Yang-Ming Chiao Tung University;National Yang Ming Chiao Tung University;Texas A&M University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.nycu.edu.tw;https://www.nycu.edu.tw;https://www.tamu.edu", "aff_unique_abbr": "NYCU;NYCU;TAMU", "aff_campus_unique_index": "0;0;1", "aff_campus_unique": "Taiwan;College Station;", "aff_country_unique_index": "0;0;1;1", "aff_country_unique": "China;United States" }, { "id": "2iFBWoR7NH", "title": "Knowledge Manipulation in Language Models (Part B)", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Language models can store vast amounts of factual knowledge, but their ability to use this knowledge for logical reasoning remains questionable.\nThis paper explores a language model's ability to manipulate its stored knowledge during inference. We focus on four manipulation types: *retrieval* (e.g., \"What is person A's attribute X\"), *classification* (e.g., \"Is A's attribute X even or odd?\"), *comparison* (e.g., \"Is A greater than B in attribute X?\") and *inverse search* (e.g., \"Which person's attribute X equals T?\")\n\nWe observe that pre-trained language models like GPT2/3/4 excel in knowledge retrieval but struggle with simple classification or comparison tasks unless Chain of Thoughts (CoTs) are employed during both training and inference. They also perform poorly in inverse knowledge search, irrespective of the prompts. 
Our primary contribution is a synthetic dataset for a *controlled experiment* that confirms these inherent weaknesses: a language model cannot *efficiently* manipulate knowledge from pre-training data, even when such knowledge is perfectly stored and fully extractable in the models, and despite adequate instruct fine-tuning.", "keywords": "Interpretability;Transformers;Language Models;Linear Probing;Inner Working;Factual Knowledge", "primary_area": "visualization or interpretation of learned representations", "supplementary_material": "/attachment/9d7b6b037dba4956e53527b7c687c8574ee1164d.zip", "author": "Zeyuan Allen-Zhu;Yuanzhi Li", "authorids": "~Zeyuan_Allen-Zhu1;~Yuanzhi_Li1", "gender": ";M", "homepage": ";", "dblp": ";73/3628", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Zeyuan_Allen-Zhu1;~Yuanzhi_Li1", "aff": ";Carnegie Mellon University", "aff_domain": ";andrew.cmu.edu", "position": ";Assistant Professor", "bibtex": "@misc{\nallen-zhu2024knowledge,\ntitle={Knowledge Manipulation in Language Models (Part B)},\nauthor={Zeyuan Allen-Zhu and Yuanzhi Li},\nyear={2024},\nurl={https://openreview.net/forum?id=2iFBWoR7NH}\n}", "github": "", "project": "", "reviewers": "kTy8;otDf;BNdJ", "site": "https://openreview.net/forum?id=2iFBWoR7NH", "pdf_size": 696437, "rating": "3;6;8", "confidence": "5;4;4", "soundness": "1;3;4", "contribution": "1;3;3", "presentation": "3;3;4", "wc_summary": "76;123;122", "wc_strengths": "37;94;101", "wc_weaknesses": "132;166;43", "wc_questions": "103;77;1", "wc_review": "348;460;267", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 5.666666666666667, 2.0548046676563256 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 1.247219128924647 ], "contribution_avg": [ 2.3333333333333335, 0.9428090415820634 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 107.0, 21.924111536540465 ], "wc_strengths_avg": [ 77.33333333333333, 28.662790435607548 ], "wc_weaknesses_avg": [ 113.66666666666667, 51.86092513208336 ], "wc_questions_avg": [ 60.333333333333336, 43.27688631231329 ], "wc_review_avg": [ 358.3333333333333, 79.12999150483687 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 3, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.9176629354822472, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:4PC5CJdToYwJ:scholar.google.com/&scioq=Knowledge+Manipulation+in+Language+Models+(Part+B)&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "BroGNet: Momentum-Conserving Graph Neural Stochastic Differential Equation for Learning Brownian Dynamics", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19540", "id": "2iGiSHmeAN", "author_site": "Suresh Suresh, Jayadeva Jayadeva, Sayan Ranu, N. M. Anoop Krishnan", "tldr": "", "abstract": "Neural networks (NNs) that exploit strong inductive biases based on physical laws and symmetries have shown remarkable success in learning the dynamics of physical systems directly from their trajectory. 
However, these works focus only on the systems that follow deterministic dynamics, such as Newtonian or Hamiltonian. Here, we propose a framework, namely Brownian graph neural networks (BroGNet), combining stochastic differential equations (SDEs) and GNNs to learn Brownian dynamics directly from the trajectory. We modify the architecture of BroGNet to enforce linear momentum conservation of the system, which, in turn, provides superior performance on learning dynamics as revealed empirically. We demonstrate this approach on several systems, namely, linear spring, linear spring with binary particle types, and non-linear spring systems, all following Brownian dynamics at finite temperatures. We show that BroGNet significantly outperforms proposed baselines across all the benchmarked Brownian systems. In addition, we demonstrate zero-shot generalizability of BroGNet to simulate unseen system sizes that are two orders of magnitude larger and to different temperatures than those used during training. Finally, we show that BroGNet conserves the momentum of the system resulting in superior performance and data efficiency. Altogether, our study contributes to advancing the understanding of the intricate dynamics of Brownian motion and demonstrates the effectiveness of graph neural networks in modeling such complex systems.", "keywords": "Brownian dynamics;stochastic differential equation;graph neural network;scientific machine learning", "primary_area": "applications to physical sciences (physics, chemistry, biology, etc.)", "supplementary_material": "/attachment/3c5a269cc97726c258f8c59f70a719d96f7f1327.pdf", "author": "Suresh Bishnoi;Jayadeva Jayadeva;Sayan Ranu;N M Anoop Krishnan", "authorids": "~Suresh_Bishnoi1;~Jayadeva_Jayadeva1;~Sayan_Ranu2;~N_M_Anoop_Krishnan1", "gender": "M;M;M;M", "homepage": "https://web.iitd.ac.in/~srz208500/;;https://www.cse.iitd.ac.in/~sayan/index.html;", "dblp": "329/6194;58/4288;38/768;", "google_scholar": "Wy6q2QwAAAAJ;;K4w5qYUAAAAJ;https://scholar.google.co.in/citations?user=fGnjHcEAAAAJ", "orcid": ";;0000-0003-4147-9372;0000-0003-1500-4947", "linkedin": "sureshb1999/;;;", "or_profile": "~Suresh_Bishnoi1;~Jayadeva_Jayadeva1;~Sayan_Ranu2;~N_M_Anoop_Krishnan1", "aff": "Indian Institute of Technology Delhi;Indian Institute of Technology Delhi;Indian Institute of Technology Delhi;Indian Institute of Technology Delhi", "aff_domain": "iitd.ac.in;iitd.ac.in;iitd.ac.in;iitd.ac.in", "position": "PhD student;Full Professor;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nbishnoi2024brognet,\ntitle={Bro{GN}et: Momentum-Conserving Graph Neural Stochastic Differential Equation for Learning Brownian Dynamics},\nauthor={Suresh Bishnoi and Jayadeva Jayadeva and Sayan Ranu and N M Anoop Krishnan},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=2iGiSHmeAN}\n}", "github": "", "project": "", "reviewers": "Va4f;VaBU;KKvb;cdjq", "pdf_size": 1982791, "rating": "6;6;6;8", "confidence": "3;4;4;4", "soundness": "3;3;3;4", "contribution": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "296;62;78;87", "wc_strengths": "12;51;19;213", "wc_weaknesses": "2;191;242;171", "wc_questions": "37;70;3;91", "wc_review": "347;374;342;562", "wc_reply_reviewers": "22;40;24;213", "wc_reply_authors": "938;737;1484;959", "reply_reviewers": "1;1;1;1", "reply_authors": "4;2;4;3", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 
0.4330127018922193 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 130.75, 95.82634032456838 ], "wc_strengths_avg": [ 73.75, 81.72935519138763 ], "wc_weaknesses_avg": [ 151.5, 90.11242977525353 ], "wc_questions_avg": [ 50.25, 33.38693606786942 ], "wc_review_avg": [ 406.25, 90.74242392618791 ], "wc_reply_reviewers_avg": [ 74.75, 80.12295239193324 ], "wc_reply_authors_avg": [ 1029.5, 276.3462502007219 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.25, 0.82915619758885 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10400832838272360997&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "openreview": "https://openreview.net/forum?id=2iGiSHmeAN", "pdf": "https://openreview.net/pdf?id=2iGiSHmeAN", "email": "iitd.ac.in;iitd.ac.in;iitd.ac.in;iitd.ac.in", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Indian Institute of Technology Delhi", "aff_unique_dep": "", "aff_unique_url": "https://www.iitd.ac.in", "aff_unique_abbr": "IIT Delhi", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Delhi", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "India" }, { "title": "Deep SE(3)-Equivariant Geometric Reasoning for Precise Placement Tasks", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19539", "id": "2inBuwTyL2", "author_site": "Ben Eisner, Yi Yang, Todor Davchev, Mel Vecerik, Jonathan Scholz, David Held", "tldr": "", "abstract": "Many robot manipulation tasks can be framed as geometric reasoning tasks, where an agent must be able to precisely manipulate an object into a position that satisfies the task from a set of initial conditions. Often, task success is defined based on the relationship between two objects - for instance, hanging a mug on a rack. In such cases, the solution should be equivariant to the initial position of the objects as well as the agent, and invariant to the pose of the camera. This poses a challenge for learning systems which attempt to solve this task by learning directly from high-dimensional demonstrations: the agent must learn to be both equivariant as well as precise, which can be challenging without any inductive biases about the problem. In this work, we propose a method for precise relative pose prediction which is provably SE(3)-equivariant, can be learned from only a few demonstrations, and can generalize across variations in a class of objects. We accomplish this by factoring the problem into learning an SE(3) invariant task-specific representation of the scene and then interpreting this representation with novel geometric reasoning layers which are provably SE(3) equivariant. We demonstrate that our method can yield substantially more precise placement predictions in simulated placement tasks than previous methods trained with the same amount of data, and can accurately represent relative placement relationships data collected from real-world demonstrations. 
Supplementary information and videos can be found at https://sites.google.com/view/reldist-iclr-2023.", "keywords": "Learning from Demonstration;Manipulation;3D Learning;SE(3) Equivariance", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Ben Eisner;Yi Yang;Todor Davchev;Mel Vecerik;Jonathan Scholz;David Held", "authorids": "~Ben_Eisner1;~Yi_Yang10;~Todor_Davchev1;~Mel_Vecerik1;~Jonathan_Scholz2;~David_Held1", "gender": "M;M;;;M;M", "homepage": ";https://yangyi02.github.io/;https://tdavchev.github.io/;https://sites.google.com/corp/view/2020-s3k/home;https://sites.google.com/site/jonathanscholz/;http://davheld.github.io/", "dblp": ";33/4854-7;241/7187;;;22/11147", "google_scholar": "RWe-v0UAAAAJ;-BO7TXUAAAAJ;h_q7XhoAAAAJ;;bwORIKIAAAAJ;0QtU-NsAAAAJ", "orcid": ";;0000-0002-0584-5163;;;", "linkedin": ";;;;jonathan-scholz-689aa34/;", "or_profile": "~Ben_Eisner1;~Yi_Yang10;~Todor_Davchev1;~Mel_Vecerik1;~Jonathan_Scholz2;~David_Held1", "aff": "Carnegie Mellon University;Google DeepMind;Google DeepMind;Google DeepMind;Google DeepMind;Carnegie Mellon University", "aff_domain": "cmu.edu;deepmind.com;deepmind.com;deepmind.com;deepmind.com;cmu.edu", "position": "PhD student;Researcher;Researcher;Researcher;Researcher;Associate Professor", "bibtex": "@inproceedings{\neisner2024deep,\ntitle={Deep {SE}(3)-Equivariant Geometric Reasoning for Precise Placement Tasks},\nauthor={Ben Eisner and Yi Yang and Todor Davchev and Mel Vecerik and Jonathan Scholz and David Held},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=2inBuwTyL2}\n}", "github": "", "project": "", "reviewers": "Te9m;5kHb;5GvH", "pdf_size": 2076030, "rating": "6;6;6", "confidence": "4;4;3", "soundness": "2;2;3", "contribution": "2;2;3", "presentation": "3;2;3", "wc_summary": "129;50;65", "wc_strengths": "62;45;94", "wc_weaknesses": "102;98;165", "wc_questions": "117;11;56", "wc_review": "410;204;380", "wc_reply_reviewers": "66;147;0", "wc_reply_authors": "987;1125;886", "reply_reviewers": "2;2;0", "reply_authors": "3;3;3", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 81.33333333333333, 34.25719713513579 ], "wc_strengths_avg": [ 67.0, 20.314198646923455 ], "wc_weaknesses_avg": [ 121.66666666666667, 30.684777260973487 ], "wc_questions_avg": [ 61.333333333333336, 43.43833432452134 ], "wc_review_avg": [ 331.3333333333333, 90.86742479507653 ], "wc_reply_reviewers_avg": [ 71.0, 60.11655346075655 ], "wc_reply_authors_avg": [ 999.3333333333334, 97.96030942285871 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.9428090415820634 ], "reply_authors_avg": [ 3.0, 0.0 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12691674680712641233&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=2inBuwTyL2", "pdf": "https://openreview.net/pdf?id=2inBuwTyL2", "email": "cmu.edu;deepmind.com;deepmind.com;deepmind.com;deepmind.com;cmu.edu", "author_num": 6, "aff_unique_index": "0;1;1;1;1;0", "aff_unique_norm": "Carnegie Mellon University;Google", "aff_unique_dep": ";Google 
DeepMind", "aff_unique_url": "https://www.cmu.edu;https://deepmind.com", "aff_unique_abbr": "CMU;DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "2int61IpaP", "title": "FutureDD: Planning in POMDP with Encoded Future Dynamics", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Partially observable Markov decision process (POMDP) is a powerful framework for modeling decision-making problems where agents do not have full access to environment states. In the realm of offline reinforcement learning (RL), agents need to extract policies on previously recorded decision-making datasets without directly interacting with environments. Due to the inherent partial observability of environments and the limited availability of offline data, agents must possess the capability to extract valuable insights from limited data, which can serve as crucial prior information for making informed decisions. Recent works have shown that deep generative models, particularly diffusion models, exhibit impressive performance in offline RL. However, most of these approaches mainly focus on fully observed environments while neglecting POMDPs, and heavily rely on history information for decision-making, disregarding the valuable prior information about the future that can be extracted from offline data. Having recognized this gap, we propose a novel framework $\\textit{FutureDD}$ to extract future prior. $\\textit{FutureDD}$ leverages an auxiliary prior model encoding future sub-trajectories to a latent variable, which serves as a compensation for directly modeling observations with a diffusion model. This enables $\\textit{FutureDD}$ to extract richer prior information from limited offline data for agents to predict potential future dynamics. 
The experimental results on a set of tasks demonstrate that in the context of POMDPs, $\\textit{FutureDD}$ provides a simple yet effective approach for agents to learn behaviours yielding higher returns.", "keywords": "Offline Reinforcement Learning;Partially Observable Markov Decision Process;Sequential Decision Making;Diffusion Models", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/52486c4a6445a8da7138ea1c0e021bee721cb3c7.zip", "author": "Yao Tang;Zhihui Xie;Tong Yu;Bokai Hu;Shuai Li", "authorids": "~Yao_Tang2;~Zhihui_Xie2;~Tong_Yu3;~Bokai_Hu2;~Shuai_Li3", "gender": "F;M;;M;F", "homepage": "https://yaotang23.github.io/;https://fffffarmer.github.io/;https://www.linkedin.com/in/tong-yu-42790744;;http://shuaili8.github.io", "dblp": ";31/3570-2;32/1593-1;;57/2281-10", "google_scholar": ";Jml8NvkAAAAJ;https://scholar.google.com/citations?hl=en;;https://scholar.google.com.hk/citations?user=kMZgQxcAAAAJ", "orcid": ";;0000-0002-5991-2050;;", "linkedin": ";;tong-yu-42790744;\u535a\u51ef-\u80e1-a26279292;", "or_profile": "~Yao_Tang2;~Zhihui_Xie2;~Tong_Yu3;~Bokai_Hu2;~Shuai_Li3", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Adobe Research;University of California, San Diego;John Hopcroft Center, Shanghai Jiao Tong University", "aff_domain": "cs.sjtu.edu.cn;sjtu.edu.cn;adobe.com;ucsd.edu;sjtu.edu.cn", "position": "Undergrad student;MS student;Senior Research Scientist;MS student;Assistant Professor", "bibtex": "@misc{\ntang2024futuredd,\ntitle={Future{DD}: Planning in {POMDP} with Encoded Future Dynamics},\nauthor={Yao Tang and Zhihui Xie and Tong Yu and Bokai Hu and Shuai Li},\nyear={2024},\nurl={https://openreview.net/forum?id=2int61IpaP}\n}", "github": "", "project": "", "reviewers": "e8st;BsjN;bCks;sLyp", "site": "https://openreview.net/forum?id=2int61IpaP", "pdf_size": 341452, "rating": "3;3;5;5", "confidence": "2;5;4;4", "soundness": "2;3;3;3", "contribution": "1;2;2;2", "presentation": "1;2;3;2", "wc_summary": "63;91;54;242", "wc_strengths": "17;45;28;25", "wc_weaknesses": "199;107;292;285", "wc_questions": "76;28;29;152", "wc_review": "355;271;403;704", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 112.5, 76.00164471904539 ], "wc_strengths_avg": [ 28.75, 10.207227831296802 ], "wc_weaknesses_avg": [ 220.75, 75.19433156827714 ], "wc_questions_avg": [ 71.25, 50.49443038593464 ], "wc_review_avg": [ 433.25, 163.30091089764318 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.22941573387056177, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:1EXxHaTR3wMJ:scholar.google.com/&scioq=FutureDD:+Planning+in+POMDP+with+Encoded+Future+Dynamics&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Shanghai Jiao Tong University;Adobe;University of California, San Diego", "aff_unique_dep": ";Adobe Research;", "aff_unique_url": "https://www.sjtu.edu.cn;https://research.adobe.com;https://www.ucsd.edu", "aff_unique_abbr": "SJTU;Adobe;UCSD", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";San 
Diego;Shanghai", "aff_country_unique_index": "0;0;1;1;0", "aff_country_unique": "China;United States" }, { "id": "2kvDzdC5rh", "title": "IntentGPT: Few-Shot Intent Discovery with Large Language Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "In today's digitally driven world, dialogue systems play a pivotal role in enhancing user interactions, from customer service to virtual assistants. In these dialogues, it is important to identify user's goals automatically to resolve their needs promptly. This has necessitated the integration of models that perform Intent Detection. However, users' intents are diverse and dynamic, making it challenging to maintain a fixed set of predefined intents. As a result, a more practical approach is to develop a model capable of identifying new intents as they emerge. We address the challenge of Intent Discovery, an area that has drawn significant attention in recent research efforts. Existing methods need to train on a substantial amount of data for correctly identifying new intents, demanding significant human effort. To overcome this, we introduce IntentGPT, a novel method that efficiently prompts Large Language Models (LLMs) such as GPT-4 to effectively discover new intents with minimal labeled data. IntentGPT comprises an In-Context Prompt Generator, which generates informative prompts for In-Context Learning, an Intent Predictor for classifying and discovering user intents behind utterances, and a Semantic Few-Shot Sampler which leverages embedding similarities for selecting the closest examples from the labeled data. Our experiments show that IntentGPT outperforms previous methods that require extensive domain-specific data and fine-tuning, in popular benchmarks, including CLINC and BANKING.", "keywords": "intent discovery;intent detection;intent classification;open-set classification;in-contex learning;few-shot learning;large language models", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/4f79883c32a5f4b99a6256bbaa731c4512af88a0.zip", "author": "Juan A. Rodriguez;Nicholas Botzer;David Vazquez;Christopher Pal;Marco Pedersoli;Issam H. 
Laradji", "authorids": "~Juan_A._Rodriguez1;~Nicholas_Botzer1;~David_Vazquez1;~Christopher_Pal1;~Marco_Pedersoli1;~Issam_H._Laradji1", "gender": "M;M;;M;M;M", "homepage": ";http://www.david-vazquez.com;https://scholar.google.ca/citations?user=1ScWJOoAAAAJ&hl=en&oi=ao;http://profs.etsmtl.ca/mpedersoli/;https://issamlaradji.github.io/;https://github.com/joanrod/ocr-vqgan", "dblp": ";94/8653;45/1217;81/4503;142/0043;276/6112", "google_scholar": "5rS0yCoAAAAJ;1jHvtfsAAAAJ;https://scholar.google.ca/citations?user=1ScWJOoAAAAJ;aVfyPAoAAAAJ;https://scholar.google.ca/citations?user=8vRS7F0AAAAJ;https://scholar.google.es/citations?user=0selhb4AAAAJ", "orcid": ";0000-0002-2845-8158;;;;", "linkedin": ";https://www.linkedin.com/company/david-vazquez/;;;issam-laradji-67ba1a99/;", "or_profile": "~Nicholas_Botzer1;~David_Vazquez1;~Christopher_Pal1;~Marco_Pedersoli1;~Issam_H._Laradji1;~Juan_Rodriguez2", "aff": "University of Notre Dame;ServiceNow research;Polytechnique Montreal;\u00c9cole de technologie sup\u00e9rieure, Universit\u00e9 du Qu\u00e9bec;ServiceNow;\u00c9cole de technologie sup\u00e9rieure, Universit\u00e9 du Qu\u00e9bec", "aff_domain": "nd.edu;servicenow.com;polymtl.ca;etsmtl.ca;servicenow.com;etsmtl.ca", "position": "PhD student;Researcher;Full Professor;Associate Professor;Researcher;PhD student", "bibtex": "@misc{\nrodriguez2024intentgpt,\ntitle={Intent{GPT}: Few-Shot Intent Discovery with Large Language Models},\nauthor={Juan A. Rodriguez and Nicholas Botzer and David Vazquez and Christopher Pal and Marco Pedersoli and Issam H. Laradji},\nyear={2024},\nurl={https://openreview.net/forum?id=2kvDzdC5rh}\n}", "github": "", "project": "", "reviewers": "kRkh;NWU5;vjHF;GqJc;G7hj", "site": "https://openreview.net/forum?id=2kvDzdC5rh", "pdf_size": 580546, "rating": "3;3;5;5;6", "confidence": "4;4;5;4;3", "soundness": "2;2;2;4;3", "contribution": "2;1;2;2;3", "presentation": "3;4;2;3;2", "wc_summary": "20;51;42;64;126", "wc_strengths": "98;117;22;39;128", "wc_weaknesses": "111;776;43;309;226", "wc_questions": "2;29;17;57;77", "wc_review": "231;973;124;469;557", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "1431;465;791;1330;667", "reply_reviewers": "0;0;0;0;0", "reply_authors": "3;1;2;2;1", "rating_avg": [ 4.4, 1.2 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.6, 0.8 ], "contribution_avg": [ 2.0, 0.6324555320336759 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 60.6, 35.7077022503549 ], "wc_strengths_avg": [ 80.8, 42.51776099467139 ], "wc_weaknesses_avg": [ 293.0, 258.32460200298385 ], "wc_questions_avg": [ 36.4, 27.155846515989886 ], "wc_review_avg": [ 470.8, 295.774508705534 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 936.8, 378.28158823818006 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.2635231383473649, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12943960937829617865&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2;3;1;3", "aff_unique_norm": "University of Notre Dame;ServiceNow;Polytechnique Montreal;Universit\u00e9 du Qu\u00e9bec", "aff_unique_dep": ";research;;", "aff_unique_url": "https://www.nd.edu;https://www.servicenow.com;https://www.polymtl.ca;https://www.etsmtl.ca", "aff_unique_abbr": "Notre Dame;ServiceNow;PolyMTL;ETS", "aff_campus_unique_index": "1;2;2", "aff_campus_unique": ";Montreal;\u00c9cole de technologie 
sup\u00e9rieure", "aff_country_unique_index": "0;0;1;1;0;1", "aff_country_unique": "United States;Canada" }, { "id": "2l7g7zwC4z", "title": "Embedding File Structure for Tabular File Preparation", "track": "main", "status": "Reject", "tldr": "", "abstract": "We introduce the notion of file structure, the set of characters within a file's content that do not belong to data values.\nData preparation can be considered as a pipeline of heterogeneous steps with the common theme of wrangling the structure of a file to access its payload in a downstream task.\nWe claim that solving typical data preparation tasks benefits from an explicit representation of file structure.\nWe propose a novel approach for learning such a representation, which we call a structural embedding, using the raw file content as input.\nOur approach is based on a novel neural network architecture, composed of a transformer module and a convolutional module, trained in a self-supervised fashion on almost 1M public data files to learn structural embeddings.\nWe demonstrate the usefulness of structural embeddings in several steps of a data preparation pipeline: data loading, row classification, and column type annotation.\nFor these tasks, we show that our approach obtains performances comparable with state-of-the-art baselines on six real-world datasets, and, more importantly, we improve upon such baselines by combining them with the structural embeddings provided by our approach.", "keywords": "representation;tabular embedding;file structure;data preparation;table representation learning", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/4494d29648c5f2a4bcd456c387323fb43b49b440.zip", "author": "Gerardo Vitagliano;Mazhar Hameed;Alejandro Sierra M\u00fanera;Felix Naumann", "authorids": "~Gerardo_Vitagliano1;mazhar.hameed@hpi.de;~Alejandro_Sierra_M\u00fanera1;~Felix_Naumann1", "gender": "M;;;", "homepage": "https://vitaglianog.github.io/;;;", "dblp": "249/4023.html;;;n/FelixNaumann", "google_scholar": "zYBXv3sAAAAJ;;;", "orcid": "0000-0001-7782-2596;;;", "linkedin": "gerardo-vitagliano/;;;", "or_profile": "~Gerardo_Vitagliano1;mazhar.hameed@hpi.de;~Alejandro_Sierra_M\u00fanera1;~Felix_Naumann1", "aff": "Computer Science and Artificial Intelligence Laboratory, Electrical Engineering & Computer Science;;;Hasso Plattner Institute", "aff_domain": "csail.mit.edu;;;hpi.de", "position": "Postdoc;;;Full Professor", "bibtex": "@misc{\nvitagliano2024embedding,\ntitle={Embedding File Structure for Tabular File Preparation},\nauthor={Gerardo Vitagliano and Mazhar Hameed and Alejandro Sierra M{\\'u}nera and Felix Naumann},\nyear={2024},\nurl={https://openreview.net/forum?id=2l7g7zwC4z}\n}", "github": "", "project": "", "reviewers": "ffWZ;Wgj4;9VNy", "site": "https://openreview.net/forum?id=2l7g7zwC4z", "pdf_size": 2520129, "rating": "3;3;5", "confidence": "3;3;3", "soundness": "2;2;3", "contribution": "2;1;2", "presentation": "2;3;3", "wc_summary": "36;251;30", "wc_strengths": "35;52;29", "wc_weaknesses": "309;131;104", "wc_questions": "8;58;14", "wc_review": "388;492;177", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "476;645;278", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 1.6666666666666667, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 
0.4714045207910317 ], "wc_summary_avg": [ 105.66666666666667, 102.7953738691041 ], "wc_strengths_avg": [ 38.666666666666664, 9.741092797468305 ], "wc_weaknesses_avg": [ 181.33333333333334, 90.94442747573316 ], "wc_questions_avg": [ 26.666666666666668, 22.29100466306732 ], "wc_review_avg": [ 352.3333333333333, 131.04791320564993 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 466.3333333333333, 149.98296199531762 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:mSuJ70-nehkJ:scholar.google.com/&scioq=Embedding+File+Structure+for+Tabular+File+Preparation&hl=en&as_sdt=0,23", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Massachusetts Institute of Technology;Hasso Plattner Institute", "aff_unique_dep": "Computer Science and Artificial Intelligence Laboratory;", "aff_unique_url": "https://www.csail.mit.edu;https://www.hpi.de", "aff_unique_abbr": "CSAIL;HPI", "aff_campus_unique_index": "0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;Germany" }, { "title": "Instant3D: Fast Text-to-3D with Sparse-view Generation and Large Reconstruction Model", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19538", "id": "2lDQLiH1W4", "author_site": "Jiahao Li, Hao Tan, Kai Zhang, Zexiang Xu, Fujun Luan, Yinghao Xu, Yicong Hong, Kalyan Sunkavalli, Greg Shakhnarovich, Sai Bi", "tldr": "", "abstract": "Text-to-3D with diffusion models has achieved remarkable progress in recent years. However, existing methods either rely on score distillation-based optimization which suffer from slow inference, low diversity and Janus problems, or are feed-forward methods that generate low-quality results due to the scarcity of 3D training data. In this paper, we propose Instant3D, a novel method that generates high-quality and diverse 3D assets from text prompts in a feed-forward manner. We adopt a two-stage paradigm, which first generates a sparse set of four structured and consistent views from text in one shot with a fine-tuned 2D text-to-image diffusion model, and then directly regresses the NeRF from the generated images with a novel transformer-based sparse-view reconstructor. Through extensive experiments, we demonstrate that our method can generate diverse 3D assets of high visual quality within 20 seconds, which is two orders of magnitude faster than previous optimization-based methods that can take 1 to 10 hours. 
Our project webpage is: https://jiahao.ai/instant3d/.", "keywords": "text-to-3d;generative models;diffusion models;3D reconstruction;3D generation;sparse-view reconstruction", "primary_area": "generative models", "supplementary_material": "/attachment/d95b8bde0e07536f9902a6ca7875eea06e659153.zip", "author": "Jiahao Li;Hao Tan;Kai Zhang;Zexiang Xu;Fujun Luan;Yinghao Xu;Yicong Hong;Kalyan Sunkavalli;Greg Shakhnarovich;Sai Bi", "authorids": "~Jiahao_Li2;~Hao_Tan1;~Kai_Zhang7;~Zexiang_Xu1;~Fujun_Luan2;~Yinghao_Xu1;~Yicong_Hong1;~Kalyan_Sunkavalli1;~Greg_Shakhnarovich1;~Sai_Bi1", "gender": ";M;M;M;M;M;M;M;M;M", "homepage": "https://jiahao.ai;http://www.cs.unc.edu/~airsplay/;https://kai-46.github.io/website/;https://cseweb.ucsd.edu/~zex014/;https://luanfujun.com/;https://justimyhxu.github.io/;http://www.yiconghong.me/;http://www.kalyans.org/;https://sai-bi.github.io/;http://ttic.edu/gregory/", "dblp": ";94/877-2;55/957-45;154/0366;183/9337;232/2482;262/3437;42/5978;165/9898;17/1926.html", "google_scholar": "w9jtLkIAAAAJ;OV1Y3FUAAAAJ;6B7FPMoAAAAJ;_RRIYvEAAAAJ;NLxrmYQAAAAJ;https://scholar.google.com/citations?hl=en;AerHOzUAAAAJ;j7uL6VEAAAAJ;-q4nE1kAAAAJ;https://scholar.google.com.tw/citations?user=YLOz1kgAAAAJ", "orcid": ";;;;;;0000-0002-5068-1508;;;", "linkedin": "jiahaoli95;hao-tan-23677180/;;;luanfujun/;;yicong-hong;;;", "or_profile": "~Jiahao_Li2;~Hao_Tan1;~Kai_Zhang7;~Zexiang_Xu1;~Fujun_Luan2;~Yinghao_Xu1;~Yicong_Hong1;~Kalyan_Sunkavalli1;~Sai_Bi1;~Gregory_Shakhnarovich2", "aff": "Toyota Technological Institute at Chicago;Adobe Systems;Adobe Systems;Adobe Research;Adobe Systems;Stanford University;Adobe Systems;Adobe Research;Adobe Systems;University of Chicago", "aff_domain": "ttic.edu;adobe.com;adobe.com;adobe.com;adobe.com;stanford.edu;adobe.com;adobe.com;adobe.com;uchicago.edu", "position": "PhD student;Research Scientist;Researcher;Researcher;Researcher;Postdoc;Researcher;Principal Scientist;Researcher;Professor, part time", "bibtex": "@inproceedings{\nli2024instantd,\ntitle={Instant3D: Fast Text-to-3D with Sparse-view Generation and Large Reconstruction Model},\nauthor={Jiahao Li and Hao Tan and Kai Zhang and Zexiang Xu and Fujun Luan and Yinghao Xu and Yicong Hong and Kalyan Sunkavalli and Greg Shakhnarovich and Sai Bi},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=2lDQLiH1W4}\n}", "github": "", "project": "", "reviewers": "dCt6;wyA8;eZrX", "pdf_size": 44315184, "rating": "6;8;8", "confidence": "3;5;5", "soundness": "3;3;3", "contribution": "2;4;3", "presentation": "3;4;3", "wc_summary": "32;148;72", "wc_strengths": "25;150;235", "wc_weaknesses": "100;315;418", "wc_questions": "156;160;22", "wc_review": "313;773;747", "wc_reply_reviewers": "0;148;51", "wc_reply_authors": "737;907;1117", "reply_reviewers": "0;1;1", "reply_authors": "3;3;3", "rating_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.9428090415820634 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 84.0, 48.11098280711657 ], "wc_strengths_avg": [ 136.66666666666666, 86.24899355290407 ], "wc_weaknesses_avg": [ 277.6666666666667, 132.47976784735428 ], "wc_questions_avg": [ 112.66666666666667, 64.1318087136866 ], "wc_review_avg": [ 611.0, 210.98499156733084 ], "wc_reply_reviewers_avg": [ 66.33333333333333, 61.38584708401621 ], "wc_reply_authors_avg": [ 920.3333333333334, 
155.42057635833024 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 3.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.9999999999999998, "gs_citation": 250, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8026239238112532104&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=2lDQLiH1W4", "pdf": "https://openreview.net/pdf?id=2lDQLiH1W4", "email": "ttic.edu;adobe.com;adobe.com;adobe.com;adobe.com;stanford.edu;adobe.com;adobe.com;adobe.com;uchicago.edu", "author_num": 10, "aff_unique_index": "0;1;1;1;1;2;1;1;1;3", "aff_unique_norm": "Toyota Technological Institute at Chicago;Adobe;Stanford University;University of Chicago", "aff_unique_dep": ";Adobe Systems Incorporated;;", "aff_unique_url": "https://www.tti-chicago.org;https://www.adobe.com;https://www.stanford.edu;https://www.uchicago.edu", "aff_unique_abbr": "TTI Chicago;Adobe;Stanford;UChicago", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Chicago;;Stanford", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "2mDzzLWRHr", "title": "Comfetch: Federated Learning of Large Networks on Constrained Clients via Sketching", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Federated learning (FL) is a popular paradigm for private and collaborative model training on the edge. In centralized FL, the parameters of a global architecture (such as a deep neural network) are maintained and distributed by a central server/controller to clients who transmit model updates (gradients) back to the server based on local optimization. While many efforts have focused on reducing the communication complexity of gradient transmission, the vast majority of compression-based algorithms assume that each participating client is able to download and train the current and full set of parameters, which may not be a practical assumption depending on the resource constraints of smaller clients such as mobile devices. In this work, we propose a simple yet effective novel algorithm Comfetch, which allows clients to train large networks using reduced representations of the global architecture via the count sketch, which reduces local computational and memory costs along with bi-directional communication complexity. We provide a nonconvex convergence guarantee and experimentally demonstrate that it is possible to learn large models, such as a deep convolutional network, through federated training on their sketched counterparts. 
The resulting global models exhibit competitive test accuracy over CIFAR10/100 classification when compared against un-compressed model training.", "keywords": "federated learning;compression;sketch", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/d2141f11f1f8ea4517950d066a9a07c786e6e54c.zip", "author": "Tahseen Rabbani;Brandon Yushan Feng;Marco Bornstein;Yifan Yang;Kyle Rui Sang;Arjun Rajkumar;Amitabh Varshney;Furong Huang", "authorids": "~Tahseen_Rabbani1;~Brandon_Yushan_Feng1;~Marco_Bornstein1;~Yifan_Yang5;~Kyle_Rui_Sang1;~Arjun_Rajkumar1;~Amitabh_Varshney1;~Furong_Huang1", "gender": "M;;M;M;M;M;M;F", "homepage": "https://www.cs.umd.edu/people/trabbani;https://brandonyfeng.github.io/;https://marcobornstein.github.io;https://yifanyang.dev/;;;http://www.cs.umd.edu/~varshney;https://furong-huang.com", "dblp": "280/2362;284/2193;332/0431;;;;;72/8513", "google_scholar": ";VCeYRsYAAAAJ;;Wn-GGXkAAAAJ;;qpMkWPEAAAAJ;b053OdAAAAAJ;13yyuCcAAAAJ", "orcid": ";0000-0001-7003-9128;;;;;0000-0002-9873-2212;", "linkedin": ";;;;kyle-sang/;arjun-rajkumar-1351a9145/;amitabh-varshney-4556961b/;", "or_profile": "~Tahseen_Rabbani1;~Brandon_Yushan_Feng1;~Marco_Bornstein1;~Yifan_Yang5;~Kyle_Rui_Sang1;~Arjun_Rajkumar1;~Amitabh_Varshney1;~Furong_Huang1", "aff": "University of Maryland, College Park;Massachusetts Institute of Technology;University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park;Adobe Systems;University of Maryland, College Park;University of Maryland", "aff_domain": "umd.edu;mit.edu;umd.edu;umd.edu;umd.edu;adobe.com;umd.edu;cs.umd.edu", "position": "PhD student;Postdoc;PhD student;PhD student;MS student;Engineer;Full Professor;Assistant Professor", "bibtex": "@misc{\nrabbani2024comfetch,\ntitle={Comfetch: Federated Learning of Large Networks on Constrained Clients via Sketching},\nauthor={Tahseen Rabbani and Brandon Yushan Feng and Marco Bornstein and Yifan Yang and Kyle Rui Sang and Arjun Rajkumar and Amitabh Varshney and Furong Huang},\nyear={2024},\nurl={https://openreview.net/forum?id=2mDzzLWRHr}\n}", "github": "", "project": "", "reviewers": "qrJM;p5E1;W1rB;cTtw", "site": "https://openreview.net/forum?id=2mDzzLWRHr", "pdf_size": 770448, "rating": "3;3;5;6", "confidence": "4;3;4;4", "soundness": "2;2;2;4", "contribution": "2;2;2;3", "presentation": "3;3;3;4", "wc_summary": "92;66;65;68", "wc_strengths": "143;227;23;36", "wc_weaknesses": "557;2029;215;76", "wc_questions": "1;41;2;127", "wc_review": "793;2363;305;307", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 72.75, 11.166355717063647 ], "wc_strengths_avg": [ 107.25, 83.35578864122155 ], "wc_weaknesses_avg": [ 719.25, 776.1779354632545 ], "wc_questions_avg": [ 42.75, 51.24634133282102 ], "wc_review_avg": [ 942.0, 844.1617143652038 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.5555555555555555, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12970504062840043210&as_sdt=5,47&sciodt=0,47&hl=en", 
"gs_version_total": 3, "aff_unique_index": "0;1;0;0;0;2;0;0", "aff_unique_norm": "University of Maryland;Massachusetts Institute of Technology;Adobe", "aff_unique_dep": ";;Adobe Systems Incorporated", "aff_unique_url": "https://www/umd.edu;https://web.mit.edu;https://www.adobe.com", "aff_unique_abbr": "UMD;MIT;Adobe", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Ferret: Refer and Ground Anything Anywhere at Any Granularity", "status": "Spotlight", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19537", "id": "2msbbX3ydD", "author_site": "Haoxuan You, Haotian Zhang, Zhe Gan, Xianzhi Du, Bowen Zhang, Zirui Wang, Liangliang Cao, Shih-Fu Chang, Yinfei Yang", "tldr": "", "abstract": "We introduce Ferret, a new Multimodal Large Language Model (MLLM) capable of understanding spatial referring of any shape or granularity within an image and accurately grounding open-vocabulary descriptions. To unify referring and grounding in the LLM paradigm, Ferret employs a novel and powerful hybrid region representation that integrates discrete coordinates and continuous features jointly to represent a region in the image. To extract the continuous features of versatile regions, we propose a spatial-aware visual sampler, adept at handling varying sparsity across different shapes. Consequently, Ferret can accept diverse region inputs, such as points, bounding boxes, and free-form shapes. To bolster the desired capability of Ferret, we curate GRIT, a comprehensive refer-and-ground instruction tuning dataset including 1.1M samples that contain rich hierarchical spatial knowledge, with an additional 130K hard negative data to promote model robustness. The resulting model not only achieves superior performance in classical referring and grounding tasks, but also greatly outperforms existing MLLMs in region-based and localization-demanded multimodal chatting. 
Our evaluations also reveal a significantly improved capability of describing image details and a remarkable alleviation in object hallucination.", "keywords": "Ferret;Multimodal Large Language Model;Referring;Grounding", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Haoxuan You;Haotian Zhang;Zhe Gan;Xianzhi Du;Bowen Zhang;Zirui Wang;Liangliang Cao;Shih-Fu Chang;Yinfei Yang", "authorids": "~Haoxuan_You1;~Haotian_Zhang3;~Zhe_Gan1;~Xianzhi_Du4;~Bowen_Zhang2;~Zirui_Wang1;~Liangliang_Cao1;~Shih-Fu_Chang3;~Yinfei_Yang1", "gender": "M;M;M;M;M;M;M;M;", "homepage": "https://hxyou.github.io/;https://haotian-zhang.github.io/;http://zhegan27.github.io/;;https://zbwglory.github.io;;http://llcao.net;http://www.ee.columbia.edu/~sfchang/;", "dblp": "210/2628;;41/7845;;85/7433-2;;95/6915;c/ShihFuChang;117/4082", "google_scholar": "BhysChMAAAAJ;1vz0kKUAAAAJ;E64XWyMAAAAJ;l1hP40AAAAAJ;nI3cKV8AAAAJ;GgD-B68AAAAJ;S-hBSfIAAAAJ;OMVTRscAAAAJ;kvDbu90AAAAJ", "orcid": ";0000-0001-6809-0426;;;;;;;", "linkedin": ";haotian-zhang-075508a6/;zhe-gan-a2229a78/;xianzhi-du-1b128934/;;;liangliangcao/;;", "or_profile": "~Haoxuan_You1;~Haotian_Zhang3;~Zhe_Gan1;~Xianzhi_Du4;~Bowen_Zhang2;~Zirui_Wang1;~Liangliang_Cao1;~Shih-Fu_Chang3;~Yinfei_Yang1", "aff": "Columbia University;Apple AI/ML;Apple;Apple;Apple;Apple AI/ML;Apple;Columbia University;Apple", "aff_domain": "columbia.edu;apple.com;apple.com;apple.com;apple.com;apple.com;apple.com;ee.columbia.edu;apple.com", "position": "PhD student;Researcher;Principal Researcher;Researcher;Research Scientist;Research Scientist;Principal Researcher;Full Professor;Researcher", "bibtex": "@inproceedings{\nyou2024ferret,\ntitle={Ferret: Refer and Ground Anything Anywhere at Any Granularity},\nauthor={Haoxuan You and Haotian Zhang and Zhe Gan and Xianzhi Du and Bowen Zhang and Zirui Wang and Liangliang Cao and Shih-Fu Chang and Yinfei Yang},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=2msbbX3ydD}\n}", "github": "", "project": "", "reviewers": "dfTo;6Qgf;7dkr", "pdf_size": 27418370, "rating": "6;6;8", "confidence": "4;4;4", "soundness": "3;2;3", "contribution": "3;3;3", "presentation": "3;3;3", "wc_summary": "67;109;178", "wc_strengths": "53;82;152", "wc_weaknesses": "46;62;141", "wc_questions": "11;145;96", "wc_review": "177;398;567", "wc_reply_reviewers": "0;0;171", "wc_reply_authors": "624;1622;1545", "reply_reviewers": "0;0;2", "reply_authors": "3;5;4", "rating_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 118.0, 45.760244754590204 ], "wc_strengths_avg": [ 95.66666666666667, 41.55585264302597 ], "wc_weaknesses_avg": [ 83.0, 41.52910625894406 ], "wc_questions_avg": [ 84.0, 55.35943159631127 ], "wc_review_avg": [ 380.6666666666667, 159.68789002996508 ], "wc_reply_reviewers_avg": [ 57.0, 80.61017305526642 ], "wc_reply_authors_avg": [ 1263.6666666666667, 453.4036710139088 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 4.0, 0.816496580927726 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 300, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9686123011748005154&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, 
"openreview": "https://openreview.net/forum?id=2msbbX3ydD", "pdf": "https://openreview.net/pdf?id=2msbbX3ydD", "email": "columbia.edu;apple.com;apple.com;apple.com;apple.com;apple.com;apple.com;ee.columbia.edu;apple.com", "author_num": 9, "aff_unique_index": "0;1;1;1;1;1;1;0;1", "aff_unique_norm": "Columbia University;Apple", "aff_unique_dep": ";AI/ML", "aff_unique_url": "https://www.columbia.edu;https://www.apple.com", "aff_unique_abbr": "Columbia;Apple", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "2nD1SvxTZc", "title": "One-Versus-Others Attention: Scalable Multimodal Integration", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Multimodal learning models have become increasingly important as they surpass single-modality approaches on diverse tasks ranging from question-answering to autonomous driving. Despite the importance of multimodal learning, existing efforts focus on NLP applications, where the number of modalities is typically at most four (audio, video, text, images). However, data inputs in other domains, such as clinical medicine, may include X-rays, PET scans, MRIs, genetic screening, clinical notes, and more, creating a need for both efficient and accurate information fusion. Many state-of-the-art models rely on pairwise cross-attention or early fusion through self-attention, which do not scale well for applications with more than three modalities. The complexity per layer of computing attention in either paradigm is, at best, quadratic with the number of modalities, potentially requiring considerable computational resources. To address this, we propose a new domain-neutral attention mechanism, One-Versus-Others (OvO) attention, that scales linearly with the number of modalities, thus offering a significant reduction in computational complexity compared to existing multimodal attention methods. 
Using three diverse real-world datasets as well as an additional simulation experiment, we show that our method improves performance compared to popular fusion techniques while decreasing computation costs.", "keywords": "deep learning;multimodal learning;attention;clinical application", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/f30e7fce056b804efc434124d184a18ba9fa37c9.zip", "author": "Michal Golovanevsky;Eva Schiller;Akira A Nair;Ritambhara Singh;Carsten Eickhoff", "authorids": "~Michal_Golovanevsky1;~Eva_Schiller1;~Akira_A_Nair1;~Ritambhara_Singh1;~Carsten_Eickhoff1", "gender": "F;F;;F;M", "homepage": "https://michalg04.github.io/;;http://github.com/akira-nair;https://ritambharasingh.com/;https://health-nlp.org", "dblp": "322/7058;;;180/5813;42/8700", "google_scholar": ";;;V6lRMxoAAAAJ;QQi1_rAAAAAJ", "orcid": "0000-0002-2601-6604;;;0000-0002-7523-160X;0000-0001-9895-4061", "linkedin": ";eva-schiller-9614621b5/;akira-nair/;;", "or_profile": "~Michal_Golovanevsky1;~Eva_Schiller1;~Akira_A_Nair1;~Ritambhara_Singh1;~Carsten_Eickhoff1", "aff": "Brown University;Brown University;Brown University;Brown University;Eberhard-Karls-Universit\u00e4t T\u00fcbingen", "aff_domain": "brown.edu;brown.edu;brown.edu;brown.edu;uni-tuebingen.de", "position": "PhD student;Undergrad student;Undergrad student;Assistant Professor;Full Professor", "bibtex": "@misc{\ngolovanevsky2024oneversusothers,\ntitle={One-Versus-Others Attention: Scalable Multimodal Integration},\nauthor={Michal Golovanevsky and Eva Schiller and Akira A Nair and Ritambhara Singh and Carsten Eickhoff},\nyear={2024},\nurl={https://openreview.net/forum?id=2nD1SvxTZc}\n}", "github": "", "project": "", "reviewers": "YfMJ;UUiq;sMs5", "site": "https://openreview.net/forum?id=2nD1SvxTZc", "pdf_size": 2325215, "rating": "5;5;5", "confidence": "3;5;4", "soundness": "3;2;2", "contribution": "3;2;2", "presentation": "3;2;3", "wc_summary": "81;113;56", "wc_strengths": "50;73;46", "wc_weaknesses": "138;191;112", "wc_questions": "3;98;164", "wc_review": "272;475;378", "wc_reply_reviewers": "74;247;0", "wc_reply_authors": "302;738;721", "reply_reviewers": "1;1;0", "reply_authors": "1;1;1", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 83.33333333333333, 23.32857094256359 ], "wc_strengths_avg": [ 56.333333333333336, 11.897712198383164 ], "wc_weaknesses_avg": [ 147.0, 32.873494895837695 ], "wc_questions_avg": [ 88.33333333333333, 66.08244009484585 ], "wc_review_avg": [ 375.0, 82.90154803540563 ], "wc_reply_reviewers_avg": [ 107.0, 103.50201286287464 ], "wc_reply_authors_avg": [ 587.0, 201.64490240684654 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10766012326229557819&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Brown University;Eberhard Karls University of T\u00fcbingen", "aff_unique_dep": ";", "aff_unique_url": "https://www.brown.edu;https://www.uni-tuebingen.de/", "aff_unique_abbr": "Brown;Uni T\u00fcbingen", "aff_campus_unique_index": "1", "aff_campus_unique": 
";T\u00fcbingen", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "United States;Germany" }, { "id": "2nrn8LRpex", "title": "Z-score Normalized SAC Plus Behavioural Cloning for Offline Reinforcement Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Reinforcement learning (RL) defines the task that optimize a policy to maximize the cumulative reward function. Online learning collects data samples by interacting with the environment of task. Instead, Offline RL learns effective policies from a prior demonstrated dataset, which has the potential to transfer the successes between tasks. The main challenge encountered by offline RL is the inaccurate value estimates from out-of-distribution (OOD) actions, and applying vanilla off-policy algorithms to offline setting will cause severe overestimation bias for actions beyond the dataset distribution, because of the disability to correct value estimation errors via observations from the environment. To tackle this problem, the behavior regularization has been adopted in the literature to prevent the selected actions far away from the distribution of dataset so that the learned policy can be optimized within the support set of dataset. One simple method is combining RL with the behavioural cloning (BC) linearly.\nBy making a right balance of the relative weight between RL and BC, the pre-existing off-policy algorithms are able to work efficiently offline at the minimal cost of complexity. Overly large BC term will limit the agent\u2019s potential to explore better policy, and oversize RL term will cause more OOD actions, both of which are undesired.\nSimulated by TD3-BC, this paper aim to make a more efficient offline RL algorithm at the cost of minimal changes and light complexity. We find that the BC term can be added to the policy update of SAC algorithm to get extensively better performance with proper weight adjustment and normalization. 
The proposed SAC-BC algorithm is evaluated on the D4RL benchmark and proved to converge to much higher levels due to better exploration provided by tuned maximum entropy.", "keywords": "Reinforcement learning; offline; off-policy", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Huihui Zhang", "authorids": "~Huihui_Zhang1", "gender": "M", "homepage": "", "dblp": "https://dblp.uni-trier.de/pid/32/7555", "google_scholar": "", "orcid": "", "linkedin": "https://www.linkedin.com/feed/", "or_profile": "~Huihui_Zhang1", "aff": "", "aff_domain": "", "position": "", "bibtex": "@misc{\nzhang2024zscore,\ntitle={Z-score Normalized {SAC} Plus Behavioural Cloning for Offline Reinforcement Learning},\nauthor={Huihui Zhang},\nyear={2024},\nurl={https://openreview.net/forum?id=2nrn8LRpex}\n}", "github": "", "project": "", "reviewers": "jNsi;Q6ES;mezX;whc9", "site": "https://openreview.net/forum?id=2nrn8LRpex", "pdf_size": 2535726, "rating": "1;3;3;3", "confidence": "5;4;5;4", "soundness": "2;2;1;3", "contribution": "2;1;1;2", "presentation": "2;2;2;1", "wc_summary": "122;31;67;66", "wc_strengths": "41;22;33;34", "wc_weaknesses": "67;406;70;157", "wc_questions": "106;123;100;184", "wc_review": "336;582;270;441", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 2.5, 0.8660254037844386 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.0, 0.7071067811865476 ], "contribution_avg": [ 1.5, 0.5 ], "presentation_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_avg": [ 71.5, 32.56148031032987 ], "wc_strengths_avg": [ 32.5, 6.800735254367722 ], "wc_weaknesses_avg": [ 175.0, 138.17923143511834 ], "wc_questions_avg": [ 128.25, 33.27442711753277 ], "wc_review_avg": [ 407.25, 117.88845363308486 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:5-mUbTnQgyEJ:scholar.google.com/&scioq=Z-score+Normalized+SAC+Plus+Behavioural+Cloning+for+Offline+Reinforcement+Learning&hl=en&as_sdt=0,5", "gs_version_total": 0 }, { "title": "Light-MILPopt: Solving Large-scale Mixed Integer Linear Programs with Lightweight Optimizer and Small-scale Training Dataset", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19536", "id": "2oWRumm67L", "author_site": "Huigen Ye, Hua Xu, Hongyan Wang", "tldr": "", "abstract": "Machine Learning (ML)-based optimization approaches emerge as a promising technique for solving large-scale Mixed Integer Linear Programs (MILPs). However, existing ML-based frameworks suffer from high model computation complexity, weak problem reduction, and reliance on large-scale optimizers and large training datasets, resulting in performance bottlenecks for large-scale MILPs. This paper proposes Light-MILPopt, a lightweight large-scale optimization framework that only uses a lightweight optimizer and small training dataset to solve large-scale MILPs. 
Specifically, Light-MILPopt can be divided into four stages: Problem Formulation for problem division to reduce model computational costs, Model-based Initial Solution Prediction for predicting and constructing the initial solution using a small-scale training dataset, Problem Reduction for both variable and constraint reduction, and Data-driven Optimization for current solution improvement employing a lightweight optimizer. Experimental evaluations on four large-scale benchmark MILPs and a real-world case study demonstrate that Light-MILPopt, leveraging a lightweight optimizer and small training dataset, outperforms the state-of-the-art ML-based optimization framework and advanced large-scale solvers (e.g. Gurobi, SCIP). The results and further analyses substantiate the ML-based framework's feasibility and effectiveness in solving large-scale MILPs.", "keywords": "Large-scale MILP;Learning for Optimization;Lightweight Optimization Framework", "primary_area": "optimization", "supplementary_material": "", "author": "Huigen Ye;Hua Xu;Hongyan Wang", "authorids": "~Huigen_Ye1;~Hua_Xu1;~Hongyan_Wang1", "gender": ";M;F", "homepage": ";https://thu-xuhua.github.io/;https://www.researchgate.net/profile/Hongyan-Wang-25", "dblp": ";31/4114-3;", "google_scholar": ";;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Huigen_Ye1;~Hua_Xu1;~Hongyan_Wang1", "aff": ";Tsinghua University;", "aff_domain": ";tsinghua.edu.cn;", "position": ";Associate Professor;", "bibtex": "@inproceedings{\nye2024lightmilpopt,\ntitle={Light-{MILP}opt: Solving Large-scale Mixed Integer Linear Programs with Lightweight Optimizer and Small-scale Training Dataset},\nauthor={Huigen Ye and Hua Xu and Hongyan Wang},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=2oWRumm67L}\n}", "github": "", "project": "", "reviewers": "RzQr;6e4z;LzLN;tDoY", "pdf_size": 579916, "rating": "3;5;6;6", "confidence": "4;4;3;4", "soundness": "2;3;3;3", "contribution": "2;2;3;3", "presentation": "2;1;4;3", "wc_summary": "214;55;41;121", "wc_strengths": "83;44;75;40", "wc_weaknesses": "118;46;187;7", "wc_questions": "527;322;3;90", "wc_review": "942;467;306;258", "wc_reply_reviewers": "372;243;30;0", "wc_reply_authors": "4458;3340;1787;1186", "reply_reviewers": "4;2;1;0", "reply_authors": "9;6;5;3", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 107.75, 68.37899896898169 ], "wc_strengths_avg": [ 60.5, 18.76832437912346 ], "wc_weaknesses_avg": [ 89.5, 68.95106960736722 ], "wc_questions_avg": [ 235.5, 204.74435279147505 ], "wc_review_avg": [ 493.25, 270.4028245044789 ], "wc_reply_reviewers_avg": [ 161.25, 153.56330127996077 ], "wc_reply_authors_avg": [ 2692.75, 1287.0235769013714 ], "reply_reviewers_avg": [ 1.75, 1.479019945774904 ], "reply_authors_avg": [ 5.75, 2.165063509461097 ], "replies_avg": [ 37, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.4714045207910316, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7354538115541094753&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=2oWRumm67L", "pdf": "https://openreview.net/pdf?id=2oWRumm67L", "email": ";tsinghua.edu.cn;", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", 
"aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "2ov9RiAkxE", "title": "Identifying and Mitigating Vulnerabilities in LLM-Integrated Applications", "track": "main", "status": "Reject", "tldr": "", "abstract": "Large language models (LLMs) are increasingly deployed as the service backend for LLM-integrated applications such as code completion and AI-powered search. Compared with the traditional usage of LLMs where users directly send queries to an LLM, LLM-integrated applications serve as middleware to refine users\u2019 queries with domain-specific knowledge to better inform LLMs and enhance the responses. Despite numerous opportunities and benefits, LLM-integrated applications also introduce new attack surfaces. Understanding, minimizing, and eliminating these emerging attack surfaces is a new area of research. In this work, we consider a setup where the user and LLM interact via an LLM-integrated application in the middle. We focus on the communication rounds that begin with user\u2019s queries and end with LLM-integrated application returning responses to the queries, powered by LLMs at the service backend. For this query-response protocol, we identify potential high-risk vulnerabilities that can originate from the malicious application developer or from an outsider threat initiator that is able to control the database access, manipulate and poison data that are high-risk for the user. Successful exploits of the identified vulnerabilities result in the users receiving responses tailored to the intent of a threat initiator (e.g., biased preferences for certain products). We assess such threats against LLM-integrated applications empowered by OpenAI GPT-3.5 and GPT-4. Our empirical results show that the threats can effectively bypass the restrictions and moderation policies of OpenAI, resulting in users receiving responses that contain bias, toxic content, privacy risk, and disinformation. To mitigate those threats, we identify and define four key properties, namely integrity, source identification, attack detectability, and utility preservation, that need to be satisfied by a safe LLM-integrated application. Based on these properties, we develop a lightweight, threat-agnostic defense that mitigates both insider and outsider threats. 
Our evaluations demonstrate the efficacy of our defense.", "keywords": "large language model;safety of LLM-integrated application;misuse mitigation;bias;privacy;toxicity;disinformation", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/b3d1abed570ec82c0180aefb79e01db1210250a2.pdf", "author": "Fengqing Jiang;Zhangchen Xu;Luyao Niu;Boxin Wang;Jinyuan Jia;Bo Li;Radha Poovendran", "authorids": "~Fengqing_Jiang1;~Zhangchen_Xu1;~Luyao_Niu1;~Boxin_Wang1;~Jinyuan_Jia2;~Bo_Li19;~Radha_Poovendran1", "gender": "M;M;M;;;F;Not Specified", "homepage": "https://fqjiang.work/;https://zhangchenxu.com;;https://wbx.life;https://jinyuan-jia.github.io/;http://boli.cs.illinois.edu/;https://people.ece.uw.edu/radha/index.html", "dblp": "294/4119;350/1187;181/8375;236/6319;24/5124-1.html;50/3402-26;29/5044", "google_scholar": "kTXY8P0AAAAJ;7KnVoNwAAAAJ;nSFafMoAAAAJ;YOf2ATIAAAAJ;iyg4ytkAAAAJ;K8vJkTcAAAAJ;EEoNZ7NbVzMC", "orcid": "0009-0002-9077-2399;0000-0002-6971-412X;0000-0001-8591-5522;;0000-0002-9785-7769;;", "linkedin": "fengqing-jiang-45b7311aa/;zhangchenxu/;;;;;", "or_profile": "~Fengqing_Jiang1;~Zhangchen_Xu1;~Luyao_Niu1;~Boxin_Wang1;~Jinyuan_Jia2;~Bo_Li19;~Radha_Poovendran1", "aff": "Amazon;University of Washington;University of Washington;NVIDIA;Pennsylvania State University;University of Illinois, Urbana Champaign;University of Washington, Seattle", "aff_domain": "amazon.com;uw.edu;uw.edu;nvidia.com;psu.edu;illinois.edu;uw.edu", "position": "Intern;PhD student;Postdoc;Senior Research Scientist;Assistant Professor;Assistant Professor;Full Professor", "bibtex": "@misc{\njiang2024identifying,\ntitle={Identifying and Mitigating Vulnerabilities in {LLM}-Integrated Applications},\nauthor={Fengqing Jiang and Zhangchen Xu and Luyao Niu and Boxin Wang and Jinyuan Jia and Bo Li and Radha Poovendran},\nyear={2024},\nurl={https://openreview.net/forum?id=2ov9RiAkxE}\n}", "github": "", "project": "", "reviewers": "uAhs;tqMB;o6oy;E69Y", "site": "https://openreview.net/forum?id=2ov9RiAkxE", "pdf_size": 763141, "rating": "3;3;6;6", "confidence": "3;5;3;3", "soundness": "2;2;3;3", "contribution": "2;1;3;3", "presentation": "2;1;3;3", "wc_summary": "42;37;98;71", "wc_strengths": "26;9;266;91", "wc_weaknesses": "285;130;127;71", "wc_questions": "34;5;72;65", "wc_review": "387;181;563;298", "wc_reply_reviewers": "0;78;0;0", "wc_reply_authors": "1503;1256;1245;1765", "reply_reviewers": "0;1;0;0", "reply_authors": "4;4;3;4", "rating_avg": [ 4.5, 1.5 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 62.0, 24.50510150968569 ], "wc_strengths_avg": [ 98.0, 101.70791512955125 ], "wc_weaknesses_avg": [ 153.25, 79.61273453411835 ], "wc_questions_avg": [ 44.0, 26.67395733669828 ], "wc_review_avg": [ 357.25, 139.4567585310945 ], "wc_reply_reviewers_avg": [ 19.5, 33.77499074759311 ], "wc_reply_authors_avg": [ 1442.25, 212.98752897763757 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.75, 0.4330127018922193 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5773502691896258, "gs_citation": 1867, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3617304031662540087&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1;1;2;3;4;1", "aff_unique_norm": "Amazon;University of Washington;NVIDIA;Pennsylvania State 
University;University of Illinois Urbana-Champaign", "aff_unique_dep": "Amazon.com, Inc.;;NVIDIA Corporation;;", "aff_unique_url": "https://www.amazon.com;https://www.washington.edu;https://www.nvidia.com;https://www.psu.edu;https://illinois.edu", "aff_unique_abbr": "Amazon;UW;NVIDIA;PSU;UIUC", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Urbana-Champaign;Seattle", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "2pAdYVCbU9", "title": "Promoting Sparsity in Continuous-time Neural Networks to Learn Dependence Structures", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Continuous-time dynamics models, such as neural ordinary differential equations, enable accurate modeling of underlying dynamics in time-series data. However, the use of neural networks in parameterizing dynamics makes it challenging for humans to identify dependence structures, especially in the presence of delayed effects. In consequence, these models are not an attractive option when capturing dependence carries more importance than accurate predictions, e.g., tsunami forecasting. In this paper, we present a novel method for learning dependence structures in continuous-time dynamics models. Inspired by neural graphical modeling, we promote weight sparsity in the network's first layer during training. Once trained, we prune the sparse weights to identify dependence structures. In evaluation, we first test our method in scenarios where the exact dependence-structures of time-series are known. Our method captures the underlying dependence structure precisely even when there is a delayed effects. We further evaluate our method to a real-world tsunami forecasting, where the exact dependence structures are unknown. 
Even in this challenging case, our method effectively learns physically-consistent dependence structures with a high forecasting accuracy.", "keywords": "Neural graphical modeling;Neural delay differential equations;Tsunami forecasting;Structure discovery", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/e157c6277e80ead6fe3550c988d9c50e0daddd71.zip", "author": "Fan Wu;Woojin Cho;David Korotky;Sanghyun Hong;Donsub Rim;Noseong Park;Kookjin Lee", "authorids": "~Fan_Wu17;~Woojin_Cho1;korotkyd@oregonstate.edu;~Sanghyun_Hong1;~Donsub_Rim1;~Noseong_Park1;~Kookjin_Lee1", "gender": ";M;;M;M;;M", "homepage": ";https://woojin-cho.github.io/;;http://www.sanghyun-hong.com;https://dsrim.github.io;;https://scholar.google.com/citations?hl=en&user=KL89hVQAAAAJ&view_op=list_works", "dblp": ";;;135/8991;239/0132;;122/5103", "google_scholar": ";cqIj5tQAAAAJ;;https://scholar.google.com/citations?hl=en;;;https://scholar.google.com/citations?hl=en", "orcid": ";;;;0000-0002-6721-2070;;", "linkedin": "fanwu8/;woojin-cho-02b905264/;;;;;", "or_profile": "~Fan_Wu17;~Woojin_Cho1;korotkyd@oregonstate.edu;~Sanghyun_Hong1;~Donsub_Rim1;~Noseong_Park1;~Kookjin_Lee1", "aff": "Arizona State University;Yonsei University;;Oregon State University;Washington University, Saint Louis;;Arizona State University", "aff_domain": "asu.edu;yonsei.ac.kr;;oregonstate.edu;wustl.edu;;asu.edu", "position": "PhD student;MS student;;Assistant Professor;Assistant Professor;;Assistant Professor", "bibtex": "@misc{\nwu2024promoting,\ntitle={Promoting Sparsity in Continuous-time Neural Networks to Learn Dependence Structures},\nauthor={Fan Wu and Woojin Cho and David Korotky and Sanghyun Hong and Donsub Rim and Noseong Park and Kookjin Lee},\nyear={2024},\nurl={https://openreview.net/forum?id=2pAdYVCbU9}\n}", "github": "", "project": "", "reviewers": "EHcX;eu4v;3bqx;tEwg", "site": "https://openreview.net/forum?id=2pAdYVCbU9", "pdf_size": 1578700, "rating": "3;3;3;5", "confidence": "3;4;4;3", "soundness": "2;2;2;2", "contribution": "1;2;2;2", "presentation": "3;2;2;2", "wc_summary": "31;89;202;35", "wc_strengths": "39;44;52;43", "wc_weaknesses": "132;239;146;109", "wc_questions": "76;174;19;33", "wc_review": "278;546;419;220", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.0, 0.0 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 89.25, 69.00860453595624 ], "wc_strengths_avg": [ 44.5, 4.716990566028302 ], "wc_weaknesses_avg": [ 156.5, 49.429242357131066 ], "wc_questions_avg": [ 75.5, 60.62384019509157 ], "wc_review_avg": [ 365.75, 126.75641009432225 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:nOSuYLPMKAkJ:scholar.google.com/&scioq=Promoting+Sparsity+in+Continuous-time+Neural+Networks+to+Learn+Dependence+Structures&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "Arizona State University;Yonsei University;Oregon State University;Washington University in St.
Louis", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.asu.edu;https://www.yonsei.ac.kr;https://oregonstate.edu;https://wustl.edu", "aff_unique_abbr": "ASU;Yonsei;OSU;WUSTL", "aff_campus_unique_index": "1", "aff_campus_unique": ";Saint Louis", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;South Korea" }, { "id": "2psWOW7JKO", "title": "Learned Visual Features to Textual Explanations", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Interpreting the learned features of vision models has posed a longstanding challenge in the field of machine learning. To address this issue, we propose a novel method that leverages the capabilities of large language models (LLMs) to interpret the *learned features* of pre-trained image classifiers.\nOur method, called TExplain, tackles this task by training a neural network to establish a connection between the feature space of image classifiers and LLMs. Then, during inference, our approach generates a vast number of sentences to explain the features learned by the classifier for a given image. These sentences are then used to extract the most frequent words, providing a comprehensive understanding of the learned features and patterns within the classifier.\nOur method, for the first time, utilizes these frequent words corresponding to a visual representation to provide insights into the decision-making process of the independently trained classifier, enabling the detection of spurious correlations, biases, and a deeper comprehension of its behavior. To validate the effectiveness of our approach, we conduct experiments on diverse datasets, including ImageNet-9L and Waterbirds. The results demonstrate the potential of our method to enhance the interpretability and robustness of image classifiers.", "keywords": "explainability;reliability", "primary_area": "visualization or interpretation of learned representations", "supplementary_material": "/attachment/2338da666416b52d0e4a4e8e02397a15b1eba398.zip", "author": "Saeid Asgari;Aliasghar Khani;Amir Hosein Khasahmadi;Ali Saheb Pasand;Aditya Sanghi;Karl D.D. Willis;Ali Mahdavi Amiri", "authorids": "~Saeid_Asgari1;~Aliasghar_Khani1;~Amir_Hosein_Khasahmadi1;~Ali_Saheb_Pasand1;~Aditya_Sanghi1;~Karl_D.D._Willis1;~Ali_Mahdavi_Amiri1", "gender": ";M;M;M;M;;", "homepage": "https://asgsaeid.github.io/;http://aliasgharkhani.github.io/;;https://github.com/sanghiad;https://www.sfu.ca/~amahdavi;;", "dblp": "201/4374.html;;238/1089;;33/10499.html;259/1508;82/121", "google_scholar": "SuePM1sAAAAJ;yr7Y5EcAAAAJ;xjTZIisAAAAJ;q0-11e25FxIC;https://scholar.google.ca/citations?user=M9eTADwAAAAJ;cFpYRhkAAAAJ;yMoEQSMAAAAJ", "orcid": ";;;;;;", "linkedin": ";aliasghar-khani-08157b16b/;;;;amir-khas/;", "or_profile": "~Saeid_Asgari1;~Aliasghar_Khani1;~Ali_Saheb_Pasand1;~Aditya_Sanghi1;~Ali_Mahdavi_Amiri1;~Amir_Hosein_Khasahmadi2;~Karl_Willis1", "aff": "Autodesk;Computing Science, Simon Fraser University;McGill University;Autodesk;Simon Fraser University;Toronto University;Autodesk", "aff_domain": "autodesk.com;cs.sfu.ca;cs.mcgill.ca;autodesk.com;sfu.ca;utoronto.ca;autodesk.com", "position": "Research Scientist;MS student;PhD student;Researcher;Assistant Professor;MS student;Senior Research Manager", "bibtex": "@misc{\nasgari2024learned,\ntitle={Learned Visual Features to Textual Explanations},\nauthor={Saeid Asgari and Aliasghar Khani and Amir Hosein Khasahmadi and Ali Saheb Pasand and Aditya Sanghi and Karl D.D. 
Willis and Ali Mahdavi Amiri},\nyear={2024},\nurl={https://openreview.net/forum?id=2psWOW7JKO}\n}", "github": "", "project": "", "reviewers": "cWhL;zmpz;gvhW;UPwp", "site": "https://openreview.net/forum?id=2psWOW7JKO", "pdf_size": 2080279, "rating": "3;5;5;5", "confidence": "4;3;4;5", "soundness": "1;2;2;3", "contribution": "2;2;2;3", "presentation": "1;3;3;4", "wc_summary": "91;112;61;169", "wc_strengths": "64;92;80;93", "wc_weaknesses": "961;201;259;441", "wc_questions": "149;128;8;114", "wc_review": "1265;533;408;817", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.0, 0.7071067811865476 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 108.25, 39.480216564755565 ], "wc_strengths_avg": [ 82.25, 11.712706775122479 ], "wc_weaknesses_avg": [ 465.5, 299.46744397346436 ], "wc_questions_avg": [ 99.75, 54.41679428264771 ], "wc_review_avg": [ 755.75, 329.25474559981666 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7463041457304754602&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2;0;1;3;0", "aff_unique_norm": "Autodesk;Simon Fraser University;McGill University;University of Toronto", "aff_unique_dep": ";Computing Science;;", "aff_unique_url": "https://www.autodesk.com;https://www.sfu.ca;https://www.mcgill.ca;https://www.utoronto.ca", "aff_unique_abbr": "Autodesk;SFU;McGill;U of T", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;1;1;0", "aff_country_unique": "United States;Canada" }, { "title": "SparseFormer: Sparse Visual Recognition via Limited Latent Tokens", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19535", "id": "2pvECsmld3", "author_site": "Ziteng Gao, Zhan Tong, Limin Wang, Mike Zheng Shou", "tldr": "", "abstract": "Human visual recognition is a sparse process, where only a few salient visual cues are attended to rather than every detail being traversed uniformly. However, most current vision networks follow a dense paradigm, processing every single visual unit (such as pixels or patches) in a uniform manner. In this paper, we challenge this dense convention and present a new vision transformer, coined SparseFormer, to explicitly imitate human's sparse visual recognition in an end-to-end manner. SparseFormer learns to represent images using a highly limited number of tokens (e.g., down to $9$) in the latent space with sparse feature sampling procedure instead of processing dense units in the original image space. Therefore, SparseFormer circumvents most of dense operations on the image space and has much lower computational costs. Experiments on the ImageNet-1K classification show that SparseFormer delivers performance on par with canonical or well-established models while offering more favorable accuracy-throughput tradeoff. Moreover, the design of our network can be easily extended to the video classification task with promising performance with lower compute. 
We hope our work can provide an alternative way for visual modeling and inspire further research on sparse vision architectures. Code and weights are available at https://github.com/showlab/sparseformer.", "keywords": "sparse visual recognition;vision transformer;computer vision;representation learning", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Ziteng Gao;Zhan Tong;Limin Wang;Mike Zheng Shou", "authorids": "~Ziteng_Gao1;~Zhan_Tong1;~Limin_Wang1;~Mike_Zheng_Shou1", "gender": "M;M;M;", "homepage": "https://sebgao.github.io/;https://github.com/yztongzhan;https://wanglimin.github.io;http://www.columbia.edu/~zs2262/", "dblp": "247/1231;236/0753;68/6610-2;284/0807", "google_scholar": "fbSH2CgAAAAJ;6FsgWBMAAAAJ;HEuN8PcAAAAJ;h1-3lSoAAAAJ", "orcid": ";0000-0002-3169-0599;;", "linkedin": ";;;", "or_profile": "~Ziteng_Gao1;~Zhan_Tong1;~Limin_Wang2;~Zheng_Shou1", "aff": "National University of Singapore;Ant Research;Nanjing University;National University of Singapore", "aff_domain": "nus.edu;antgroup.com;nju.edu.cn;nus.edu.sg", "position": "PhD student;Researcher;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\ngao2024sparseformer,\ntitle={SparseFormer: Sparse Visual Recognition via Limited Latent Tokens},\nauthor={Ziteng Gao and Zhan Tong and Limin Wang and Mike Zheng Shou},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=2pvECsmld3}\n}", "github": "", "project": "", "reviewers": "fS4e;H2So;fz8s;SBDj", "pdf_size": 11614788, "rating": "5;6;6;8", "confidence": "4;5;4;3", "soundness": "3;3;3;3", "contribution": "2;2;2;3", "presentation": "3;3;3;3", "wc_summary": "96;43;28;51", "wc_strengths": "114;38;42;27", "wc_weaknesses": "270;56;109;1", "wc_questions": "9;65;27;1", "wc_review": "489;202;206;80", "wc_reply_reviewers": "193;0;0;0", "wc_reply_authors": "1215;867;302;16", "reply_reviewers": "1;0;0;0", "reply_authors": "3;2;1;1", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 54.5, 25.342651794948374 ], "wc_strengths_avg": [ 55.25, 34.361133566865924 ], "wc_weaknesses_avg": [ 109.0, 100.49129315517837 ], "wc_questions_avg": [ 25.5, 24.672859582950654 ], "wc_review_avg": [ 244.25, 150.1072533224161 ], "wc_reply_reviewers_avg": [ 48.25, 83.57145146519834 ], "wc_reply_authors_avg": [ 600.0, 468.8747167421165 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6488856845230502, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10689237670896548255&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=2pvECsmld3", "pdf": "https://openreview.net/pdf?id=2pvECsmld3", "email": "nus.edu;antgroup.com;nju.edu.cn;nus.edu.sg", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "National University of Singapore;Ant Research;Nanjing University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.nus.edu.sg;https://www.antgroup.com;https://www.nju.edu.cn", "aff_unique_abbr": "NUS;Ant Research;Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": 
"Singapore;China" }, { "id": "2qLSkTuqrb", "title": "Translating cognitive models into neural and statistical descriptions of real-world multi-agent foraging behavior", "track": "main", "status": "Reject", "tldr": "", "abstract": "Foraging is a multi-agent social behavior that has been studied from many perspectives, including cognitive science, neuroscience, and statistics. We start from a specific type of cognitive description -- agents with internal preferences expressed as value functions -- and implement it as a biologically plausible neural network. We also present an equivalent statistical model where statistical predictors correspond to components of the value function. We use the neural network to simulate foraging agents in various environmental conditions and use the statistical model to discover which features in the environment best predict the agent's behavior. Our intended primary application is the study of multi-species groups of birds foraging in real-world environments. To test the viability of the statistical approach, we simulate bird agents with different preferences, and use Bayesian inference to recover what each type of agent values. In the multi-agent context, we investigate how communication of information about reward location affects group foraging behavior. We also test our modeling technique on a previously published locust foraging dataset (Gunzel et al., 2023). After evaluating the effectiveness of our method on both synthetic and previously published data, we analyze new multi-agent foraging bird data we captured through high-resolution video recordings. Our method distinguishes between proximity preferences of ducks and sparrows within foraging groups. This analysis framework provides a principled, interpretable, and parametric approach for reasoning about how birds' preferences relate to their decisions about where to move in a complex multi-agent environment.", "keywords": "multi-agent systems;animal behavior;reinforcement learning;probabilistic methods;decision making", "primary_area": "applications to neuroscience & cognitive science", "supplementary_material": "", "author": "Marjorie Xie;Rafal Urbaniak;Emily L Mackevicius", "authorids": "~Marjorie_Xie1;~Rafal_Urbaniak1;~Emily_L_Mackevicius1", "gender": "F;M;", "homepage": ";https://independent.academia.edu/Rafa%C5%82Urbaniak2;", "dblp": ";46/6491;", "google_scholar": ";LOtWV_0AAAAJ;G5eTd40AAAAJ", "orcid": ";0000-0002-6321-2866;", "linkedin": "marjoriexie/;;", "or_profile": "~Marjorie_Xie1;~Rafal_Urbaniak1;~Emily_L_Mackevicius1", "aff": "Arizona State University;University of Gdansk;Columbia University", "aff_domain": "asu.edu;univ.gda.pl;columbia.edu", "position": "Postdoc;Associate Professor;Postdoc", "bibtex": "@misc{\nxie2024translating,\ntitle={Translating cognitive models into neural and statistical descriptions of real-world multi-agent foraging behavior},\nauthor={Marjorie Xie and Rafal Urbaniak and Emily L Mackevicius},\nyear={2024},\nurl={https://openreview.net/forum?id=2qLSkTuqrb}\n}", "github": "", "project": "", "reviewers": "g6r7;3d6u;vuLZ;Vrpj", "site": "https://openreview.net/forum?id=2qLSkTuqrb", "pdf_size": 4732147, "rating": "3;5;5;6", "confidence": "3;2;2;5", "soundness": "2;3;2;3", "contribution": "2;3;2;3", "presentation": "2;1;1;4", "wc_summary": "27;125;75;87", "wc_strengths": "44;47;59;223", "wc_weaknesses": "313;167;288;412", "wc_questions": "155;425;114;89", "wc_review": "539;764;536;811", "wc_reply_reviewers": "512;85;401;149", "wc_reply_authors": "780;230;388;329", 
"reply_reviewers": "2;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.0, 1.224744871391589 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.0, 1.224744871391589 ], "wc_summary_avg": [ 78.5, 34.99642838919423 ], "wc_strengths_avg": [ 93.25, 75.12115214771403 ], "wc_weaknesses_avg": [ 295.0, 87.24391096231301 ], "wc_questions_avg": [ 195.75, 134.4384152688509 ], "wc_review_avg": [ 662.5, 126.10412364391578 ], "wc_reply_reviewers_avg": [ 286.75, 175.69060162683718 ], "wc_reply_authors_avg": [ 431.75, 208.83770612607293 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3746343246326776, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:7oABOmIY7DkJ:scholar.google.com/&scioq=Translating+cognitive+models+into+neural+and+statistical+descriptions+of+real-world+multi-agent+foraging+behavior&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "Arizona State University;University of Gdansk;Columbia University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.asu.edu;https://www.ug.edu.pl;https://www.columbia.edu", "aff_unique_abbr": "ASU;UoG;Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;Poland" }, { "id": "2rqC5FZiAH", "title": "LOTUS: Evasive and Resilient Backdoor Attacks through Sub-Partitioning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Backdoor attack poses a significant security threat to Deep Learning applications. Existing attacks are often not resilient to established backdoor detection and mitigation approaches. This susceptibility primarily stems from the fact that these attacks typically possess an unbounded or under-bounded attack scope. In other words, the trigger can cause misclassification for any input. This unbounded nature implies that the backdoored model overly emphasizes on spurious features of the trigger (e.g., only the color of a square patch), on which trigger inversion techniques can effortlessly generate effective triggers. In addition, the unbounded attack effects can be easily mitigated by backdoor removal methods.\nIn this paper, we propose a novel backdoor attack LOTUS that is evasive and resilient by restricting the attack scope. Specifically, it leverages a secret function to separate samples in the victim class into a set of partitions and applies unique triggers to different partitions. 
Furthermore, LOTUS incorporates an effective trigger focusing mechanism, ensuring only the trigger corresponding to the partition can induce the backdoor behavior.\nExtensive experimental results show that LOTUS can achieve high attack success rate across 4 datasets and 7 model structures, and effectively evading 13 backdoor detection and mitigation techniques.", "keywords": "Backdoor attack", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "", "author": "Siyuan Cheng;Guanhong Tao;Yingqi Liu;Guangyu Shen;Shengwei An;Shiwei Feng;Xiangzhe Xu;Kaiyuan Zhang;Shiqing Ma;Xiangyu Zhang", "authorids": "~Siyuan_Cheng1;~Guanhong_Tao1;~Yingqi_Liu1;~Guangyu_Shen1;~Shengwei_An1;~Shiwei_Feng1;~Xiangzhe_Xu1;~Kaiyuan_Zhang1;~Shiqing_Ma2;~Xiangyu_Zhang3", "gender": "M;;M;M;;M;;M;;M", "homepage": "https://www.cs.purdue.edu/homes/cheng535/;;https://www.cs.purdue.edu/homes/liu1751/;;https://www.cs.purdue.edu/homes/an93/;https://www.cs.purdue.edu/homes/feng292/;https://sites.google.com/view/alex-xu/;https://kaiyuanzhang.com/;https://people.cs.umass.edu/~shiqingma/;https://www.cs.purdue.edu/homes/xyzhang", "dblp": "263/7049;;92/10048;216/6403;168/9413;138/9141-2;276/3462;147/6644-2;172/8745;", "google_scholar": "GcL9AFMAAAAJ;;gOPVK2UAAAAJ;YiMTVwgAAAAJ;qcmmzeEAAAAJ;https://scholar.google.com/citations?hl=en;;https://scholar.google.com/citations?hl=en;X_mDnjkAAAAJ;PXbu1wIAAAAJ", "orcid": ";;;;;0000-0001-6959-4327;;0000-0001-6023-363X;0000-0003-1551-8948;", "linkedin": "sycheng98/;;;;;swfeng98/;;kaiyuan-zhang/;shiqing-ma-6590b086;", "or_profile": "~Siyuan_Cheng1;~Guanhong_Tao1;~Yingqi_Liu1;~Guangyu_Shen1;~Shengwei_An1;~Shiwei_Feng1;~Xiangzhe_Xu1;~Kaiyuan_Zhang1;~Shiqing_Ma2;~Xiangyu_Zhang3", "aff": "Sony AI;;Microsoft;Purdue University;Purdue University;Purdue University;Purdue University;Purdue University;University of Massachusetts at Amherst;Purdue University", "aff_domain": "sony.com;;microsoft.com;purdue.edu;purdue.edu;cs.purdue.edu;purdue.edu;cs.purdue.edu;umass.edu;cs.purdue.edu", "position": "Intern;;Researcher;PhD student;PhD student;PhD student;PhD student;PhD student;Assistant Professor;Full Professor", "bibtex": "@misc{\ncheng2024lotus,\ntitle={{LOTUS}: Evasive and Resilient Backdoor Attacks through Sub-Partitioning},\nauthor={Siyuan Cheng and Guanhong Tao and Yingqi Liu and Guangyu Shen and Shengwei An and Shiwei Feng and Xiangzhe Xu and Kaiyuan Zhang and Shiqing Ma and Xiangyu Zhang},\nyear={2024},\nurl={https://openreview.net/forum?id=2rqC5FZiAH}\n}", "github": "", "project": "", "reviewers": "Mwh5;rKsD;1k7k", "site": "https://openreview.net/forum?id=2rqC5FZiAH", "pdf_size": 3226786, "rating": "3;6;6", "confidence": "4;3;3", "soundness": "2;2;3", "contribution": "2;2;3", "presentation": "1;3;3", "wc_summary": "73;129;81", "wc_strengths": "57;247;57", "wc_weaknesses": "405;133;23", "wc_questions": "6;63;204", "wc_review": "541;572;365", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 94.33333333333333, 24.729649321321876 ], "wc_strengths_avg": [ 120.33333333333333, 89.56685895029602 ], "wc_weaknesses_avg": [ 187.0, 160.55736254269584 ], "wc_questions_avg": [ 91.0, 
83.22259308625273 ], "wc_review_avg": [ 492.6666666666667, 91.15676363032836 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 3, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17091094092122633244&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff_unique_index": "0;1;2;2;2;2;2;3;2", "aff_unique_norm": "Sony;Microsoft;Purdue University;University of Massachusetts Amherst", "aff_unique_dep": "Sony AI;Microsoft Corporation;;", "aff_unique_url": "https://www.sony.com;https://www.microsoft.com;https://www.purdue.edu;https://www.umass.edu", "aff_unique_abbr": "Sony AI;Microsoft;Purdue;UMass Amherst", "aff_campus_unique_index": "1", "aff_campus_unique": ";Amherst", "aff_country_unique_index": "0;1;1;1;1;1;1;1;1", "aff_country_unique": "Japan;United States" }, { "id": "2sCcTMWPc2", "title": "TimelyGPT: Recurrent Convolutional Transformer for Long Time-series Representation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Pre-trained models (PTMs) have gained prominence in Natural Language Processing and Computer Vision domains. When it comes to time-series PTMs, their development has been limited. Previous research on time-series transformers has mainly been devoted to small-scale tasks, yet these models have not consistently outperformed traditional models. Additionally, the performance of these transformers on large-scale data remains unexplored. These findings raise doubts about Transformer's capabilities to scale up and capture temporal dependencies. In this study, we re-examine time-series transformers and identify the shortcomings of prior studies. Drawing from these insights, we then introduce a pioneering architecture called Timely Generative Pre-trained Transformer (TimelyGPT). This architecture integrates recurrent attention and temporal convolution modules to effectively capture global-local temporal dependencies in long sequences. The relative position embedding with time decay can effectively deal with trend and periodic patterns from time-series. Our experiments show that TimelyGPT excels in modeling continuously monitored biosignal as well as irregularly-sampled time-series data commonly observed in longitudinal electronic health records. 
This breakthrough suggests a priority shift in time-series deep learning research, moving from small-scale modeling from scratch to large-scale pre-training.", "keywords": "GPT;Recurrent Neural Network;Position Embedding;Time-series representation", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/4fb3df9a8f2fcc826d97acf84bded029dccf72ab.zip", "author": "Ziyang Song;Qincheng Lu;Hao Xu;Yue Li", "authorids": "~Ziyang_Song4;~Qincheng_Lu1;~Hao_Xu17;~Yue_Li15", "gender": "M;;F;M", "homepage": ";https://github.com/wzzlcss;;https://www.cs.mcgill.ca/~yueli/", "dblp": ";;;", "google_scholar": "fptMer8AAAAJ;;;yJgWSl0AAAAJ", "orcid": ";;;0000-0003-3844-4865", "linkedin": ";;hao-xu-41ba83187;yuelicb/", "or_profile": "~Ziyang_Song4;~Qincheng_Lu1;~Hao_Xu17;~Yue_Li15", "aff": "McGill University;McGill University;McGill University, McGill University;McGill University", "aff_domain": "mail.mcgill.ca;mcgill.ca;mail.mcgill.ca;cs.mcgill.ca", "position": "PhD student;PhD student;Undergrad student;Assistant Professor", "bibtex": "@misc{\nsong2024timelygpt,\ntitle={Timely{GPT}: Recurrent Convolutional Transformer for Long Time-series Representation},\nauthor={Ziyang Song and Qincheng Lu and Hao Xu and Yue Li},\nyear={2024},\nurl={https://openreview.net/forum?id=2sCcTMWPc2}\n}", "github": "", "project": "", "reviewers": "JDYd;ZDVD;5JkM;BKAB", "site": "https://openreview.net/forum?id=2sCcTMWPc2", "pdf_size": 1894723, "rating": "5;5;6;6", "confidence": "4;4;5;3", "soundness": "3;2;3;3", "contribution": "3;2;2;2", "presentation": "2;1;3;3", "wc_summary": "116;55;68;41", "wc_strengths": "117;35;71;77", "wc_weaknesses": "134;390;110;148", "wc_questions": "65;4;40;5", "wc_review": "432;484;289;271", "wc_reply_reviewers": "29;412;67;0", "wc_reply_authors": "1309;2947;1335;767", "reply_reviewers": "1;1;1;0", "reply_authors": "4;6;3;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 70.0, 28.222331583340168 ], "wc_strengths_avg": [ 75.0, 29.086079144497972 ], "wc_weaknesses_avg": [ 195.5, 113.11388066899659 ], "wc_questions_avg": [ 28.5, 25.578311124857326 ], "wc_review_avg": [ 369.0, 91.1015916436151 ], "wc_reply_reviewers_avg": [ 127.0, 166.2513157842668 ], "wc_reply_authors_avg": [ 1589.5, 815.8987375894144 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.75, 1.479019945774904 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:VfnyZlrcICUJ:scholar.google.com/&scioq=TimelyGPT:+Recurrent+Convolutional+Transformer+for+Long+Time-series+Representation&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "McGill University", "aff_unique_dep": "", "aff_unique_url": "https://www.mcgill.ca", "aff_unique_abbr": "McGill", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Canada" }, { "title": "Enabling Lanuguage Models to Implicitly Learn Self-Improvement", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19534", "id": "2tVHNRZuCs", "author_site": "Ziqi Wang, Le Hou, Tianjian Lu, Yuexin Wu, Yunxuan Li, Hongkun Yu, Heng Ji", "tldr": "", "abstract": "Large Language 
Models (LLMs) have demonstrated remarkable capabilities in open-ended text generation tasks. However, the inherent open-ended nature of these tasks implies that there is always room for improvement in the quality of model responses. To address this challenge, various approaches have been proposed to enhance the performance of LLMs. There has been a growing focus on enabling LLMs to self-improve their response quality, thereby reducing the reliance on extensive human annotation efforts for collecting diverse and high-quality training data. Recently, prompting-based methods have been widely explored among self-improvement methods owing to their effectiveness, efficiency, and convenience. However, those methods usually require explicitly and thoroughly written rubrics as inputs to LLMs. It is expensive and challenging to manually derive and provide all necessary rubrics with a real-world complex goal for improvement (e.g., being more helpful and less harmful). To this end, we propose an imPlicit self-ImprovemenT (PIT) framework that implicitly learns the improvement goal from human preference data. PIT only requires preference data that are used to train reward models with no extra human effort. Specifically, we reformulate the training objective of reinforcement learning from human feedback (RLHF) -- instead of maximizing response quality for a given input, we maximize the quality gap of the response conditioned on a reference response. In this way, PIT is implicitly trained with the improvement goal of better aligning with human preferences. Experiments on two real-world datasets and one synthetic dataset show that our method significantly outperforms prompting-based methods.", "keywords": "large language models; self-improvement; alignment", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Ziqi Wang;Le Hou;Tianjian Lu;Yuexin Wu;Yunxuan Li;Hongkun Yu;Heng Ji", "authorids": "~Ziqi_Wang2;~Le_Hou1;~Tianjian_Lu1;~Yuexin_Wu1;~Yunxuan_Li2;~Hongkun_Yu2;~Heng_Ji3", "gender": ";M;M;M;M;M;F", "homepage": "https://www.wzq016.github.io;http://vision.cs.stonybrook.edu/~lehhou/home/index.html;;https://crickwu.github.io;;;http://blender.cs.illinois.edu/hengji.html", "dblp": "38/8097-3;161/9892;;09/1661;;;", "google_scholar": "xYRZiZkAAAAJ;kQ0HeQIAAAAJ;eWEj9g0AAAAJ;sd0nprMAAAAJ;Nun8Dy0AAAAJ;;z7GCqT4AAAAJ", "orcid": ";0000-0001-7323-5300;;;;;", "linkedin": ";;;;;;", "or_profile": "~Ziqi_Wang2;~Le_Hou1;~Tianjian_Lu1;~Yuexin_Wu1;~Yunxuan_Li2;~Hongkun_Yu2;~Heng_Ji3", "aff": "Meta Facebook;Google Research;Google;Google;Google;;University of Illinois, Urbana-Champaign", "aff_domain": "meta.com;google.com;google.com;google.com;google.com;;uiuc.edu", "position": "Intern;Software Engineer;Engineer;Software Engineer;Researcher;;Full Professor", "bibtex": "@inproceedings{\nwang2024enabling,\ntitle={Enabling Lanuguage Models to Implicitly Learn Self-Improvement},\nauthor={Ziqi Wang and Le Hou and Tianjian Lu and Yuexin Wu and Yunxuan Li and Hongkun Yu and Heng Ji},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=2tVHNRZuCs}\n}", "github": "", "project": "", "reviewers": "LeAH;tfvt;c4bA;xn4T", "pdf_size": 599775, "rating": "6;6;6;6", "confidence": "3;3;3;3", "soundness": "3;3;3;4", "contribution": "3;3;3;3", "presentation": "2;4;3;3", "wc_summary": "51;154;41;69", "wc_strengths": "105;133;54;92", "wc_weaknesses": "95;177;55;45",
"wc_questions": "1;245;1;82", "wc_review": "252;709;151;288", "wc_reply_reviewers": "20;0;0;0", "wc_reply_authors": "496;1549;318;724", "reply_reviewers": "1;0;0;0", "reply_authors": "3;5;2;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 78.75, 44.589096200752934 ], "wc_strengths_avg": [ 96.0, 28.416544476765644 ], "wc_weaknesses_avg": [ 93.0, 51.980765673468106 ], "wc_questions_avg": [ 82.25, 99.6126874449234 ], "wc_review_avg": [ 350.0, 213.266265499258 ], "wc_reply_reviewers_avg": [ 5.0, 8.660254037844387 ], "wc_reply_authors_avg": [ 771.75, 471.25490713625464 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 1.224744871391589 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11160867810813975791&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "openreview": "https://openreview.net/forum?id=2tVHNRZuCs", "pdf": "https://openreview.net/pdf?id=2tVHNRZuCs", "email": "meta.com;google.com;google.com;google.com;google.com;;uiuc.edu", "author_num": 7, "aff_unique_index": "0;1;1;1;1;2", "aff_unique_norm": "Meta;Google;University of Illinois", "aff_unique_dep": "Meta Platforms, Inc.;Google Research;", "aff_unique_url": "https://meta.com;https://research.google;https://illinois.edu", "aff_unique_abbr": "Meta;Google Research;UIUC", "aff_campus_unique_index": "1;1;1;1;2", "aff_campus_unique": ";Mountain View;Urbana-Champaign", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "2uHTuvDkLZ", "title": "Physics-aware Causal Graph Network for Spatiotemporal Modeling", "track": "main", "status": "Reject", "tldr": "", "abstract": "Interpretable physics equations are widely recognized as valuable inductive biases for constructing robust spatiotemporal models. To harness these valuable pieces of knowledge, existing approaches often presuppose access to the exact underlying equations. However, such an assumption usually doesn't hold, especially in the context of real-world observations. Conversely, causality systematically captures the fundamental causal relations across space and time that are intrinsically present in physics dynamics. Nevertheless, causality is often ignored as a means of integrating prior physics knowledge. In this work, we propose a novel approach that effectively captures and leverages causality to integrate physics equations into spatiotemporal models, without assuming access to precise physics principles. \nSpecifically, we introduce a physics-aware spatiotemporal causal graph network (P-stCGN). Causal relationships are analytically derived from prior physics knowledge and serve as physics-aware causality labels. A causal module is introduced to learn causal weights from spatially close and temporally past observations to current observations via semi-supervised learning. Given the learned causal structure, a forecasting module is introduced to perform predictions guided by the cause-effect relations. Extensive experiments on time series data show that our semi-supervised causal learning approach is robust with noisy and limited data. 
Furthermore, our evaluations on real-world graph signals demonstrate superior forecasting performance, achieved by utilizing prior physics knowledge from a causal perspective.", "keywords": "physics-informed deep learning; causal learning; spatiotemporal learning", "primary_area": "neurosymbolic & hybrid AI systems (physics-informed, logic & formal reasoning, etc.)", "supplementary_material": "/attachment/5873a933b20dc180f679318946973106e5a5d0fa.pdf", "author": "Sungyong Seo;Zijun Cui;Sam Griesemer;Joshua Hikida;Yan Liu", "authorids": "~Sungyong_Seo1;~Zijun_Cui1;~Sam_Griesemer1;joshua.hikida@gmail.com;~Yan_Liu1", "gender": "M;;M;;F", "homepage": "https://sungyongs.github.io/;https://zijunjkl.github.io/;https://samgriesemer.com;;http://www-bcf.usc.edu/~liu32/", "dblp": "178/3209;266/4675;317/5153;;150/4295", "google_scholar": "spYH0tEAAAAJ;https://scholar.google.com/citations?hl=en;fBZ3_FsAAAAJ;;UUKLPMYAAAAJ", "orcid": "0009-0001-0285-5868;0000-0002-4362-197X;0009-0009-0192-9405;;0000-0002-7055-9518", "linkedin": ";;;;", "or_profile": "~Sungyong_Seo1;~Zijun_Cui1;~Sam_Griesemer1;joshua.hikida@gmail.com;~Yan_Liu1", "aff": "Google;University of Southern California;University of Southern California;;University of Southern California", "aff_domain": "google.com;usc.edu;usc.edu;;usc.edu", "position": "Researcher;Postdoc;PhD student;;Professor", "bibtex": "@misc{\nseo2024physicsaware,\ntitle={Physics-aware Causal Graph Network for Spatiotemporal Modeling},\nauthor={Sungyong Seo and Zijun Cui and Sam Griesemer and Joshua Hikida and Yan Liu},\nyear={2024},\nurl={https://openreview.net/forum?id=2uHTuvDkLZ}\n}", "github": "", "project": "", "reviewers": "cRbd;pHbu;EyEy;B5jF", "site": "https://openreview.net/forum?id=2uHTuvDkLZ", "pdf_size": 1130397, "rating": "3;3;5;6", "confidence": "4;5;4;4", "soundness": "3;2;3;3", "contribution": "3;2;2;3", "presentation": "2;3;3;2", "wc_summary": "76;43;141;82", "wc_strengths": "42;33;33;23", "wc_weaknesses": "171;382;134;61", "wc_questions": "69;8;2;3", "wc_review": "358;466;310;169", "wc_reply_reviewers": "0;18;0;0", "wc_reply_authors": "197;208;157;212", "reply_reviewers": "0;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 85.5, 35.316426772820606 ], "wc_strengths_avg": [ 32.75, 6.722164829874376 ], "wc_weaknesses_avg": [ 187.0, 119.3377559701874 ], "wc_questions_avg": [ 20.5, 28.09359357576029 ], "wc_review_avg": [ 325.75, 106.68733523713112 ], "wc_reply_reviewers_avg": [ 4.5, 7.794228634059948 ], "wc_reply_authors_avg": [ 193.5, 21.777281740382566 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5555555555555555, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11034681177519116032&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Google;University of Southern California", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.usc.edu", "aff_unique_abbr": "Google;USC", "aff_campus_unique_index": "0;1;1;1", "aff_campus_unique": "Mountain View;Los Angeles", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "2uwvigLUr8", "title": "From Deterministic to Probabilistic World: 
Balancing Enhanced Doubly Robust Learning for Debiased Recommendation", "track": "main", "status": "Reject", "tldr": "", "abstract": "In recommender systems, selection bias arises from the users' selective interactions with items, which poses a widely-recognized challenge for unbiased evaluation and learning for recommendation models. Recently, doubly robust and its variants have been widely studied to achieve debiased learning of prediction models, which enables unbiasedness when either imputed errors or learned propensities are accurate. However, we find that previous studies achieve unbiasedness using the doubly robust learning approaches are all based on deterministic error imputation model and deterministic propensity model, and these approaches fail to be unbiased when using probabilistic models to impute errors and learn propensities. To tackle this problem, in this paper, we first derive the bias of doubly robust learning methods and provide alternative unbiasedness conditions for probabilistic models. Then we propose a novel balancing enhanced doubly robust joint learning approach, which improves the accuracy of the imputed errors and leads to unbiased learning under probabilistic error imputations and learned propensities. We further derive the generalization error bound when using the probabilistic models, and show that it can be effectively controlled by the proposed learning approach. We conduct extensive experiments on three real-world datasets, including a large-scale industrial dataset, to demonstrate the effectiveness of the proposed method.", "keywords": "Recommender system;Selection bias;Doubly robust;Probabilistic model", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/25d5730a6cbaad6e33116f077c573080bb783609.zip", "author": "Haoxuan Li;Chunyuan Zheng;Yanghao Xiao;Min Zhang;Xu Chen;Xiao-Hua Zhou", "authorids": "~Haoxuan_Li6;~Chunyuan_Zheng1;~Yanghao_Xiao1;~Min_Zhang17;~Xu_Chen13;~Xiao-Hua_Zhou1", "gender": "M;M;;;M;", "homepage": "https://haoxuanli-pku.github.io/;;;;https://gsai.ruc.edu.cn/chenxu;", "dblp": "145/4965-1.html;;322/6462;;83/6331-17;", "google_scholar": "gtDqiucAAAAJ;https://scholar.google.com/citations?hl=en;hzfFzKUAAAAJ;;loPoqy0AAAAJ;YJNYC40AAAAJ", "orcid": "0000-0003-3620-3769;0000-0002-0306-7310;0000-0001-9929-4448;;0000-0003-0144-1775;", "linkedin": ";;;;;", "or_profile": "~Haoxuan_Li6;~Chunyuan_Zheng1;~Yanghao_Xiao1;~Min_Zhang17;~Xu_Chen13;~Xiao-Hua_Zhou1", "aff": "Peking University;Peking University;University of Chinese Academy of Sciences;;Renmin University of China;", "aff_domain": "pku.edu.cn;stu.pku.edu.cn;ucas.ac.cn;;ruc.edu.cn;", "position": "PhD student;PhD student;PhD student;;Associate Professor;", "bibtex": "@misc{\nli2024from,\ntitle={From Deterministic to Probabilistic World: Balancing Enhanced Doubly Robust Learning for Debiased Recommendation},\nauthor={Haoxuan Li and Chunyuan Zheng and Yanghao Xiao and Min Zhang and Xu Chen and Xiao-Hua Zhou},\nyear={2024},\nurl={https://openreview.net/forum?id=2uwvigLUr8}\n}", "github": "", "project": "", "reviewers": "oqbo;XsAT;gTTa", "site": "https://openreview.net/forum?id=2uwvigLUr8", "pdf_size": 397216, "rating": "3;6;8", "confidence": "4;2;4", "soundness": "2;3;3", "contribution": "2;3;3", "presentation": "2;3;3", "wc_summary": "62;112;98", "wc_strengths": "50;12;38", "wc_weaknesses": "283;1;182", "wc_questions": "2;1;3", "wc_review": "397;126;321", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": 
"0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 5.666666666666667, 2.0548046676563256 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 90.66666666666667, 21.06075866524175 ], "wc_strengths_avg": [ 33.333333333333336, 15.860503004493758 ], "wc_weaknesses_avg": [ 155.33333333333334, 116.65999980951293 ], "wc_questions_avg": [ 2.0, 0.816496580927726 ], "wc_review_avg": [ 281.3333333333333, 114.13539717760169 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.11470786693528094, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:qAn4xqe8nxsJ:scholar.google.com/&scioq=From+Deterministic+to+Probabilistic+World:+Balancing+Enhanced+Doubly+Robust+Learning+for+Debiased+Recommendation&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Peking University;University of Chinese Academy of Sciences;Renmin University of China", "aff_unique_dep": ";;", "aff_unique_url": "http://www.pku.edu.cn;http://www.ucas.ac.cn;http://www.ruc.edu.cn", "aff_unique_abbr": "Peking U;UCAS;RUC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "2vARyD50w1", "title": "A Two-Branch Neural Network Architecture for Model Protection within Trusted Execution Environments", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Deep Neural Networks (DNNs) become increasingly prevalent in mobile applications on edge devices. As the model architecture and weights represent valuable intellectual property for model providers, it's necessary to protect them during inference. Previous works attempted to secure on-device machine learning by leveraging Trusted Execution Environments (TEEs). However, the constrained memory within TEEs prevents the direct model placement, and significant latency overhead is raised when partitioning the model and executing by a sequence in TEE. In our research, we propose a novel framework to restructure conventional CNN models into a unique two-branch architecture that is compatible with TEE deployments. Specifically, the framework generates a model that consists of a branch placed in a normal execution environment and a lightweight counterpart within the TEE. By facilitating unidirectional communication between the two branches, the confidentiality of the model can be protected. To figure out the best architecture for the newly generated network, we introduce a progressive pruning method to gradually identify and remove the redundant channel for the two branches at the same time while maintaining a high inference accuracy for the benign user. Our comprehensive experiments, involving a variety of DNNs and datasets, attest to the effectiveness of our framework. 
It offers robust security assurances while ensuring efficient computational latency.", "keywords": "Trusted Execution Environment;Security", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "", "author": "Ziyu Liu;Yukui Luo;Xiaolin Xu", "authorids": "~Ziyu_Liu4;~Yukui_Luo1;~Xiaolin_Xu3", "gender": ";M;", "homepage": "https://orcid.org/0000-0003-1844-1114;;", "dblp": ";221/0729;", "google_scholar": ";sOWfQKEAAAAJ;", "orcid": ";0000-0002-5852-4195;", "linkedin": ";;", "or_profile": "~Ziyu_Liu4;~Yukui_Luo1;~Xiaolin_Xu3", "aff": "State University of New York at Binghamton;United States;", "aff_domain": "binghamton.edu;umassd.edu;", "position": "PhD student;Assistant Professor;", "bibtex": "@misc{\nliu2024a,\ntitle={A Two-Branch Neural Network Architecture for Model Protection within Trusted Execution Environments},\nauthor={Ziyu Liu and Yukui Luo and Xiaolin Xu},\nyear={2024},\nurl={https://openreview.net/forum?id=2vARyD50w1}\n}", "github": "", "project": "", "reviewers": "", "site": "https://openreview.net/forum?id=2vARyD50w1", "pdf_size": 0, "rating": "", "confidence": "", "soundness": "", "contribution": "", "presentation": "", "wc_summary": "", "wc_strengths": "", "wc_weaknesses": "", "wc_questions": "", "wc_review": "", "wc_reply_reviewers": "", "wc_reply_authors": "", "reply_reviewers": "", "reply_authors": "", "rating_avg": [ 0, 0 ], "confidence_avg": [ 0, 0 ], "soundness_avg": [ 0, 0 ], "contribution_avg": [ 0, 0 ], "presentation_avg": [ 0, 0 ], "wc_summary_avg": [ 0, 0 ], "wc_strengths_avg": [ 0, 0 ], "wc_weaknesses_avg": [ 0, 0 ], "wc_questions_avg": [ 0, 0 ], "wc_review_avg": [ 0, 0 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 0, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Ys9klcchjwUJ:scholar.google.com/&scioq=A+Two-Branch+Neural+Network+Architecture+for+Model+Protection+within+Trusted+Execution+Environments&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "State University of New York at Binghamton;United States", "aff_unique_dep": ";", "aff_unique_url": "https://www.binghamton.edu;https://www.usa.gov", "aff_unique_abbr": "SUNY Binghamton;US", "aff_campus_unique_index": "0", "aff_campus_unique": "Binghamton;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "2vAhX71UCL", "title": "Dreamix: Video Diffusion Models are General Video Editors", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Text-driven image and video diffusion models have recently achieved unprecedented generation realism. While diffusion models have been successfully applied for image editing, none can edit motion in video. We present the first diffusion-based method that is able to perform text-based motion and appearance editing of general, real-world videos. Our approach uses a video diffusion model to combine, at inference time, the low-resolution spatio-temporal information from the original video with new, high resolution information that it synthesized to align with the guiding text prompt. As maintaining high-fidelity to the original video requires retaining some of its high-resolution information, we add a preliminary stage of finetuning the model on the original video, significantly boosting fidelity. 
We propose to improve motion editability by using a mixed objective that jointly finetunes with full temporal attention and with temporal attention masking. We extend our method for animating images, bringing them to life by adding motion to existing or new objects, and camera movements. Extensive experiments showcase our method's remarkable ability to edit motion in videos.", "keywords": "Video Diffusion;Video Motion Editing", "primary_area": "generative models", "supplementary_material": "/attachment/42a86714507cbe50b9d004fa457d752f84311cdf.zip", "author": "Eyal Molad;Eliahu Horwitz;Dani Valevski;Alex Rav-Acha;Yossi Matias;Yael Pritch;Yaniv Leviathan;Yedid Hoshen", "authorids": "~Eyal_Molad1;~Eliahu_Horwitz1;~Dani_Valevski1;~Alex_Rav-Acha1;~Yossi_Matias2;~Yael_Pritch1;~Yaniv_Leviathan1;~Yedid_Hoshen3", "gender": "M;M;M;M;M;F;;M", "homepage": "https://www.facebook.com/eyal.molad.5;https://horwitz.ai;;https://www.linkedin.com/in/alexravacha/?originalSubdomain=il;https://research.google/people/YossiMatias/;https://research.google/people/106214/;https://yanivle.github.io/;https://www.cs.huji.ac.il/~ydidh/", "dblp": "22/1540;268/8318;331/5332;;m/YossiMatias;15/1134;331/5369;136/0280", "google_scholar": ";NyLx5nIAAAAJ;ECKZ08wAAAAJ;NIRv_L8AAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;O9fbn38AAAAJ;https://scholar.google.co.il/citations?user=6y1-qS4AAAAJ", "orcid": ";;;;;;0009-0000-4080-4845;", "linkedin": ";eliahu-horwitz/;dani-valevski-a3b5936/?originalSubdomain=il;alexravacha/?originalSubdomain=il;yossimatias/;;yaniv-leviathan/;", "or_profile": "~Eyal_Molad1;~Eliahu_Horwitz1;~Dani_Valevski1;~Alex_Rav-Acha1;~Yossi_Matias2;~Yael_Pritch1;~Yaniv_Leviathan1;~Yedid_Hoshen3", "aff": "Google;Hebrew University of Jerusalem;Google;Google;Tel Aviv University;Google Research;Google;Google", "aff_domain": "google.com;huji.ac.il;google.com;google.com;tau.ac.il;google.com;google.com;google.com", "position": "Researcher;PhD student;Researcher;Researcher;Faculty;Researcher;Researcher;Researcher", "bibtex": "@misc{\nmolad2024dreamix,\ntitle={Dreamix: Video Diffusion Models are General Video Editors},\nauthor={Eyal Molad and Eliahu Horwitz and Dani Valevski and Alex Rav-Acha and Yossi Matias and Yael Pritch and Yaniv Leviathan and Yedid Hoshen},\nyear={2024},\nurl={https://openreview.net/forum?id=2vAhX71UCL}\n}", "github": "", "project": "", "reviewers": "UUNx;cmbb;E6Fh;JbPW", "site": "https://openreview.net/forum?id=2vAhX71UCL", "pdf_size": 24795060, "rating": "3;5;5;8", "confidence": "5;3;4;5", "soundness": "2;3;3;3", "contribution": "2;3;2;3", "presentation": "2;3;3;3", "wc_summary": "41;94;51;59", "wc_strengths": "25;41;36;50", "wc_weaknesses": "306;135;152;91", "wc_questions": "2;3;4;129", "wc_review": "374;273;243;329", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "321;293;223;187", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.25, 1.7853571071357126 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 61.25, 19.954636052807377 ], "wc_strengths_avg": [ 38.0, 9.027735042633894 ], "wc_weaknesses_avg": [ 171.0, 81.0586207630009 ], "wc_questions_avg": [ 34.5, 54.56418239101544 ], "wc_review_avg": [ 304.75, 50.509281325316834 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 256.0, 53.48831648126533 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], 
"replies_avg": [ 9, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.1266600992762247, "gs_citation": 189, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4680453094018320926&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;0;0;2;0;0;0", "aff_unique_norm": "Google;Hebrew University of Jerusalem;Tel Aviv University", "aff_unique_dep": "Google;;", "aff_unique_url": "https://www.google.com;https://www.huji.ac.il;https://www.tau.ac.il", "aff_unique_abbr": "Google;HUJI;TAU", "aff_campus_unique_index": "0;1;0;0;0;0;0", "aff_campus_unique": "Mountain View;Jerusalem;", "aff_country_unique_index": "0;1;0;0;1;0;0;0", "aff_country_unique": "United States;Israel" }, { "id": "2wFXD2upSQ", "title": "A Demon at Work: Leveraging Neuron Death for Efficient Neural Network Pruning", "track": "main", "status": "Reject", "tldr": "", "abstract": "When training deep neural networks, the phenomenon of \"dying neurons\" \u2014units that become inactive and output zero throughout training\u2014has traditionally been viewed as undesirable, linked with optimization challenges, and contributing to plasticity loss, particularly in continual learning scenarios. In this paper, we reassess this phenomenon through the lens of network sparsity and pruning. By systematically exploring the influence of various hyperparameter configurations on the occurrence of dying neurons, we unveil their potential to facilitate simple yet effective structured pruning algorithms. We introduce \"Demon's Pruning\" (DemP), a method that controls the proliferation of dead neurons, dynamically sparsifying neural networks as training progresses. Remarkably, our approach, characterized by its simplicity and broad applicability, outperforms existing structured pruning techniques, while achieving results comparable to prevalent unstructured pruning methods. 
These findings pave the way for leveraging dying neurons as a valuable resource for efficient model compression and optimization.", "keywords": "Pruning;Sparsity;Deep Learning;Regularization;Model Compression", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/607e518fa6d13a801a426ee1054563b8b7ecd961.pdf", "author": "Simon Dufort-Labb\u00e9;Pierluca D'Oro;Evgenii Nikishin;Razvan Pascanu;Pierre-Luc Bacon;Aristide Baratin", "authorids": "~Simon_Dufort-Labb\u00e91;~Pierluca_D'Oro1;~Evgenii_Nikishin1;~Razvan_Pascanu1;~Pierre-Luc_Bacon1;~Aristide_Baratin1", "gender": "M;M;M;M;;", "homepage": "https://github.com/SimonDufLab/;https://proceduralia.github.io;http://evgenii-nikishin.github.io/;https://razp.info;;", "dblp": "314/6338.html;248/8326;294/4770;65/8368.html;;", "google_scholar": ";https://scholar.google.it/citations?user=AuVp7pkAAAAJ;ez9FSEAAAAAJ;https://scholar.google.ca/citations?user=eSPY8LwAAAAJ;;", "orcid": ";;;;;", "linkedin": "simon-dufort-labb%C3%A9-8593421b5/;;;;;", "or_profile": "~Simon_Dufort-Labb\u00e91;~Pierluca_D'Oro1;~Evgenii_Nikishin1;~Razvan_Pascanu1;~Pierre-Luc_Bacon1;~Aristide_Baratin1", "aff": "Universit\u00e9 de Montr\u00e9al;Universit\u00e9 de Montr\u00e9al;University of Montreal;Google DeepMind;;", "aff_domain": "umontreal.ca;umontreal.ca;umontreal.ca;google.com;;", "position": "PhD student;PhD student;PhD student;Research Scientist;;", "bibtex": "@misc{\ndufort-labb{\\'e}2024a,\ntitle={A Demon at Work: Leveraging Neuron Death for Efficient Neural Network Pruning},\nauthor={Simon Dufort-Labb{\\'e} and Pierluca D'Oro and Evgenii Nikishin and Razvan Pascanu and Pierre-Luc Bacon and Aristide Baratin},\nyear={2024},\nurl={https://openreview.net/forum?id=2wFXD2upSQ}\n}", "github": "", "project": "", "reviewers": "tiWA;i9Pr;k7gu;YsTs", "site": "https://openreview.net/forum?id=2wFXD2upSQ", "pdf_size": 1159785, "rating": "5;5;6;6", "confidence": "2;4;3;3", "soundness": "3;4;2;3", "contribution": "2;2;2;2", "presentation": "2;3;2;3", "wc_summary": "44;56;84;63", "wc_strengths": "45;34;48;116", "wc_weaknesses": "288;76;196;44", "wc_questions": "3;132;86;600", "wc_review": "380;298;414;823", "wc_reply_reviewers": "107;0;0;166", "wc_reply_authors": "509;663;601;896", "reply_reviewers": "1;0;0;1", "reply_authors": "2;1;1;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 61.75, 14.53229162933362 ], "wc_strengths_avg": [ 60.75, 32.32162588732194 ], "wc_weaknesses_avg": [ 151.0, 97.29850975220535 ], "wc_questions_avg": [ 205.25, 232.55039776358157 ], "wc_review_avg": [ 478.75, 203.17649347303936 ], "wc_reply_reviewers_avg": [ 68.25, 71.36657130617948 ], "wc_reply_authors_avg": [ 667.25, 142.9831720867879 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:lqILG4iqw1oJ:scholar.google.com/&scioq=A+Demon+at+Work:+Leveraging+Neuron+Death+for+Efficient+Neural+Network+Pruning&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Universit\u00e9 de Montr\u00e9al;University of Montreal;Google", "aff_unique_dep": ";;Google DeepMind", "aff_unique_url": 
"https://www.umontreal.ca;https://wwwumontreal.ca;https://deepmind.com", "aff_unique_abbr": "UdeM;UM;DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "Canada;United Kingdom" }, { "id": "2wwPG1wpsu", "title": "LST-Bench:A Benchmark for long sequence time-series forecasting Task", "track": "main", "status": "Reject", "tldr": "", "abstract": "This paper introduces LST-Bench, a comprehensive benchmark designed for evaluating long sequence time-series forecasting(LSTF) models. This benchmark has been developed in response to recent advancements in deep learning methods in the field of LSTF tasks. LST-Bench includes Transformer-based, MLP-based, CNN-based, and RNN-based models, evaluating the performance of 11 major forecasting models on a set of commonly used 7 datasets and 7 new datasets that we have introduced. We conduct a thorough analysis of the experimental results, including the overall prediction performance of models and their generalization across different prediction lengths and datasets. Notably, we found that regardless of the model architecture, the phenomenon referred to as \"Degeneracy\" occurs when the model's predictions consistently maintain a low Mean Squared Error value but are characterized by repetitive and simplistic pattern generation, thus losing the meaningfulness of the predictions. Also, the model's optimal performance is very close to its performance after training for just one epoch. These two phenomenons emphasize the need for further investigation. Our LST-Bench will serve as a valuable resource for advancing research in the field of time series forecasting.", "keywords": "Time Series;Deep Learning;Neural Networks;Data Mining", "primary_area": "datasets and benchmarks", "supplementary_material": "", "author": "lanhao li;Haoyi Zhou;Bodan Chen;Siyang Xiao;Jianxin Li", "authorids": "~lanhao_li1;~Haoyi_Zhou1;~Bodan_Chen1;~Siyang_Xiao1;~Jianxin_Li3", "gender": ";M;M;;M", "homepage": "https://scholar.google.com/citations?user=WpeTvLgAAAAJ&hl=zh-CN&oi=ao;https://www.zhouhaoyi.com/;https://github.com/guolalala;;http://myjianxin.github.io", "dblp": ";162/1287;;;l/JianxinLi-2.html", "google_scholar": "WpeTvLgAAAAJ;mbrFlN0AAAAJ;;;EY2lqD0AAAAJ", "orcid": ";0000-0002-2393-3634;;;0000-0001-5152-0055", "linkedin": ";haoyi-zhou-54a7a69a/;;;", "or_profile": "~lanhao_li1;~Haoyi_Zhou1;~Bodan_Chen1;~Siyang_Xiao1;~Jianxin_Li3", "aff": "Beihang University;Beihang University;Beihang University;;Beihang University ", "aff_domain": "buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;;buaa.edu.cn", "position": "PhD student;Assistant Professor;MS student;;Full Professor", "bibtex": "@misc{\nli2024lstbencha,\ntitle={{LST}-Bench:A Benchmark for long sequence time-series forecasting Task},\nauthor={lanhao li and Haoyi Zhou and Bodan Chen and Siyang Xiao and Jianxin Li},\nyear={2024},\nurl={https://openreview.net/forum?id=2wwPG1wpsu}\n}", "github": "", "project": "", "reviewers": "1Wsy;inoa;PPjW;t1kK", "site": "https://openreview.net/forum?id=2wwPG1wpsu", "pdf_size": 1829584, "rating": "1;3;3;3", "confidence": "5;4;4;4", "soundness": "1;2;2;2", "contribution": "1;1;2;2", "presentation": "2;1;3;2", "wc_summary": "149;89;112;88", "wc_strengths": "27;57;46;50", "wc_weaknesses": "74;202;159;47", "wc_questions": "1;26;20;2", "wc_review": "251;374;337;187", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 2.5, 0.8660254037844386 ], 
"confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 1.75, 0.4330127018922193 ], "contribution_avg": [ 1.5, 0.5 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 109.5, 24.743686063317245 ], "wc_strengths_avg": [ 45.0, 11.113055385446435 ], "wc_weaknesses_avg": [ 120.5, 62.62786919575023 ], "wc_questions_avg": [ 12.25, 10.96300597464035 ], "wc_review_avg": [ 287.25, 73.08342835417616 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:0o-2uYpzHrcJ:scholar.google.com/&scioq=LST-Bench:A+Benchmark+for+long+sequence+time-series+forecasting+Task&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Beihang University", "aff_unique_dep": "", "aff_unique_url": "http://www.buaa.edu.cn/", "aff_unique_abbr": "BUAA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "2xYO9oxh0y", "title": "DiffSDS: A geometric sequence diffusion model for protein backbone inpainting", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Can a pure transformer learn protein structure under geometric constraints? Recent research has simplified protein structures as sequences of folding angles, making transformers suitable for unconstrained protein backbone generation. Unfortunately, such simplification is unsuitable for the constrained protein inpainting problem: we reveal theoretically that applying geometric constraints to the angle space would result in gradient vanishing or exploding, called \\textbf{GradCurse}. As a remedy, we suggest adding a hidden \\textbf{a}tomic \\textbf{d}irection \\textbf{s}pace (\\textbf{ADS}) layer upon the transformer encoder, converting invariant backbone angles into equivariant direction vectors. Geometric constraints could be efficiently imposed on the direction space while avoiding GradCurse. Meanwhile, a Direct2Seq decoder with mathematical guarantees is also introduced to reconstruct the folding angles. We apply the \\textbf{dual-space} model as the denoising neural network during the conditional diffusion process, resulting in a constrained generative model--\\textbf{DiffSDS}. Extensive experiments show that the proposed DiffSDS outperforms the sequence diffusion baseline, and even achieves competitive results with coordinate diffusion models, filling the gap between sequence and coordinate diffusion models.", "keywords": "Conditional sequence diffusion", "primary_area": "neurosymbolic & hybrid AI systems (physics-informed, logic & formal reasoning, etc.)", "supplementary_material": "/attachment/a228d5f7fa06f769004034b3ad444e586cb60cd4.pdf", "author": "Zhangyang Gao;Cheng Tan;Lirong Wu;Yufei Huang;Bozhen Hu;Stan Z. 
Li", "authorids": "~Zhangyang_Gao1;~Cheng_Tan1;~Lirong_Wu1;~Yufei_Huang4;~Bozhen_Hu1;~Stan_Z._Li2", "gender": "M;M;;M;M;M", "homepage": ";https://chengtan9907.github.io/;;https://2021.igem.org/Team:ZJU-China;;https://en.westlake.edu.cn/academics/School_of_Engineering/About/Our_People/Faculty/201912/t20191206_2497.shtml", "dblp": "275/3266;70/1533-12.html;15/10330;68/1946-2;279/8665;l/StanZLi", "google_scholar": "4SclT-QAAAAJ;6kTV6aMAAAAJ;Tk7TrCoAAAAJ;qmTjdwIAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0000-0003-1026-6083;;;0009-0007-8184-4529;0000-0002-3428-0114;", "linkedin": ";;;;;stan-z-li-%E6%9D%8E%E5%AD%90%E9%9D%92-55753224/", "or_profile": "~Zhangyang_Gao1;~Cheng_Tan1;~Lirong_Wu1;~Yufei_Huang4;~Bozhen_Hu1;~Stan_Z._Li1", "aff": "Westlake University, China;Zhejiang University & Westlake University;Westlake University;Zhejiang University;Westlake University;Westlake University", "aff_domain": "westlake.edu.cn;westlake.edu.cn;westlake.edu.cn;zju.edu.cn;westlake.edu.cn;westlake.edu.cn", "position": "PhD student;PhD student;PhD student;PhD student;PhD student;Chair Professor", "bibtex": "@misc{\ngao2024diffsds,\ntitle={Diff{SDS}: A geometric sequence diffusion model for protein backbone inpainting},\nauthor={Zhangyang Gao and Cheng Tan and Lirong Wu and Yufei Huang and Bozhen Hu and Stan Z. Li},\nyear={2024},\nurl={https://openreview.net/forum?id=2xYO9oxh0y}\n}", "github": "", "project": "", "reviewers": "qjdQ;9CLX;YgsM", "site": "https://openreview.net/forum?id=2xYO9oxh0y", "pdf_size": 1497240, "rating": "3;3;5", "confidence": "5;3;4", "soundness": "2;3;3", "contribution": "1;2;2", "presentation": "2;3;3", "wc_summary": "38;95;117", "wc_strengths": "49;54;91", "wc_weaknesses": "222;231;156", "wc_questions": "43;86;128", "wc_review": "352;466;492", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 1.6666666666666667, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 83.33333333333333, 33.289971796657056 ], "wc_strengths_avg": [ 64.66666666666667, 18.732028424302822 ], "wc_weaknesses_avg": [ 203.0, 33.436506994600975 ], "wc_questions_avg": [ 85.66666666666667, 34.70190516703978 ], "wc_review_avg": [ 436.6666666666667, 60.80204674917522 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 3, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:B3EO7Rd_TzcJ:scholar.google.com/&scioq=DiffSDS:+A+geometric+sequence+diffusion+model+for+protein+backbone+inpainting&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;0;1;0;0", "aff_unique_norm": "Westlake University;Zhejiang University", "aff_unique_dep": ";", "aff_unique_url": "https://www.westlake.edu.cn;http://www.zju.edu.cn", "aff_unique_abbr": "WU;ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "2y8XnaIiB8", "title": "Vision-Language Dataset Distillation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Dataset distillation methods offer the promise of reducing a 
large-scale dataset down to a significantly smaller set of (potentially synthetic) training examples, which preserve sufficient information for training a new model from scratch. So far, dataset distillation methods have been developed for image classification. However, with the rise in capabilities of vision-language models, and especially given the scale of datasets necessary to train these models, the time is ripe to expand dataset distillation methods beyond image classification. In this work, we take the first steps towards this goal by expanding on the idea of trajectory matching to create a distillation method for vision-language datasets. The key challenge is that vision-language datasets do not have a set of discrete classes. To overcome this, our proposed vision-and-language dataset distillation method jointly distills the images and their corresponding language descriptions in a contrastive formulation. Since there are no existing baselines, we compare our approach to three coreset selection methods (strategic subsampling of the training dataset), which we adapt to the vision-language setting. We demonstrate significant improvements on the challenging Flickr30K and COCO retrieval benchmarks: for example, on Flickr30K the best coreset selection method which selects 1000 image-text pairs for training is able to achieve only 5.6% image-to-text retrieval accuracy (i.e., recall@1); in contrast, our dataset distillation approach almost doubles that to 9.9% with just 100 (an order of magnitude fewer) training pairs.", "keywords": "dataset distillation;dataset condensation;multimodal machine learning;vision-language", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/f2a2ff64d9271fb236fe1b9ce83a3dfb91a5799e.zip", "author": "Xindi Wu;Byron Zhang;Zhiwei Deng;Olga Russakovsky", "authorids": "~Xindi_Wu1;~Byron_Zhang1;~Zhiwei_Deng3;~Olga_Russakovsky1", "gender": "F;M;M;F", "homepage": "https://xindiwu.github.io/;;http://www.zhiweideng.com;http://cs.princeton.edu/~olgarus", "dblp": "235/0784;;160/3578;52/6883", "google_scholar": "hvnUnrUAAAAJ;;tWBPUHwAAAAJ;TB5OwW8AAAAJ", "orcid": ";;;0000-0001-5272-3241", "linkedin": ";byron-zhang/;;", "or_profile": "~Xindi_Wu1;~Byron_Zhang1;~Zhiwei_Deng3;~Olga_Russakovsky1", "aff": "Princeton University;;Google Deepmind;Princeton University", "aff_domain": "cs.princeton.edu;;google.com;princeton.edu", "position": "PhD student;;Research Scientist;Associate Professor", "bibtex": "@misc{\nwu2024visionlanguage,\ntitle={Vision-Language Dataset Distillation},\nauthor={Xindi Wu and Byron Zhang and Zhiwei Deng and Olga Russakovsky},\nyear={2024},\nurl={https://openreview.net/forum?id=2y8XnaIiB8}\n}", "github": "", "project": "", "reviewers": "Evkk;MV4s;tXui;i1Lj", "site": "https://openreview.net/forum?id=2y8XnaIiB8", "pdf_size": 30037120, "rating": "5;5;6;6", "confidence": "5;5;2;4", "soundness": "2;3;4;3", "contribution": "3;3;2;3", "presentation": "3;3;4;4", "wc_summary": "42;40;89;135", "wc_strengths": "20;21;48;120", "wc_weaknesses": "165;56;50;207", "wc_questions": "2;203;46;1", "wc_review": "229;320;233;463", "wc_reply_reviewers": "61;0;0;0", "wc_reply_authors": "1579;1073;336;484", "reply_reviewers": "1;0;0;0", "reply_authors": "3;2;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.0, 1.224744871391589 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 76.5, 
39.05444917035702 ], "wc_strengths_avg": [ 52.25, 40.69628361410904 ], "wc_weaknesses_avg": [ 119.5, 68.17074152449862 ], "wc_questions_avg": [ 63.0, 82.84624312544292 ], "wc_review_avg": [ 311.25, 94.85877661028525 ], "wc_reply_reviewers_avg": [ 15.25, 26.413774815425377 ], "wc_reply_authors_avg": [ 868.0, 494.47598526116514 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8164965809277259, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1032649700553466713&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Princeton University;DeepMind", "aff_unique_dep": ";DeepMind", "aff_unique_url": "https://www.princeton.edu;https://deepmind.com", "aff_unique_abbr": "Princeton;DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "2yBuTFvXRh", "title": "AMPNet: Attention as Message Passing for Graph Neural Networks", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Graph Neural Networks (GNNs) have emerged as a powerful representation learning framework for graph-structured data. A key limitation of conventional GNNs is their representation of each node with a singular feature vector, potentially overlooking intricate details about individual node features. Here, we propose an Attention-based Message-Passing layer for GNNs (AMPNet) that encodes individual features per node and models feature-level interactions through cross-node attention during message-passing steps. We demonstrate the abilities of AMPNet through extensive benchmarking on real-world biological systems such as fMRI brain activity recordings and spatial genomic data, improving over existing baselines by 20% on fMRI signal reconstruction, and further improving another 8% with positional embedding added. Finally, we validate the ability of AMPNet to uncover meaningful feature-level interactions through case studies on biological systems. 
We anticipate that our architecture will be highly applicable to graph-structured data where node entities encompass rich feature-level information.", "keywords": "Graph Neural Networks;Attention;Message Passing", "primary_area": "learning on graphs and other geometries & topologies", "supplementary_material": "/attachment/46313d1906028a6dcce6a833a5d1dda8c857e325.pdf", "author": "Syed A Rizvi;Nhi Nguyen;Haoran Lyu;Benjamin W Christensen;Josue Ortega Caro;Emanuele Zappala;Maryam Bagherian;Antonio Henrique de Oliveira Fonseca;Christopher Lee Averill;Chadi Abdallah;Zhitao Ying;Maria Brbic;Rahul Madhav Dhodapkar;David van Dijk", "authorids": "~Syed_A_Rizvi1;~Nhi_Nguyen1;~Haoran_Lyu1;~Benjamin_W_Christensen1;~Josue_Ortega_Caro1;~Emanuele_Zappala1;~Maryam_Bagherian1;~Antonio_Henrique_de_Oliveira_Fonseca1;christopher.averill@bcm.edu;~Chadi_Abdallah1;~Zhitao_Ying1;~Maria_Brbic1;~Rahul_Madhav_Dhodapkar1;~David_van_Dijk1", "gender": "M;F;;M;;M;F;M;;M;M;F;M;M", "homepage": "https://syedarizvi.com/;https://www.linkedin.com/in/nhi-nguyen-a427401a9/;;;;https://eazappala.com/;;https://ahof1704.github.io/;;;https://www.cs.yale.edu/homes/ying-rex;https://brbiclab.epfl.ch/;;http://www.vandijklab.org", "dblp": ";;;;;;;;;;209/4936;130/3233;;136/9930", "google_scholar": "2rhnnZ4AAAAJ;;;;;J4OOzEwAAAAJ;;;;HJsH1PEAAAAJ;6fqNXooAAAAJ;ltxmeroAAAAJ;ivfFMbEAAAAJ;fjjZr6UAAAAJ", "orcid": "0000-0002-7932-9524;;;;;;;;;0000-0001-5783-6181;;0000-0002-1120-1778;0000-0002-2014-7515;", "linkedin": "syed-a-rizvi-01/;nhi-nguyen-a427401a9/;;benjwc/;;;maryam-bagherian-93a358ba/;;;;rex-ying-92770148/;;;", "or_profile": "~Syed_A_Rizvi1;~Nhi_Nguyen1;~Haoran_Lyu1;~Benjamin_W_Christensen1;~Josue_Ortega_Caro1;~Emanuele_Zappala1;~Maryam_Bagherian1;~Antonio_Henrique_de_Oliveira_Fonseca1;christopher.averill@bcm.edu;~Chadi_Abdallah1;~Zhitao_Ying1;~Maria_Brbic1;~Rahul_Madhav_Dhodapkar1;~David_van_Dijk1", "aff": "Yale University;New York University;;ETHZ - ETH Zurich;;Idaho State University;Yale University;Yale University;;Baylor College of Medicine;Yale University;EPFL - EPF Lausanne;;Yale University", "aff_domain": "yale.edu;nyu.edu;;ethz.ch;;isu.edu;yale.edu;yale.edu;;bcm.edu;yale.edu;epfl.ch;;yale.edu", "position": "PhD student;PhD student;;MS student;;Assistant Professor;Postdoc;PhD student;;Associate Professor;Assistant Professor;Assistant Professor;;Assistant Professor", "bibtex": "@misc{\nrizvi2024ampnet,\ntitle={{AMPN}et: Attention as Message Passing for Graph Neural Networks},\nauthor={Syed A Rizvi and Nhi Nguyen and Haoran Lyu and Benjamin W Christensen and Josue Ortega Caro and Emanuele Zappala and Maryam Bagherian and Antonio Henrique de Oliveira Fonseca and Christopher Lee Averill and Chadi Abdallah and Zhitao Ying and Maria Brbic and Rahul Madhav Dhodapkar and David van Dijk},\nyear={2024},\nurl={https://openreview.net/forum?id=2yBuTFvXRh}\n}", "github": "", "project": "", "reviewers": "Kcsy;vyBv;QxkE;Jr5A", "site": "https://openreview.net/forum?id=2yBuTFvXRh", "pdf_size": 3026514, "rating": "3;3;3;5", "confidence": "5;4;4;4", "soundness": "2;2;2;2", "contribution": "1;1;2;3", "presentation": "2;3;2;3", "wc_summary": "51;102;106;45", "wc_strengths": "35;25;102;57", "wc_weaknesses": "93;8;138;215", "wc_questions": "73;27;69;3", "wc_review": "252;162;415;320", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.0, 0.0 ], "contribution_avg": [ 1.75, 
0.82915619758885 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 76.0, 28.115831839019098 ], "wc_strengths_avg": [ 54.75, 29.634228520412 ], "wc_weaknesses_avg": [ 113.5, 74.92162571647788 ], "wc_questions_avg": [ 43.0, 29.29163703175362 ], "wc_review_avg": [ 287.25, 92.6320004102254 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 14, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7069628564186932802&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;0;0;4;0;5;0", "aff_unique_norm": "Yale University;New York University;ETH Zurich;Idaho State University;Baylor College of Medicine;EPFL", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.yale.edu;https://www.nyu.edu;https://www.ethz.ch;https://www.isu.edu;https://www.bcm.edu;https://www.epfl.ch", "aff_unique_abbr": "Yale;NYU;ETHZ;ISU;BCM;EPFL", "aff_campus_unique_index": "1", "aff_campus_unique": ";Lausanne", "aff_country_unique_index": "0;0;1;0;0;0;0;0;1;0", "aff_country_unique": "United States;Switzerland" }, { "id": "2zoi9YI21Y", "title": "Towards a Self-Made Model: Zero-Shot Self-Supervised Purification for Adversarial Attacks", "track": "main", "status": "Reject", "tldr": "", "abstract": "Adversarial purification is an adversarial defense method without robustness training for the classifier and regardless of the form of attacks, aiming to remove the adversarial perturbations on the attacked images. Such methods can defend against various unseen threats without modifying the classifier in contrast to empirical defenses. However, previous purification methods require careful training of a strong generative model or incorporating additional knowledge when training a classifier to be comparable to adversarial training. Retraining promising generative models or classifiers on large-scale datasets (e.g., ImageNet) is extremely challenging and computation-consuming. In this work, following the natural image manifold hypothesis, we propose a zero-shot self-supervised method for adversarial purification named \\textit{ZeroPur}: For an adversarial example that lies beyond the natural image manifold, its corrupted embedding vector is first restored so that it is moved close to the natural image manifold. The embedding is then fine-tuned on finer intermediate-level discrepancies to project it back within the manifold. The whole purification process is done from coarse to fine, which does not rely on any generative model and does not require retraining the classifier to incorporate additional knowledge. Extensive experiments on three datasets including CIFAR-10, CIFAR-100, and ImageNet with various classifier architectures including ResNet and WideResNet, demonstrate that our method achieves state-of-the-art robust performance. 
Code released.", "keywords": "adversarial attacks;adversarial defense;adversarial purification", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Xiuli Bi;Zonglin Yang;Bo Liu;Bin Xiao;Weisheng Li;Chi-Man Pun;Pietro Lio", "authorids": "~Xiuli_Bi1;~Zonglin_Yang2;~Bo_Liu23;~Bin_Xiao4;~Weisheng_Li2;~Chi-Man_Pun1;~Pietro_Lio1", "gender": "F;M;M;M;M;;M", "homepage": ";https://github.com/ZhivkoYang;;;http://cs.cqupt.edu.cn;https://cmpun.github.io/;https://www.cst.cam.ac.uk/people/pl219", "dblp": "92/860.html;;;43/5134-2;;p/ChiManPun;l/PietroLio.html", "google_scholar": "https://scholar.google.com/citations?hl=zh-TW;;fXlz210AAAAJ;https://scholar.google.com/citations?hl=zh-TW;;JTkP_EAAAAAJ;https://scholar.google.co.uk/citations?user=3YrWf7EAAAAJ", "orcid": "0000-0003-3134-217X;;0000-0002-3164-6299;0000-0001-8469-5302;;0000-0003-1788-3746;0000-0002-0540-5053", "linkedin": ";;;;;;", "or_profile": "~Xiuli_Bi1;~Zonglin_Yang2;~Bo_Liu23;~Bin_Xiao4;~Weisheng_Li2;~Chi-Man_Pun1;~Pietro_Lio1", "aff": "Chongqing University of Post and Telecommunications;Chongqing University of Post and Telecommunications;Chongqing University of Post and Telecommunications;Chongqing University of Posts and Tel.;Chongqing Post and Communications University;University of Macau;University of Cambridge", "aff_domain": "cqupt.edu.cn;cqupt.edu.cn;cqupt.edu.cn;edu.cn;cqupt.edu.cn;um.edu.mo;cam.ac.uk", "position": "Full Professor;MS student;Associate Professor;Full Professor;Full Professor;Full Professor;Full Professor", "bibtex": "@misc{\nbi2024towards,\ntitle={Towards a Self-Made Model: Zero-Shot Self-Supervised Purification for Adversarial Attacks},\nauthor={Xiuli Bi and Zonglin Yang and Bo Liu and Bin Xiao and Weisheng Li and Chi-Man Pun and Pietro Lio},\nyear={2024},\nurl={https://openreview.net/forum?id=2zoi9YI21Y}\n}", "github": "", "project": "", "reviewers": "tryU;1obD;GavC", "site": "https://openreview.net/forum?id=2zoi9YI21Y", "pdf_size": 3359310, "rating": "3;3;5", "confidence": "4;2;4", "soundness": "2;1;2", "contribution": "2;2;2", "presentation": "3;1;2", "wc_summary": "84;75;108", "wc_strengths": "44;26;76", "wc_weaknesses": "208;443;392", "wc_questions": "27;16;250", "wc_review": "363;560;826", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 1.6666666666666667, 0.4714045207910317 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.0, 0.816496580927726 ], "wc_summary_avg": [ 89.0, 13.92838827718412 ], "wc_strengths_avg": [ 48.666666666666664, 20.677416559027762 ], "wc_weaknesses_avg": [ 347.6666666666667, 100.93011883916955 ], "wc_questions_avg": [ 97.66666666666667, 107.80950277637349 ], "wc_review_avg": [ 583.0, 189.71733359571198 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5000000000000001, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:dPw7dxhVcs8J:scholar.google.com/&scioq=Towards+a+Self-Made+Model:+Zero-Shot+Self-Supervised+Purification+for+Adversarial+Attacks&hl=en&as_sdt=0,34", "gs_version_total": 0, "aff_unique_index": "0;0;0;1;2;3;4", "aff_unique_norm": "Chongqing University of Post and 
Telecommunications;Chongqing University of Posts and Telecommunications;Chongqing Post and Communications University;University of Macau;University of Cambridge", "aff_unique_dep": ";;;;", "aff_unique_url": "http://www.cqupt.edu.cn;http://www.cqupt.edu.cn/;http://www.cqupt.edu.cn;https://www.um.edu.mo;https://www.cam.ac.uk", "aff_unique_abbr": "CQUPT;CQUPT;CQUPT;UM;Cambridge", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Macau SAR;Cambridge", "aff_country_unique_index": "0;0;0;0;0;0;1", "aff_country_unique": "China;United Kingdom" }, { "id": "30L0rr9W8A", "title": "LatentCBF: A Control Barrier Function in Latent Space for Safe Control", "track": "main", "status": "Reject", "tldr": "", "abstract": "Safe control is crucial for safety-critical autonomous systems that are deployed in dynamic and uncertain environments. Quadratic-programming-control-barrier-function (QP-CBF) is becoming a popular tool for safe controller synthesis. Traditional QP-CBF relies on explicit knowledge of the system dynamics and access to all states, which are not always available in practice. We propose LatentCBF (LCBF), a control barrier function defined in the latent space, which only needs an agent's observations, not full states. The transformation from observations to latent space is established by a Lipschitz network-based AutoEncoder. In addition, the system dynamics and control barrier functions are all learned in the latent space. We demonstrate the efficiency, safety, and robustness of LCBFs in simulation for quadrotors and cars.", "keywords": "Representation Learning;Reinforcement Learning;Optimal Control;End-to-End Learning;Convex Optimization;Control Barrier Function;Autonomous Driving;CARLA;Robotics", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Somnath Sendhil Kumar;Qin Lin;John Dolan", "authorids": "~Somnath_Sendhil_Kumar1;~Qin_Lin1;~John_Dolan1", "gender": "M;M;M", "homepage": "https://hex-plex.github.io;;https://www.ri.cmu.edu/ri-faculty/john-m-dolan/", "dblp": ";;52/532.html", "google_scholar": ";https://scholar.google.nl/citations?user=-HX_b_0AAAAJ;xLk_w7kAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Somnath_Sendhil_Kumar1;~Qin_Lin1;~John_Dolan1", "aff": "Microsoft Research;Cleveland State University;School of Computer Science, Carnegie Mellon University", "aff_domain": "research.microsoft.com;csuohio.edu;cs.cmu.edu", "position": "Researcher;Assistant Professor;Full Professor", "bibtex": "@misc{\nkumar2024latentcbf,\ntitle={Latent{CBF}: A Control Barrier Function in Latent Space for Safe Control},\nauthor={Somnath Sendhil Kumar and Qin Lin and John Dolan},\nyear={2024},\nurl={https://openreview.net/forum?id=30L0rr9W8A}\n}", "github": "", "project": "", "reviewers": "ciJn;5PWg;ERPL;pAha", "site": "https://openreview.net/forum?id=30L0rr9W8A", "pdf_size": 2311052, "rating": "3;3;5;5", "confidence": "4;4;4;4", "soundness": "2;2;2;3", "contribution": "2;2;2;2", "presentation": "3;1;3;4", "wc_summary": "97;110;37;193", "wc_strengths": "29;62;23;62", "wc_weaknesses": "76;277;166;217", "wc_questions": "245;174;74;153", "wc_review": "447;623;300;625", "wc_reply_reviewers": "52;250;139;266", "wc_reply_authors": "863;1159;593;1176", "reply_reviewers": "1;2;2;2", "reply_authors": "2;3;2;2", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 109.25, 55.64339583454626 ], "wc_strengths_avg": [ 
44.0, 18.12456896039186 ], "wc_weaknesses_avg": [ 184.0, 73.69871097922947 ], "wc_questions_avg": [ 161.5, 60.94464701678073 ], "wc_review_avg": [ 498.75, 135.60673840189506 ], "wc_reply_reviewers_avg": [ 176.75, 87.06140074682925 ], "wc_reply_authors_avg": [ 947.75, 239.6636966668085 ], "reply_reviewers_avg": [ 1.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2612126896855845512&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1;2", "aff_unique_norm": "Microsoft;Cleveland State University;Carnegie Mellon University", "aff_unique_dep": "Microsoft Research;;School of Computer Science", "aff_unique_url": "https://www.microsoft.com/en-us/research;https://www.csuohio.edu;https://www.cmu.edu", "aff_unique_abbr": "MSR;CSU;CMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Separating common from salient patterns with Contrastive Representation Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19533", "id": "30N3bNAiw3", "author_site": "Robin Louiset, Edouard Duchesnay, Grigis Antoine, Pietro Gori", "tldr": "", "abstract": "Contrastive Analysis is a sub-field of Representation Learning that aims at separating 1) salient factors of variation - that only exist in the target dataset (i.e., diseased subjects) in contrast with 2) common factors of variation between target and background (i.e., healthy subjects) datasets. Despite their relevance, current models based on Variational Auto-Encoders have shown poor performance in learning semantically-expressive representations. On the other hand, Contrastive Representation Learning has shown tremendous performance leaps in various applications (classification, clustering, etc.). In this work, we propose to leverage the ability of Contrastive Learning to learn semantically expressive representations when performing Contrastive Analysis. Namely, we reformulate Contrastive Analysis under the lens of the InfoMax Principle and identify two Mutual Information terms to maximize and one to minimize. We decompose the two first terms into an Alignment and a Uniformity term, as commonly done in Contrastive Learning. Then, we motivate a novel Mutual Information minimization strategy to prevent information leakage between common and salient distributions. 
We validate our method on datasets designed to assess the pattern separation capability in Contrastive Analysis, including MNIST superimposed on CIFAR10, CelebA accessories, dSprites item superimposed on a digit grid, and three medical datasets.", "keywords": "Contrastive Learning;Mutual Information;Contrastive Analysis;Disentanglement", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Robin Louiset;Edouard Duchesnay;Antoine Grigis;Pietro Gori", "authorids": "~Robin_Louiset1;~Edouard_Duchesnay1;~Antoine_Grigis1;~Pietro_Gori1", "gender": "M;M;;", "homepage": ";https://duchesnay.github.io/;https://perso.telecom-paristech.fr/pgori/index.html;", "dblp": ";;134/9724;", "google_scholar": "yKGCUNQAAAAJ;https://scholar.google.fr/citations?user=mG6V3q4AAAAJ;https://scholar.google.fr/citations?user=id9wCjsAAAAJ;https://scholar.google.fr/citations?user=nDapWF8AAAAJ", "orcid": ";0000-0002-4073-3490;;", "linkedin": ";edouard-duchesnay-27b47b8;;", "or_profile": "~Robin_Louiset1;~Edouard_Duchesnay1;~Pietro_Gori1;~Grigis_Antoine1", "aff": "CEA;CEA;Telecom Paris;CEA", "aff_domain": "cea.fr;cea.fr;telecom-paris.fr;cea.fr", "position": "PhD student;Full Professor;Associate Professor;Researcher", "bibtex": "@inproceedings{\nlouiset2024separating,\ntitle={Separating common from salient patterns with Contrastive Representation Learning},\nauthor={Robin Louiset and Edouard Duchesnay and Antoine Grigis and Pietro Gori},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=30N3bNAiw3}\n}", "github": "", "project": "", "reviewers": "F95G;4LjD;etAa;fALd;Btev", "pdf_size": 4134629, "rating": "5;8;8;8;8", "confidence": "2;3;4;3;3", "soundness": "3;3;3;4;3", "contribution": "2;3;2;4;3", "presentation": "2;3;3;3;3", "wc_summary": "219;87;55;110;94", "wc_strengths": "135;23;147;163;98", "wc_weaknesses": "175;40;560;115;138", "wc_questions": "103;14;28;60;38", "wc_review": "632;164;790;448;368", "wc_reply_reviewers": "0;0;107;0;0", "wc_reply_authors": "1255;233;2590;903;1549", "reply_reviewers": "0;0;1;0;0", "reply_authors": "7;2;9;4;4", "rating_avg": [ 7.4, 1.2 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "contribution_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 113.0, 55.939252765835185 ], "wc_strengths_avg": [ 113.2, 49.929550368494205 ], "wc_weaknesses_avg": [ 205.6, 182.61281444630328 ], "wc_questions_avg": [ 48.6, 31.058654188486656 ], "wc_review_avg": [ 480.4, 215.69756605024546 ], "wc_reply_reviewers_avg": [ 21.4, 42.8 ], "wc_reply_authors_avg": [ 1306.0, 777.6945415778614 ], "reply_reviewers_avg": [ 0.2, 0.4000000000000001 ], "reply_authors_avg": [ 5.2, 2.4819347291981715 ], "replies_avg": [ 35, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7905694150420948, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8882038361941535553&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "openreview": "https://openreview.net/forum?id=30N3bNAiw3", "pdf": "https://openreview.net/pdf?id=30N3bNAiw3", "email": "cea.fr;cea.fr;telecom-paris.fr;cea.fr", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Commissariat \u00e0 l'\u00c9nergie Atomique et aux \u00c9nergies Alternatives;Telecom Paris", "aff_unique_dep": ";", "aff_unique_url": "https://www.cea.fr;https://www.telecom-paris.fr", 
"aff_unique_abbr": "CEA;Telecom Paris", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "France" }, { "title": "Matrix Manifold Neural Networks++", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19532", "id": "30aSE3FB3L", "author_site": "Xuan Son Nguyen, Yang, Aymeric Histace", "tldr": "", "abstract": "Deep neural networks (DNNs) on Riemannian manifolds have garnered increasing interest in various applied areas. For instance, DNNs on spherical and hyperbolic manifolds have been designed to solve a wide range of computer vision and nature language processing tasks. One of the key factors that contribute to the success of these networks is that spherical and hyperbolic manifolds have the rich algebraic structures of gyrogroups and gyrovector spaces. This enables principled and effective generalizations of the most successful DNNs to these manifolds. Recently, some works have shown that many concepts in the theory of gyrogroups and gyrovector spaces can also be generalized to matrix manifolds such as Symmetric Positive Definite (SPD) and Grassmann manifolds. As a result, some building blocks for SPD and Grassmann neural networks, e.g., isometric models and multinomial logistic regression (MLR) can be derived in a way that is fully analogous to their spherical and hyperbolic counterparts. Building upon these works, in this paper, we design fully-connected (FC) and convolutional layers for SPD neural networks. We also develop MLR on Symmetric Positive Semi-definite (SPSD) manifolds, and propose a method for performing backpropagation with the Grassmann logarithmic map in the projector perspective. We demonstrate the effectiveness of the proposed approach in the human action recognition and node classification tasks.", "keywords": "manifold learning;representation learning;gyrovector spaces;deep learning", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Xuan Son Nguyen;Shuo Yang;Aymeric Histace", "authorids": "~Xuan_Son_Nguyen2;~Shuo_Yang17;~Aymeric_Histace1", "gender": "M;M;M", "homepage": "https://nguyenxuanson10.github.io/;https://syangunique1111.github.io;https://aymeric.histace.free.fr", "dblp": "69/9959;;72/6341", "google_scholar": ";;https://scholar.google.fr/citations?user=y0MU8CAAAAAJ", "orcid": ";0009-0009-5849-1889;", "linkedin": ";shuo-yang-a51b97181/;", "or_profile": "~Xuan_Son_Nguyen2;~Shuo_Yang17;~Aymeric_Histace1", "aff": "Ecole Nationale Sup\u00e9rieure de l'Electronique et de ses Applications;Ecole Nationale Sup\u00e9rieure de l'Electronique et de ses Applications;ETIS", "aff_domain": "ensea.fr;ensea.fr;ensea.fr", "position": "Associate Professor;PhD student;Full Professor", "bibtex": "@inproceedings{\nnguyen2024matrix,\ntitle={Matrix Manifold Neural Networks++},\nauthor={Xuan Son Nguyen and Shuo Yang and Aymeric Histace},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=30aSE3FB3L}\n}", "github": "", "project": "", "reviewers": "2o5X;iDyb;pWPq", "pdf_size": 430741, "rating": "3;6;8", "confidence": "5;4;4", "soundness": "2;3;4", "contribution": "2;2;4", "presentation": "2;3;2", "wc_summary": "51;118;22", "wc_strengths": "30;88;64", "wc_weaknesses": "66;233;106", "wc_questions": "259;77;73", "wc_review": "406;516;265", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1308;793;322", 
"reply_reviewers": "0;0;0", "reply_authors": "2;1;1", "rating_avg": [ 5.666666666666667, 2.0548046676563256 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "contribution_avg": [ 2.6666666666666665, 0.9428090415820634 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 63.666666666666664, 40.202266381663385 ], "wc_strengths_avg": [ 60.666666666666664, 23.79542439676633 ], "wc_weaknesses_avg": [ 135.0, 71.19456908126256 ], "wc_questions_avg": [ 136.33333333333334, 86.75380235022683 ], "wc_review_avg": [ 395.6666666666667, 102.73049963645438 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 807.6666666666666, 402.6663907283823 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.9176629354822472, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3398805311670412901&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=30aSE3FB3L", "pdf": "https://openreview.net/pdf?id=30aSE3FB3L", "email": "ensea.fr;ensea.fr;ensea.fr", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Ecole Nationale Sup\u00e9rieure de l'Electronique et de ses Applications;ETIS", "aff_unique_dep": ";", "aff_unique_url": "https://www.enssea.fr;", "aff_unique_abbr": "ENSEA;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "France;" }, { "title": "Repelling Random Walks", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19531", "id": "31IOmrnoP4", "author_site": "Isaac Reid, Eli Berger, Krzysztof Choromanski, Adrian Weller", "tldr": "", "abstract": "We present a novel quasi-Monte Carlo mechanism to improve graph-based sampling, coined repelling random walks. By inducing correlations between the trajectories of an interacting ensemble such that their marginal transition probabilities are unmodified, we are able to explore the graph more efficiently, improving the concentration of statistical estimators whilst leaving them unbiased. The mechanism has a trivial drop-in implementation. We showcase the effectiveness of repelling random walks in a range of settings including estimation of graph kernels, the PageRank vector and graphlet concentrations. We provide detailed experimental evaluation and robust theoretical guarantees. 
To our knowledge, repelling random walks constitute the first rigorously studied quasi-Monte Carlo scheme correlating the directions of walkers on a graph, inviting new research in this exciting nascent domain.", "keywords": "Graphs;random walkers;quasi-Monte Carlo;kernel;PageRank;graphlets;scalable;mixing", "primary_area": "learning on graphs and other geometries & topologies", "supplementary_material": "", "author": "Isaac Reid;Eli Berger;Krzysztof Marcin Choromanski;Adrian Weller", "authorids": "~Isaac_Reid3;~Eli_Berger1;~Krzysztof_Marcin_Choromanski1;~Adrian_Weller1", "gender": "M;M;;M", "homepage": "https://isaac-reid.github.io;http://math.haifa.ac.il/berger/;;http://mlg.eng.cam.ac.uk/adrian/", "dblp": "287/4898;;78/11411;73/8324", "google_scholar": "3JPyAi0AAAAJ;;;https://scholar.google.co.uk/citations?user=Ek4hM10AAAAJ", "orcid": "0000-0002-1664-1975;;;", "linkedin": ";;;", "or_profile": "~Isaac_Reid3;~Eli_Berger1;~Krzysztof_Marcin_Choromanski1;~Adrian_Weller1", "aff": "University of Cambridge;University of Haifa;Google Brain Robotics & Columbia University;University of Cambridge", "aff_domain": "cam.ac.uk;haifa.ac.il;columbia.edu;cam.ac.uk", "position": "PhD student;Associate Professor;research scientist & adjunct assistant professor;Principal Researcher", "bibtex": "@inproceedings{\nreid2024repelling,\ntitle={Repelling Random Walks},\nauthor={Isaac Reid and Eli Berger and Krzysztof Marcin Choromanski and Adrian Weller},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=31IOmrnoP4}\n}", "github": "", "project": "", "reviewers": "vsCv;KSvf;1jyG", "pdf_size": 620477, "rating": "6;6;6", "confidence": "3;4;4", "soundness": "3;2;3", "contribution": "3;2;2", "presentation": "3;2;3", "wc_summary": "55;67;43", "wc_strengths": "29;46;24", "wc_weaknesses": "13;257;56", "wc_questions": "279;63;47", "wc_review": "376;433;170", "wc_reply_reviewers": "40;484;0", "wc_reply_authors": "571;2075;434", "reply_reviewers": "1;3;0", "reply_authors": "1;5;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 55.0, 9.797958971132712 ], "wc_strengths_avg": [ 33.0, 9.41629792788369 ], "wc_weaknesses_avg": [ 108.66666666666667, 106.34639418219855 ], "wc_questions_avg": [ 129.66666666666666, 105.79645026601266 ], "wc_review_avg": [ 326.3333333333333, 112.96705517785065 ], "wc_reply_reviewers_avg": [ 174.66666666666666, 219.3404254172546 ], "wc_reply_authors_avg": [ 1026.6666666666667, 743.3905807552013 ], "reply_reviewers_avg": [ 1.3333333333333333, 1.247219128924647 ], "reply_authors_avg": [ 2.6666666666666665, 1.699673171197595 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8497924878712380616&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=31IOmrnoP4", "pdf": "https://openreview.net/pdf?id=31IOmrnoP4", "email": "cam.ac.uk;haifa.ac.il;columbia.edu;cam.ac.uk", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Cambridge;University of Haifa;Google", "aff_unique_dep": ";;Google Brain Robotics", "aff_unique_url": "https://www.cam.ac.uk;https://www.haifa.ac.il;https://ai.google", 
"aff_unique_abbr": "Cambridge;UoH;Google", "aff_campus_unique_index": "0;2;0", "aff_campus_unique": "Cambridge;;Mountain View", "aff_country_unique_index": "0;1;2;0", "aff_country_unique": "United Kingdom;Israel;United States" }, { "id": "324zEJCo3a", "title": "Local Vs. Global Interpretability: A Computational Perspective", "track": "main", "status": "Reject", "tldr": "", "abstract": "The local and global interpretability of various ML models has been studied extensively in recent years. Yet despite significant progress in the field, many of the known results are either informal or lack sufficient mathematical rigor. In this work, we propose a framework based on computational complexity theory to systematically evaluate the local and global interpretability of different ML models. In essence, our framework examines various forms of explanations that can be computed either locally or globally and assesses the computational complexity involved in generating them. We begin by rigorously studying global explanations, and establish: (1) a duality relationship between local and global forms of explanations; and (2) the inherent uniqueness associated with certain global forms of explanations. We then proceed to evaluate the computational complexity associated with these forms of explanations, with a particular emphasis on three model types usually positioned at the extremes of the interpretability spectrum: (1) linear models; (2) decision trees; and (3) neural networks.\nOur findings reveal that, assuming standard complexity assumptions such as P!=NP, computing global explanations is computationally more difficult for linear models than for their local counterparts. Surprisingly, this phenomenon is not universally applicable to decision trees and neural networks: in certain scenarios, computing a global explanation is actually more tractable than computing a local one. We consider these results as compelling evidence of the importance of analyzing ML explainability from a computational complexity perspective, as the means of gaining a deeper understanding of the inherent interpretability of diverse ML models.", "keywords": "interpretability;explainable AI", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Shahaf Bassan;Guy Amir;Guy Katz", "authorids": "~Shahaf_Bassan1;~Guy_Amir1;~Guy_Katz1", "gender": ";M;M", "homepage": ";https://guyam2.github.io/;http://www.katz-lab.com", "dblp": ";277/9596;23/10321", "google_scholar": ";CSJEObYAAAAJ;https://scholar.google.com.tw/citations?user=3nYG5BMAAAAJ", "orcid": ";;", "linkedin": ";https://linkedin.com/in/guy-amir-a335a3ba;", "or_profile": "~Shahaf_Bassan1;~Guy_Amir1;~Guy_Katz1", "aff": ";Cornell University;Hebrew University of Jerusalem", "aff_domain": ";cornell.edu;huji.ac.il", "position": ";Postdoc;Associate Professor", "bibtex": "@misc{\nbassan2024local,\ntitle={Local Vs. 
Global Interpretability: A Computational Perspective},\nauthor={Shahaf Bassan and Guy Amir and Guy Katz},\nyear={2024},\nurl={https://openreview.net/forum?id=324zEJCo3a}\n}", "github": "", "project": "", "reviewers": "D69M;aaJG;RGYJ;qXuS", "site": "https://openreview.net/forum?id=324zEJCo3a", "pdf_size": 424863, "rating": "6;6;6;6", "confidence": "5;1;1;3", "soundness": "3;3;3;3", "contribution": "2;3;3;3", "presentation": "3;4;3;2", "wc_summary": "97;38;87;103", "wc_strengths": "60;42;21;111", "wc_weaknesses": "125;112;41;192", "wc_questions": "150;6;1;85", "wc_review": "432;198;150;491", "wc_reply_reviewers": "44;0;0;0", "wc_reply_authors": "790;533;293;478", "reply_reviewers": "1;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 2.5, 1.6583123951777 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 81.25, 25.616157010761782 ], "wc_strengths_avg": [ 58.5, 33.30540496676178 ], "wc_weaknesses_avg": [ 117.5, 53.59337645642417 ], "wc_questions_avg": [ 60.5, 61.48373768729419 ], "wc_review_avg": [ 317.75, 146.24358960310022 ], "wc_reply_reviewers_avg": [ 11.0, 19.05255888325765 ], "wc_reply_authors_avg": [ 523.5, 177.7027011612373 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Io_M2wWVXycJ:scholar.google.com/&scioq=Local+Vs.+Global+Interpretability:+A+Computational+Perspective&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Cornell University;Hebrew University of Jerusalem", "aff_unique_dep": ";", "aff_unique_url": "https://www.cornell.edu;https://www.huji.ac.il", "aff_unique_abbr": "Cornell;HUJI", "aff_campus_unique_index": "1", "aff_campus_unique": ";Jerusalem", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;Israel" }, { "title": "DDMI: Domain-agnostic Latent Diffusion Models for Synthesizing High-Quality Implicit Neural Representations", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19530", "id": "327tbF3S65", "author_site": "Dogyun Park, Sihyeon Kim, Sojin Lee, Hyunwoo Kim", "tldr": "", "abstract": "Recent studies have introduced a new class of generative models for synthesizing implicit neural representations (INRs) that capture arbitrary continuous signals in various domains. These models opened the door for domain-agnostic generative models, but they often fail to achieve high-quality generation. We observed that the existing methods generate the weights of neural networks to parameterize INRs and evaluate the network with fixed positional embeddings (PEs). Arguably, this architecture limits the expressive power of generative models and results in low-quality INR generation. To address this limitation, we propose Domain-agnostic Latent Diffusion Model for INRs (DDMI) that generates adaptive positional embeddings instead of neural networks' weights. Specifically, we develop a Discrete-to-continuous space Variational AutoEncoder (D2C-VAE) that seamlessly connects discrete data and continuous signal functions in the shared latent space. Additionally, we introduce a novel conditioning mechanism for evaluating INRs with the hierarchically decomposed PEs to further enhance expressive power. 
Extensive experiments across four modalities, \\eg, 2D images, 3D shapes, Neural Radiance Fields, and videos, with seven benchmark datasets, demonstrate the versatility of DDMI and its superior performance compared to the existing INR generative models. Code is available at \\href{https://github.com/mlvlab/DDMI}{https://github.com/mlvlab/DDMI}.", "keywords": "Implicit neural representation;generative model;domain agnostic;diffusion model", "primary_area": "generative models", "supplementary_material": "", "author": "Dogyun Park;Sihyeon Kim;Sojin Lee;Hyunwoo J. Kim", "authorids": "~Dogyun_Park2;~Sihyeon_Kim1;~Sojin_Lee1;~Hyunwoo_J._Kim3", "gender": "M;F;F;M", "homepage": "https://dogyunpark.github.io/;;;https://hyunwoojkim.com/publications", "dblp": "323/9575;304/2362;342/6155;150/4259", "google_scholar": "Cgc-2roAAAAJ;;roblxQYAAAAJ;https://scholar.google.co.kr/citations?user=LfBoJt8AAAAJ", "orcid": ";;0000-0001-9198-8437;0000-0002-2181-9264", "linkedin": "dogyunpark/;sihyeon-kim-a91aaa212/;sojin-lee-4b5138262/;", "or_profile": "~Dogyun_Park2;~Sihyeon_Kim1;~Sojin_Lee1;~Hyunwoo_Kim1", "aff": "Korea University;Korea University;Korea University;Korea University", "aff_domain": "korea.ac.kr;korea.ac.kr;korea.ac.kr;korea.ac.kr", "position": "PhD student;PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\npark2024ddmi,\ntitle={{DDMI}: Domain-agnostic Latent Diffusion Models for Synthesizing High-Quality Implicit Neural Representations},\nauthor={Dogyun Park and Sihyeon Kim and Sojin Lee and Hyunwoo J. Kim},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=327tbF3S65}\n}", "github": "", "project": "", "reviewers": "9btm;KC2J;qBmp;6uL5", "pdf_size": 29003266, "rating": "6;6;6;6", "confidence": "5;4;3;4", "soundness": "4;3;2;3", "contribution": "3;3;3;2", "presentation": "3;3;3;3", "wc_summary": "82;83;77;171", "wc_strengths": "28;78;56;127", "wc_weaknesses": "243;119;117;291", "wc_questions": "90;188;5;9", "wc_review": "443;468;255;598", "wc_reply_reviewers": "71;237;0;0", "wc_reply_authors": "518;567;605;616", "reply_reviewers": "1;1;0;0", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 103.25, 39.18146883413127 ], "wc_strengths_avg": [ 72.25, 36.237929024711114 ], "wc_weaknesses_avg": [ 192.5, 76.41171376170017 ], "wc_questions_avg": [ 73.0, 74.55534856735632 ], "wc_review_avg": [ 441.0, 122.45203142455416 ], "wc_reply_reviewers_avg": [ 77.0, 96.81683737862954 ], "wc_reply_authors_avg": [ 576.5, 38.356876828021335 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2438860377824535516&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=327tbF3S65", "pdf": "https://openreview.net/pdf?id=327tbF3S65", "email": "korea.ac.kr;korea.ac.kr;korea.ac.kr;korea.ac.kr", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Korea University", "aff_unique_dep": "", "aff_unique_url": "https://www.korea.ac.kr", "aff_unique_abbr": "KU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" 
}, { "id": "32camXjW25", "title": "Covariance-corrected Whitening Alleviates Network Degeneration on Imbalanced Classification", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Class imbalance is a critical issue in image classification that significantly affects the performance of deep recognition models. In this work, We first identify a network degeneration dilemma that hinders the model learning by introducing a high linear dependence among the features inputted into the classifier. To overcome this challenge, we propose a novel framework called Whitening-Net to mitigate the degenerate solutions, in which ZCA whitening is integrated before the linear classifier to normalize and decorrelate the batch samples. However, in scenarios with extreme class imbalance, the batch covariance statistic exhibits significant fluctuations, impeding the convergence of the whitening operation. Therefore, we propose two covariance-corrected modules, the Group-based Relatively Balanced Batch Sampler (GRBS) and the Batch Embedded Training (BET), to get more accurate and stable batch covariance, thereby reinforcing the capability of whitening. Our modules can be trained end-to-end without incurring substantial computational costs. Comprehensive empirical evaluations conducted on benchmark datasets, including CIFAR-LT-10/100, ImageNet-LT, and iNaturalist-LT, validate the effectiveness of our proposed approaches.", "keywords": "imbalanced classification;neural network;ZCA whitening;sampling", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/075d985c0fd34c578a57df71c77e9023457c74c8.pdf", "author": "Zhiwei Zhang;Hongsheng Li", "authorids": "~Zhiwei_Zhang3;~Hongsheng_Li3", "gender": ";M", "homepage": ";http://www.ee.cuhk.edu.hk/~hsli", "dblp": ";27/7402-1", "google_scholar": ";BN2Ze-QAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Zhiwei_Zhang3;~Hongsheng_Li3", "aff": ";The Chinese University of Hong Kong", "aff_domain": ";cuhk.edu.hk", "position": ";Associate Professor", "bibtex": "@misc{\nanonymous2024covariancecorrected,\ntitle={Covariance-corrected Whitening Alleviates Network Degeneration on Imbalanced Classification},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=32camXjW25}\n}", "github": "", "project": "", "reviewers": "xPQZ;bhjW;piR7;iY3M", "site": "https://openreview.net/forum?id=32camXjW25", "pdf_size": 2226529, "rating": "5;5;6;6", "confidence": "4;2;4;4", "soundness": "2;2;3;3", "contribution": "2;2;3;3", "presentation": "2;2;3;3", "wc_summary": "92;79;91;47", "wc_strengths": "40;7;142;50", "wc_weaknesses": "517;106;228;74", "wc_questions": "14;3;6;52", "wc_review": "663;195;467;223", "wc_reply_reviewers": "19;0;121;127", "wc_reply_authors": "1268;406;1199;440", "reply_reviewers": "1;0;1;2", "reply_authors": "4;2;3;3", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 77.25, 18.198557635153396 ], "wc_strengths_avg": [ 59.75, 50.08180807438965 ], "wc_weaknesses_avg": [ 231.25, 174.69884802138793 ], "wc_questions_avg": [ 18.75, 19.613452016409553 ], "wc_review_avg": [ 387.0, 191.2694434560837 ], "wc_reply_reviewers_avg": [ 66.75, 57.68177788522126 ], "wc_reply_authors_avg": [ 828.25, 406.1615288281252 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 
25, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:hBAExJM8MmoJ:scholar.google.com/&scioq=Covariance-corrected+Whitening+Alleviates+Network+Degeneration+on+Imbalanced+Classification&hl=en&as_sdt=0,5", "gs_version_total": 3, "aff_unique_index": "0", "aff_unique_norm": "Chinese University of Hong Kong", "aff_unique_dep": "", "aff_unique_url": "https://www.cuhk.edu.hk", "aff_unique_abbr": "CUHK", "aff_campus_unique_index": "0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "331CmSWDjz", "title": "Unsupervised Feature Learning with Emergent Data-Driven Prototypicality", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Given an image set without any labels, our goal is to train a model that maps each image to a point in a feature space such that, not only proximity indicates visual similarity, but where it is located directly encodes how prototypical the image is according to the dataset.\n\nOur key insight is to perform unsupervised feature learning in hyperbolic instead of Euclidean space, where the distance between points still reflects image similarity, and yet we gain additional capacity for representing prototypicality with the location of the point: The closer it is to the origin, the more prototypical it is. The latter property is simply emergent from optimizing the usual metric learning objective: The image similar to many training instances is best placed at the center of corresponding points in Euclidean space, but closer to the origin in hyperbolic space.\n\nWe propose an unsupervised feature learning algorithm in **H**yperbolic space with sphere p**ACK**ing. **HACK** first generates uniformly packed particles in the Poincare ball of hyperbolic space and then assigns each image uniquely to each particle. Images after congealing are regarded more typical of the dataset it belongs to. With our feature mapper simply trained to spread out training instances in hyperbolic space, we observe that images move closer to the origin with congealing, validating our idea of unsupervised prototypicality discovery. We demonstrate that our data-driven prototypicality provides an easy and superior unsupervised instance selection to reduce sample complexity, increase model generalization with atypical instances and robustness with typical ones.", "keywords": "Representation Learning;Hyperbolic Space;Prototypicality;Unsupervised Learning", "primary_area": "visualization or interpretation of learned representations", "supplementary_material": "", "author": "Yunhui Guo;Youren Zhang;Yubei Chen;Stella X. 
Yu", "authorids": "~Yunhui_Guo2;~Youren_Zhang1;~Yubei_Chen1;~Stella_X._Yu2", "gender": "M;M;M;F", "homepage": "https://yunhuiguo.github.io/;;https://redwood.berkeley.edu/people/yubei-chen/;http://www.eecs.umich.edu/~stellayu", "dblp": "165/3105;;30/10064;58/5089", "google_scholar": "BxIXuZYAAAAJ;;WeyLqFUAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;", "linkedin": ";youren-zhang-92a447251/;yubei-chen-05998a39/;", "or_profile": "~Yunhui_Guo2;~Youren_Zhang1;~Yubei_Chen1;~Stella_Yu2", "aff": "University of Texas at Dallas;University of Michigan - Ann Arbor;University of California, Davis;University of California, Berkeley", "aff_domain": "utdallas.edu;umich.edu;ucdavis.edu;berkeley.edu", "position": "Assistant Professor;MS student;Assistant Professor;Adjunct Professor", "bibtex": "@misc{\nguo2024unsupervised,\ntitle={Unsupervised Feature Learning with Emergent Data-Driven Prototypicality},\nauthor={Yunhui Guo and Youren Zhang and Yubei Chen and Stella X. Yu},\nyear={2024},\nurl={https://openreview.net/forum?id=331CmSWDjz}\n}", "github": "", "project": "", "reviewers": "9yFS;qJ86;PGEd", "site": "https://openreview.net/forum?id=331CmSWDjz", "pdf_size": 50621215, "rating": "3;5;5", "confidence": "4;2;4", "soundness": "2;3;2", "contribution": "3;3;2", "presentation": "2;3;2", "wc_summary": "107;138;61", "wc_strengths": "95;18;24", "wc_weaknesses": "203;70;160", "wc_questions": "178;79;4", "wc_review": "583;305;249", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 102.0, 31.63331577098213 ], "wc_strengths_avg": [ 45.666666666666664, 34.96982826507572 ], "wc_weaknesses_avg": [ 144.33333333333334, 55.41560149352246 ], "wc_questions_avg": [ 87.0, 71.26008700527947 ], "wc_review_avg": [ 379.0, 146.05021967346255 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 3, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1732935440179672701&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of Texas at Dallas;University of Michigan;University of California, Davis;University of California, Berkeley", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.utdallas.edu;https://www.umich.edu;https://www.ucdavis.edu;https://www.berkeley.edu", "aff_unique_abbr": "UT Dallas;UM;UC Davis;UC Berkeley", "aff_campus_unique_index": "0;1;2;3", "aff_campus_unique": "Dallas;Ann Arbor;Davis;Berkeley", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "33UGifHHfg", "title": "Multi-Task Learning with Hypernetworks and Task Metadata", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Multi-task learning architectures aim to model a set of related tasks simultaneously by sharing parameters across networks to exploit shared knowledge and improve performance. 
Designing multi-task architectures is challenging due to the trade-off between parameter efficiency and the ability to flexibly model task differences at all network layers. We propose a novel multi-task learning architecture called Multi-Task Hypernetworks, which circumvents this trade-off, generating flexible task networks with a minimal number of parameters per task. Our approach uses a hypernetwork to generate different network weights for each task from task-specific embeddings and enable abstract knowledge transfer between tasks. Our approach stands out from existing multi-task learning architectures by providing the added capability to effectively leverage task-level metadata to explicitly learn task relationships and task functions. We show empirically that Multi-Task Hypernetworks outperform many state-of-the-art multi-task learning architectures on small tabular data problems, and leverage metadata more effectively than existing methods.", "keywords": "multi-task learning;hypernetworks;metadata", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/594e6fbc22c188b064c28331c7230e85299882c9.zip", "author": "Olivier Graffeuille;Yun Sing Koh;J\u00f6rg Wicker;Moritz K Lehmann", "authorids": "~Olivier_Graffeuille1;~Yun_Sing_Koh2;~J\u00f6rg_Wicker1;~Moritz_K_Lehmann1", "gender": "M;;;", "homepage": ";https://profiles.auckland.ac.nz/y-koh;https://wicker.nz;https://www.moritzlehmann.org", "dblp": ";23/1879.html;56/3110;", "google_scholar": "TvP8GX0AAAAJ;0L38IrAAAAAJ;https://scholar.google.co.nz/citations?user=_Q11KBoAAAAJ;", "orcid": ";0000-0001-7256-4049;0000-0003-0533-3368;0000-0001-7346-3901", "linkedin": "olivier-graffeuille-b2672a122/;yun-sing-koh-a7ba358/;j%C3%B6rg-wicker-bb197417/;moritzlehmann/", "or_profile": "~Olivier_Graffeuille1;~Yun_Sing_Koh2;~J\u00f6rg_Wicker1;~Moritz_K_Lehmann1", "aff": "University of Auckland;University of Auckland;University of Auckland;University of Waikato", "aff_domain": "auckland.ac.nz;auckland.ac.nz;auckland.ac.nz;waikato.ac.nz", "position": "PhD student;Full Professor;Associate Professor;Adjunct Research Fellow", "bibtex": "@misc{\ngraffeuille2024multitask,\ntitle={Multi-Task Learning with Hypernetworks and Task Metadata},\nauthor={Olivier Graffeuille and Yun Sing Koh and J{\\\"o}rg Wicker and Moritz K Lehmann},\nyear={2024},\nurl={https://openreview.net/forum?id=33UGifHHfg}\n}", "github": "", "project": "", "reviewers": "88C4;RnjU;66Fq;bXYN", "site": "https://openreview.net/forum?id=33UGifHHfg", "pdf_size": 8781379, "rating": "3;3;5;5", "confidence": "4;3;4;4", "soundness": "3;2;2;2", "contribution": "2;2;2;2", "presentation": "3;3;3;4", "wc_summary": "43;72;48;54", "wc_strengths": "17;39;33;55", "wc_weaknesses": "254;138;300;216", "wc_questions": "2;31;46;61", "wc_review": "316;280;427;386", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "226;285;240;181", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 54.25, 10.96300597464035 ], "wc_strengths_avg": [ 36.0, 13.601470508735444 ], "wc_weaknesses_avg": [ 227.0, 59.371710435189584 ], "wc_questions_avg": [ 35.0, 21.805962487356524 ], "wc_review_avg": [ 352.25, 57.57766494049581 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 233.0, 37.1012129181783 ], 
"reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:leTQgLQ2MKwJ:scholar.google.com/&scioq=Multi-Task+Learning+with+Hypernetworks+and+Task+Metadata&hl=en&as_sdt=0,47", "gs_version_total": 0, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of Auckland;University of Waikato", "aff_unique_dep": ";", "aff_unique_url": "https://www.auckland.ac.nz;https://www.waikato.ac.nz", "aff_unique_abbr": "UoA;UoW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "New Zealand" }, { "title": "Conformal Risk Control", "status": "Spotlight", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19529", "id": "33XGfHLtZg", "author_site": "Anastasios Angelopoulos, Stephen Bates, Adam Fisch, Lihua Lei, Tal Schuster", "tldr": "", "abstract": "We extend conformal prediction to control the expected value of any monotone loss function. The algorithm generalizes split conformal prediction together with its coverage guarantee. Like conformal prediction, the conformal risk control procedure is tight up to an $\\mathcal{O}(1/n)$ factor. We also introduce extensions of the idea to distribution shift, quantile risk control, multiple and adversarial risk control, and expectations of U-statistics. Worked examples from computer vision and natural language processing demonstrate the usage of our algorithm to bound the false negative rate, graph distance, and token-level F1-score.", "keywords": "conformal prediction;uncertainty quantification", "primary_area": "probabilistic methods (Bayesian methods, variational inference, sampling, UQ, etc.)", "supplementary_material": "/attachment/0edbe9a906df6f1f354c50be92a440afe03dab24.zip", "author": "Anastasios Nikolas Angelopoulos;Stephen Bates;Adam Fisch;Lihua Lei;Tal Schuster", "authorids": "~Anastasios_Nikolas_Angelopoulos1;~Stephen_Bates1;~Adam_Fisch2;~Lihua_Lei2;~Tal_Schuster1", "gender": "M;;M;Not Specified;", "homepage": "http://angelopoulos.ai;https://stephenbates19.github.io/;https://lihualei71.github.io/;https://people.csail.mit.edu/tals/;https://people.csail.mit.edu/fisch/", "dblp": ";;;190/7491;https://dblp.org/pers/f/Fisch:Adam.html", "google_scholar": "nfX25MMAAAAJ;;https://scholar.google.co.uk/citations?user=-lKb3XwAAAAJ;oo8QRmIAAAAJ;https://scholar.google.com/citations?authorid=LYRkQhMAAAAJ", "orcid": ";0000-0002-3273-8179;;;", "linkedin": "anastasiosa/;;;;", "or_profile": "~Anastasios_Nikolas_Angelopoulos1;~Stephen_Bates1;~Lihua_Lei2;~Tal_Schuster1;~Adam_Fisch1", "aff": "University of California, Berkeley;Massachusetts Institute of Technology;;Google;Massachusetts Institute of Technology", "aff_domain": "berkeley.edu;mit.edu;;google.com;mit.edu", "position": "PhD student;Assistant Professor;;Researcher;PhD student", "bibtex": "@inproceedings{\nangelopoulos2024conformal,\ntitle={Conformal Risk Control},\nauthor={Anastasios Nikolas Angelopoulos and Stephen Bates and Adam Fisch and Lihua Lei and Tal Schuster},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=33XGfHLtZg}\n}", "github": "", "project": "", "reviewers": "fQpV;TBcZ;tJUM;GsY4;pT4W;nYbV", "pdf_size": 1468508, "rating": "6;6;6;8;8;8", "confidence": "3;4;3;4;3;4", "soundness": "3;3;3;4;3;4", "contribution": "3;2;3;4;3;4", "presentation": "3;3;3;4;3;4", 
"wc_summary": "93;45;40;59;75;28", "wc_strengths": "41;36;43;122;160;79", "wc_weaknesses": "151;106;122;540;309;94", "wc_questions": "124;6;155;3;22;18", "wc_review": "409;193;360;724;566;219", "wc_reply_reviewers": "48;15;36;19;192;0", "wc_reply_authors": "733;270;771;726;335;483", "reply_reviewers": "1;1;1;1;1;0", "reply_authors": "1;2;2;1;1;1", "rating_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 3.1666666666666665, 0.6871842709362768 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 56.666666666666664, 21.974732965132677 ], "wc_strengths_avg": [ 80.16666666666667, 46.53105295272061 ], "wc_weaknesses_avg": [ 220.33333333333334, 159.9777762343535 ], "wc_questions_avg": [ 54.666666666666664, 60.99635690396236 ], "wc_review_avg": [ 411.8333333333333, 186.62923731886767 ], "wc_reply_reviewers_avg": [ 51.666666666666664, 64.59274125025368 ], "wc_reply_authors_avg": [ 553.0, 200.98341556788543 ], "reply_reviewers_avg": [ 0.8333333333333334, 0.37267799624996495 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.33333333333333326, "gs_citation": 155, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14372458069187038582&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "openreview": "https://openreview.net/forum?id=33XGfHLtZg", "pdf": "https://openreview.net/pdf?id=33XGfHLtZg", "email": "berkeley.edu;mit.edu;;google.com;mit.edu", "author_num": 5, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "University of California, Berkeley;Massachusetts Institute of Technology;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "https://www.berkeley.edu;https://web.mit.edu;https://www.google.com", "aff_unique_abbr": "UC Berkeley;MIT;Google", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Berkeley;;Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "34QscjTwOc", "title": "Visualizing the Emergence of Primitive Interactions During the Training of DNNs", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Although the learning of deep neural networks (DNNs) is widely believed to be a fitting process without an explicit symbolic structure, previous studies have discovered (Ren et al., 2023a; Li & Zhang, 2023b) and proven (Ren et al., 2023c) that well-trained DNNs usually encode sparse interactions, which can be considered as primitives of the inference. In this study, we redefine the interaction on principal feature components in intermediate-layer features, which significantly simplifies the interaction and enables us to explore the dynamics of interactions throughout the learning of the DNN. Specifically, we visualize how new interactions are gradually learned and how previously learned interactions are gradually forgotten during the training process. 
We categorize all interactions into five distinct groups (reliable, withdrawing, forgetting, betraying, and fluctuating interactions), which provides a novel perspective for understanding the learning process of DNNs.", "keywords": "Visualization;Representation Complexity;Neural Network", "primary_area": "visualization or interpretation of learned representations", "supplementary_material": "", "author": "Jie Ren;Xinhao Zheng;Jiyu Liu;Quanshi Zhang", "authorids": "~Jie_Ren1;~Xinhao_Zheng2;~Jiyu_Liu1;~Quanshi_Zhang1", "gender": "F;M;M;M", "homepage": "https://jie-ren.github.io/;https://github.com/void-zxh;https://github.com/LJY-XCX;http://qszhang.com", "dblp": "r/JieRen-18;;;http://dblp.uni-trier.de/pers/hd/z/Zhang:Quanshi", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;;iFFhHK0AAAAJ", "orcid": "0000-0001-9918-3000;;;", "linkedin": ";;;", "or_profile": "~Jie_Ren1;~Xinhao_Zheng2;~Jiyu_Liu1;~Quanshi_Zhang1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;MS student;Undergrad student;Associate Professor", "bibtex": "@misc{\nren2024visualizing,\ntitle={Visualizing the Emergence of Primitive Interactions During the Training of {DNN}s},\nauthor={Jie Ren and Xinhao Zheng and Jiyu Liu and Quanshi Zhang},\nyear={2024},\nurl={https://openreview.net/forum?id=34QscjTwOc}\n}", "github": "", "project": "", "reviewers": "8d78;kGww;UCkY", "site": "https://openreview.net/forum?id=34QscjTwOc", "pdf_size": 19755599, "rating": "3;3;6", "confidence": "4;4;3", "soundness": "2;2;4", "contribution": "2;2;3", "presentation": "3;2;3", "wc_summary": "128;40;132", "wc_strengths": "32;298;197", "wc_weaknesses": "268;438;257", "wc_questions": "208;34;108", "wc_review": "636;810;694", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 4.0, 1.4142135623730951 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.9428090415820634 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 100.0, 42.45782220824175 ], "wc_strengths_avg": [ 175.66666666666666, 109.63677404147853 ], "wc_weaknesses_avg": [ 321.0, 82.85328398239062 ], "wc_questions_avg": [ 116.66666666666667, 71.29905718560067 ], "wc_review_avg": [ 713.3333333333334, 72.33870947762954 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 3, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:TMrupN3nH0cJ:scholar.google.com/&scioq=Visualizing+the+Emergence+of+Primitive+Interactions+During+the+Training+of+DNNs&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Polynomial Width is Sufficient for Set Representation with High-dimensional Features", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19528", "id": "34STseLBrQ", 
"author_site": "Peihao Wang, Shenghao Yang, Shu Li, Zhangyang Wang, Pan Li", "tldr": "", "abstract": "Set representation has become ubiquitous in deep learning for modeling the inductive bias of neural networks that are insensitive to the input order. DeepSets is the most widely used neural network architecture for set representation. It involves embedding each set element into a latent space with dimension $L$, followed by a sum pooling to obtain a whole-set embedding, and finally mapping the whole-set embedding to the output. In this work, we investigate the impact of the dimension $L$ on the expressive power of DeepSets. Previous analyses either oversimplified high-dimensional features to be one-dimensional features or were limited to complex analytic activations, thereby diverging from practical use or resulting in $L$ that grows exponentially with the set size $N$ and feature dimension $D$. To investigate the minimal value of $L$ that achieves sufficient expressive power, we present two set-element embedding layers: (a) linear + power activation (LP) and (b) linear + exponential activations (LE). We demonstrate that $L$ being $\\operatorname{poly}(N, D)$ is sufficient for set representation using both embedding layers. We also provide a lower bound of $L$ for the LP embedding layer. Furthermore, we extend our results to permutation-equivariant set functions and the complex field.", "keywords": "Set Representation; Permutation Invariance; Permutation Equivariance", "primary_area": "learning on graphs and other geometries & topologies", "supplementary_material": "", "author": "Peihao Wang;Shenghao Yang;Shu Li;Zhangyang Wang;Pan Li", "authorids": "~Peihao_Wang1;~Shenghao_Yang1;~Shu_Li6;~Zhangyang_Wang1;~Pan_Li2", "gender": "M;M;Not Specified;M;", "homepage": "https://peihaowang.github.io/;https://cs.uwaterloo.ca/~s286yang/;https://www.shuli.me/Shu-Li-087d71e8ce9340c5b4f7c08c7babe814;https://vita-group.github.io;", "dblp": "239/4075;41/4482-2;66/6852;119/4026;https://dblp.org/pers/hd/l/Li_0005:Pan", "google_scholar": "fqf2tBsAAAAJ;ocLDM-AAAAAJ;knBDWikAAAAJ;pxFyKAIAAAAJ;IroP0EwAAAAJ", "orcid": ";;;;", "linkedin": "peihao-wang-25a411162/;;;;pan-li-b951105a/", "or_profile": "~Peihao_Wang1;~Shenghao_Yang1;~Shu_Li6;~Zhangyang_Wang1;~Pan_Li2", "aff": "University of Texas, Austin;University of Waterloo;Purdue University;University of Texas at Austin;Purdue University", "aff_domain": "utexas.edu;uwaterloo.ca;purdue.edu;utexas.edu;purdue.edu", "position": "PhD student;PhD student;PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nwang2024polynomial,\ntitle={Polynomial Width is Sufficient for Set Representation with High-dimensional Features},\nauthor={Peihao Wang and Shenghao Yang and Shu Li and Zhangyang Wang and Pan Li},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=34STseLBrQ}\n}", "github": "", "project": "", "reviewers": "XKbR;oY1k;jrmV;a75R", "pdf_size": 10522719, "rating": "5;8;8;8", "confidence": "3;3;3;3", "soundness": "4;4;3;3", "contribution": "3;3;3;3", "presentation": "3;3;2;3", "wc_summary": "964;349;231;31", "wc_strengths": "82;104;278;33", "wc_weaknesses": "502;208;209;166", "wc_questions": "184;1;97;241", "wc_review": "1732;662;815;471", "wc_reply_reviewers": "0;22;0;34", "wc_reply_authors": "815;261;141;535", "reply_reviewers": "0;1;0;1", "reply_authors": "2;1;1;1", "rating_avg": [ 7.25, 1.299038105676658 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.5, 
0.5 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 393.75, 348.3040159113874 ], "wc_strengths_avg": [ 124.25, 92.41313488893232 ], "wc_weaknesses_avg": [ 271.25, 134.34912541583589 ], "wc_questions_avg": [ 130.75, 90.78098644540056 ], "wc_review_avg": [ 920.0, 484.38982235385583 ], "wc_reply_reviewers_avg": [ 14.0, 14.628738838327793 ], "wc_reply_authors_avg": [ 438.0, 260.3247971285102 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13203073421312026361&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=34STseLBrQ", "pdf": "https://openreview.net/pdf?id=34STseLBrQ", "email": "utexas.edu;uwaterloo.ca;purdue.edu;utexas.edu;purdue.edu", "author_num": 5, "aff_unique_index": "0;1;2;0;2", "aff_unique_norm": "University of Texas at Austin;University of Waterloo;Purdue University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.utexas.edu;https://uwaterloo.ca;https://www.purdue.edu", "aff_unique_abbr": "UT Austin;UW;Purdue", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;Canada" }, { "title": "Beating Price of Anarchy and Gradient Descent without Regret in Potential Games", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19527", "id": "36L7W3ri4U", "author_site": "Iosif Sakos, Stefanos Leonardos, Stelios Stavroulakis, William Overman, Ioannis Panageas, Georgios Piliouras", "tldr": "", "abstract": "Arguably one of the thorniest problems in game theory is that of equilibrium selection. Specifically, in the presence of multiple equilibria do self-interested learning dynamics typically select the socially optimal ones? We study a rich class of continuous-time no-regret dynamics in potential games (PGs). Our class of dynamics, *Q-Replicator Dynamics* (QRD), include gradient descent (GD), log-barrier and replicator dynamics (RD) as special cases. We start by establishing *pointwise convergence* of all QRD to Nash equilibria in almost all PGs. In the case of GD, we show a tight average case performance within a factor of two of optimal, for a class of symmetric $2\\times2$ potential games with unbounded Price of Anarchy (PoA). Despite this positive result, we show that GD is not always the optimal choice even in this restricted setting. Specifically, GD outperforms RD, if and only if *risk-* and *payoff-dominance* equilibria coincide. 
Finally, we experimentally show how these insights extend to all QRD dynamics and that unbounded gaps between average case performance and PoA analysis are common even in larger settings.", "keywords": "q-replicator dynamics;potential games;average price of anarchy;learning", "primary_area": "learning theory", "supplementary_material": "", "author": "Iosif Sakos;Stefanos Leonardos;Stelios Andrew Stavroulakis;William Overman;Ioannis Panageas;Georgios Piliouras", "authorids": "~Iosif_Sakos1;~Stefanos_Leonardos1;~Stelios_Andrew_Stavroulakis1;~William_Overman1;~Ioannis_Panageas1;~Georgios_Piliouras1", "gender": "M;M;;M;;M", "homepage": ";https://stefanosleonardos.com/;https://steliostavroulakis.github.io/;https://panageas.github.io;;https://willoverman.github.io/", "dblp": "271/1082;192/1237;315/4382;139/3829;62/1236;294/4924", "google_scholar": "https://scholar.google.gr/citations?user=69xvSfQAAAAJ;PtiGrVsAAAAJ;PPCdElkAAAAJ;5NiFWuwAAAAJ;;B2XPxEkAAAAJ", "orcid": "0000-0002-1871-9078;;0000-0002-2059-3152;;;", "linkedin": "joseph-sakos-3b3a6a200?lipi=urn%3Ali%3Apage%3Ad_flagship3_profile_view_base_contact_details%3BP9xevRgnRfKhbYYoPyDf3Q%3D%3D;stefanos-leonardos/;https://linkedin.com/in/steliostavroulakis;;;", "or_profile": "~Iosif_Sakos1;~Stefanos_Leonardos1;~Stelios_Andrew_Stavroulakis1;~Ioannis_Panageas1;~Georgios_Piliouras1;~Will_Overman1", "aff": "Singapore University of Technology and Design;King's College London, University of London;University of California, Irvine;Donald Bren School of Information and Computer Sciences, University of California, Irvine;Singapore University of Technology and Design;Stanford University", "aff_domain": "sutd.edu.sg;kcl.ac.uk;uci.edu;ics.uci.edu;sutd.edu.sg;stanford.edu", "position": "PhD student;Lecturer;PhD student;Assistant Professor;Associate Professor;PhD student", "bibtex": "@inproceedings{\nsakos2024beating,\ntitle={Beating Price of Anarchy and Gradient Descent without Regret in Potential Games},\nauthor={Iosif Sakos and Stefanos Leonardos and Stelios Andrew Stavroulakis and William Overman and Ioannis Panageas and Georgios Piliouras},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=36L7W3ri4U}\n}", "github": "", "project": "", "reviewers": "xUnV;ycpS", "pdf_size": 5645636, "rating": "6;8", "confidence": "2;3", "soundness": "4;4", "contribution": "3;3", "presentation": "3;3", "wc_summary": "183;294", "wc_strengths": "52;149", "wc_weaknesses": "49;33", "wc_questions": "253;52", "wc_review": "537;528", "wc_reply_reviewers": "0;0", "wc_reply_authors": "729;562", "reply_reviewers": "0;0", "reply_authors": "1;1", "rating_avg": [ 7.0, 1.0 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 4.0, 0.0 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 238.5, 55.5 ], "wc_strengths_avg": [ 100.5, 48.5 ], "wc_weaknesses_avg": [ 41.0, 8.0 ], "wc_questions_avg": [ 152.5, 100.5 ], "wc_review_avg": [ 532.5, 4.5 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 645.5, 83.5 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.9999999999999999, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4530536547724086878&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=36L7W3ri4U", "pdf": "https://openreview.net/pdf?id=36L7W3ri4U", "email": 
"sutd.edu.sg;kcl.ac.uk;uci.edu;ics.uci.edu;sutd.edu.sg;stanford.edu", "author_num": 6, "aff_unique_index": "0;1;2;2;0;3", "aff_unique_norm": "Singapore University of Technology and Design;King's College London;University of California, Irvine;Stanford University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.sutd.edu.sg;https://www.kcl.ac.uk;https://www.uci.edu;https://www.stanford.edu", "aff_unique_abbr": "SUTD;KCL;UCI;Stanford", "aff_campus_unique_index": "1;1;2", "aff_campus_unique": ";Irvine;Stanford", "aff_country_unique_index": "0;1;2;2;0;2", "aff_country_unique": "Singapore;United Kingdom;United States" }, { "id": "370Bvdd3z7", "title": "EC-Conf: An Ultra-fast Diffusion Model for Molecular Conformation Generation with Equivariant Consistency", "track": "main", "status": "Desk Reject", "tldr": "", "abstract": "Despite recent advancement in 3D molecule conformation generation driven by diffusion models, its high computational cost in iterative diffusion/denoising process limits its application. In this paper, an equivariant consistency model (EC-Conf) was proposed as a fast diffusion method for low-energy conformation generation. In EC-Conf, a modified SE (3)-equivariant transformer model was directly used to encode the Cartesian molecular conformations and a highly efficient consistency diffusion process was carried out to generate molecular conformations. It was demonstrated that, with only one sampling step, it can already achieve comparable quality to other diffusion-based models running with thousands denoising steps. Its performance can be further improved with a few more sampling iterations. The performance of EC-Conf is evaluated on both GEOM-QM9 and GEOM-Drugs sets. Our results demonstrate that the efficiency of EC-Conf for learning the distribution of low energy molecular conformation is at least two magnitudes higher than current SOTA diffusion models and could potentially become a useful tool for conformation generation and sampling. 
In the near future, we will release our code.", "keywords": "fast diffusion model;equivariant consistency;molecule conformation generation", "primary_area": "applications to physical sciences (physics, chemistry, biology, etc.)", "supplementary_material": "/attachment/f686381db6eabba03063ab0b42b7b31d80b416f5.zip", "author": "Zhiguang Fan;Yuedong Yang;Mingyuan Xu;Hongming Chen", "authorids": "~Zhiguang_Fan1;~Yuedong_Yang1;~Mingyuan_Xu1;~Hongming_Chen2", "gender": "M;M;M;M", "homepage": "https://github.com/zhi520/;http://biomed.nscc-gz.cn;;https://github.com/zhi520/EcConf", "dblp": ";98/2972;;", "google_scholar": ";AfjwTKoAAAAJ;NRbE1AoAAAAJ;", "orcid": ";0000-0002-6782-2813;;", "linkedin": ";;;", "or_profile": "~Zhiguang_Fan1;~Yuedong_Yang1;~Mingyuan_Xu1;~Hongming_Chen2", "aff": "SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY;;", "aff_domain": "sysu.edu.cn;sysu.edu.cn;;", "position": "PhD student;Full Professor;;", "bibtex": "@misc{\nfan2024ecconf,\ntitle={{EC}-Conf: An Ultra-fast Diffusion Model for Molecular Conformation Generation with Equivariant Consistency},\nauthor={Zhiguang Fan and Yuedong Yang and Mingyuan Xu and Hongming Chen},\nyear={2024},\nurl={https://openreview.net/forum?id=370Bvdd3z7}\n}", "github": "", "project": "", "reviewers": "", "site": "https://openreview.net/forum?id=370Bvdd3z7", "pdf_size": 2041176, "rating": "", "confidence": "", "soundness": "", "contribution": "", "presentation": "", "wc_summary": "", "wc_strengths": "", "wc_weaknesses": "", "wc_questions": "", "wc_review": "", "wc_reply_reviewers": "", "wc_reply_authors": "", "reply_reviewers": "", "reply_authors": "", "rating_avg": [ 0, 0 ], "confidence_avg": [ 0, 0 ], "soundness_avg": [ 0, 0 ], "contribution_avg": [ 0, 0 ], "presentation_avg": [ 0, 0 ], "wc_summary_avg": [ 0, 0 ], "wc_strengths_avg": [ 0, 0 ], "wc_weaknesses_avg": [ 0, 0 ], "wc_questions_avg": [ 0, 0 ], "wc_review_avg": [ 0, 0 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 0, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5808189580291946410&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 16, "aff_unique_index": "0;0", "aff_unique_norm": "Sun Yat-sen University", "aff_unique_dep": "", "aff_unique_url": "http://www.sysu.edu.cn", "aff_unique_abbr": "SYSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "381QSrWdF2", "title": "Law of Balance and Stationary Distribution of Stochastic Gradient Descent", "track": "main", "status": "Reject", "tldr": "", "abstract": "The stochastic gradient descent (SGD) algorithm is the algorithm we use to train neural networks. However, it remains poorly understood how the SGD navigates the highly nonlinear and degenerate loss landscape of a neural network. In this work, we prove that the minibatch noise of SGD regularizes the solution towards a balanced solution whenever the loss function contains a rescaling symmetry. Because the difference between a simple diffusion process and SGD dynamics is the most significant when symmetries are present, our theory implies that the loss function symmetries constitute an essential probe of how SGD works. We then apply this result to derive the stationary distribution of stochastic gradient flow for a diagonal linear network with arbitrary depth and width. 
The stationary distribution exhibits complicated nonlinear phenomena such as phase transitions, broken ergodicity, and fluctuation inversion. These phenomena are shown to exist uniquely in deep networks, implying a fundamental difference between deep and shallow models.", "keywords": "stochastic gradient descent;stationary distribution;stochastic differential equation;phase transition", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Liu Ziyin;Hongchao Li;Masahito Ueda", "authorids": "~Liu_Ziyin1;~Hongchao_Li2;~Masahito_Ueda1", "gender": ";M;M", "homepage": "https://www.mit.edu/~ziyinl/;https://sites.google.com/view/condmat-hongchaoli;http://cat.phys.s.u-tokyo.ac.jp/index-e.html", "dblp": ";;", "google_scholar": "NpN9oRMAAAAJ;;https://scholar.google.co.jp/citations?user=Xpjx9CwAAAAJ", "orcid": ";;0000-0002-5367-1436", "linkedin": ";;", "or_profile": "~Liu_Ziyin1;~Hongchao_Li2;~Masahito_Ueda1", "aff": "Massachusetts Institute of Technology;;The University of Tokyo", "aff_domain": "mit.edu;;u-tokyo.ac.jp", "position": "Postdoc;;Full Professor", "bibtex": "@misc{\nziyin2024law,\ntitle={Law of Balance and Stationary Distribution of Stochastic Gradient Descent},\nauthor={Liu Ziyin and Hongchao Li and Masahito Ueda},\nyear={2024},\nurl={https://openreview.net/forum?id=381QSrWdF2}\n}", "github": "", "project": "", "reviewers": "8S7u;wHcL;eqby;Y3Mm", "site": "https://openreview.net/forum?id=381QSrWdF2", "pdf_size": 1371746, "rating": "3;5;6;8", "confidence": "4;2;4;3", "soundness": "2;3;4;4", "contribution": "2;2;3;4", "presentation": "2;3;4;4", "wc_summary": "61;33;45;81", "wc_strengths": "56;18;58;82", "wc_weaknesses": "90;45;67;23", "wc_questions": "293;21;3;67", "wc_review": "500;117;173;253", "wc_reply_reviewers": "134;0;0;24", "wc_reply_authors": "2503;515;449;523", "reply_reviewers": "1;0;0;1", "reply_authors": "4;1;1;1", "rating_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "contribution_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 55.0, 18.0 ], "wc_strengths_avg": [ 53.5, 22.907422377910613 ], "wc_weaknesses_avg": [ 56.25, 24.933661985356263 ], "wc_questions_avg": [ 96.0, 116.1077086157504 ], "wc_review_avg": [ 260.75, 146.3427056603779 ], "wc_reply_reviewers_avg": [ 39.5, 55.43239125276845 ], "wc_reply_authors_avg": [ 997.5, 869.6750830051417 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.25087260300212727, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14809324360039312152&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Massachusetts Institute of Technology;University of Tokyo", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.u-tokyo.ac.jp", "aff_unique_abbr": "MIT;UTokyo", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;Japan" }, { "title": "Language Model Self-improvement by Reinforcement Learning Contemplation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19526", "id": "38E4yUbrgr", "author_site": "Jing-Cheng Pang, Pengyuan Wang, Kaiyuan Li, XiongHui Chen, Jiacheng Xu, Zongzhang Zhang, Yang Yu", "tldr": 
"", "abstract": "Language model self-improvement (LMSI) techniques have recently gained significant attention as they improve language models without requiring external supervision. A common approach is reinforcement learning from AI feedback (RLAIF), which trains a reward model based on AI preference data and employs a reinforcement learning algorithm to train the language model. \nHowever, RLAIF relies on the heuristic assumption that an AI model can provide effective feedback and correct wrong answers, requiring a solid capability of the language model. This paper presents a novel LMSI method, Reinforcement Learning Contemplation (RLC). We disclose that it is simpler for language models to evaluate a sentence than to generate it, even for small language models. Leveraging the gap between the evaluation and generation, RLC evaluates generated answers and updates language model parameters using reinforcement learning to maximize evaluation scores. Through testing on various challenging reasoning tasks and text summarization task, our experiments show that RLC effectively improves language model performance without external supervision, resulting in an answering accuracy increase (from 31.23% to 37.09%) for BigBench-hard reasoning tasks, and a rise in BERTScore for CNN/Daily Mail summarization tasks. Furthermore, RLC can be applied to models of different sizes, showcasing its broad applicability.", "keywords": "Reinforcement learning; language model self-improvement; text evaluation", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/d95b45aacba89101b72096a3248f805ad0e1a028.zip", "author": "Jing-Cheng Pang;Pengyuan Wang;Kaiyuan Li;Xiong-Hui Chen;Jiacheng Xu;Zongzhang Zhang;Yang Yu", "authorids": "~Jing-Cheng_Pang1;~Pengyuan_Wang3;~Kaiyuan_Li2;~Xiong-Hui_Chen1;~Jiacheng_Xu3;~Zongzhang_Zhang1;~Yang_Yu5", "gender": "M;;;M;;M;", "homepage": "https://www.lamda.nju.edu.cn/pangjc;;;http://www.lamda.nju.edu.cn/chenxh/;http://www.lamda.nju.edu.cn/xujc/;http://www.lamda.nju.edu.cn/zhangzz;", "dblp": "254/2679;;;241/7938;188/6025-3;90/8724;", "google_scholar": "R3Y_WrkAAAAJ;;;H5pguCYAAAAJ;;sG7WEAgAAAAJ;", "orcid": ";;;;;;", "linkedin": ";;;;;;", "or_profile": "~Jing-Cheng_Pang1;~Pengyuan_Wang3;~Kaiyuan_Li2;~Xiong-Hui_Chen1;~Jiacheng_Xu3;~Zongzhang_Zhang1;~Yang_Yu5", "aff": "Nanjing University;;;Nanjing University;Nanjing University;Nanjing University;", "aff_domain": "nju.edu.cn;;;nju.edu.cn;nju.edu.cn;nju.edu.cn;", "position": "PhD student;;;PhD student;MS student;Associate Professor;", "bibtex": "@inproceedings{\npang2024language,\ntitle={Language Model Self-improvement by Reinforcement Learning Contemplation},\nauthor={Jing-Cheng Pang and Pengyuan Wang and Kaiyuan Li and Xiong-Hui Chen and Jiacheng Xu and Zongzhang Zhang and Yang Yu},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=38E4yUbrgr}\n}", "github": "", "project": "", "reviewers": "BB78;jzQi;scBc;Rzu4;ksaG", "pdf_size": 1099213, "rating": "3;5;6;8;8", "confidence": "3;4;4;4;3", "soundness": "2;3;3;3;3", "contribution": "2;2;2;3;3", "presentation": "2;4;3;4;3", "wc_summary": "58;62;74;156;145", "wc_strengths": "11;52;37;71;100", "wc_weaknesses": "127;208;47;255;190", "wc_questions": "40;198;104;128;39", "wc_review": "236;520;262;610;474", "wc_reply_reviewers": "0;0;43;185;24", "wc_reply_authors": "863;983;1052;1526;731", "reply_reviewers": "0;0;1;3;1", "reply_authors": "3;3;3;5;2", "rating_avg": [ 6.0, 1.8973665961010275 ], 
"confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "contribution_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 99.0, 42.5205832509386 ], "wc_strengths_avg": [ 54.2, 30.155596495509748 ], "wc_weaknesses_avg": [ 165.4, 72.02943842624347 ], "wc_questions_avg": [ 101.8, 59.51268772287133 ], "wc_review_avg": [ 420.4, 146.857209560852 ], "wc_reply_reviewers_avg": [ 50.4, 69.20866997710618 ], "wc_reply_authors_avg": [ 1031.0, 270.5527674964719 ], "reply_reviewers_avg": [ 1.0, 1.0954451150103321 ], "reply_authors_avg": [ 3.2, 0.9797958971132712 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.21516574145596765, "gs_citation": 41, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8291999413488443176&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=38E4yUbrgr", "pdf": "https://openreview.net/pdf?id=38E4yUbrgr", "email": "nju.edu.cn;;;nju.edu.cn;nju.edu.cn;nju.edu.cn;", "author_num": 7, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": "https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "39HaKNXpsu", "title": "Adapt and Diffuse: Sample-adaptive Reconstruction via Latent Diffusion Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "Inverse problems arise in a multitude of applications, where the goal is to recover a clean signal from noisy and possibly (non)linear observations. The difficulty of a reconstruction problem depends on multiple factors, such as the structure of the ground truth signal, the severity of the degradation, the implicit bias of the reconstruction model and the complex interactions between the above factors. This results in natural sample-by-sample variation in the difficulty of a reconstruction task, which is often overlooked by contemporary techniques. Recently, diffusion-based inverse problem solvers have established new state-of-the-art in various reconstruction tasks. Our key observation in this paper is that most existing solvers lack the ability to adapt their compute power to the difficulty of the reconstruction task, resulting in long inference times, subpar performance and wasteful resource allocation. We propose a novel method that we call severity encoding, to estimate the degradation severity of noisy, degraded signals in the latent space of an autoencoder. We show that the estimated severity has strong correlation with the true corruption level and can give useful hints at the difficulty of reconstruction problems on a sample-by-sample basis. Furthermore, we propose a reconstruction method based on latent diffusion models that leverages the predicted degradation severities to fine-tune the reverse diffusion sampling trajectory and thus achieve sample-adaptive inference times. 
We perform numerical experiments on both linear and nonlinear inverse problems and demonstrate that our technique achieves performance comparable to state-of-the-art diffusion-based techniques, with significant improvements in computational efficiency.", "keywords": "inverse problems;diffusion models;image reconstruction;sample-adaptive reconstruction", "primary_area": "generative models", "supplementary_material": "/attachment/bc5b7304cc96bab3acf9bc103f43fdbd46aa5572.zip", "author": "Zalan Fabian;Berk Tinaz;Mahdi Soltanolkotabi", "authorids": "~Zalan_Fabian1;~Berk_Tinaz1;~Mahdi_Soltanolkotabi1", "gender": "M;M;M", "homepage": "https://z-fabian.github.io/;https://berktinaz.github.io/;http://www-bcf.usc.edu/~soltanol/", "dblp": "192/2874;275/8488;75/6691", "google_scholar": "5EKjsXQAAAAJ;gzIzOtAAAAAJ;narJyMAAAAAJ", "orcid": ";;", "linkedin": ";berk-tinaz/;", "or_profile": "~Zalan_Fabian1;~Berk_Tinaz1;~Mahdi_Soltanolkotabi1", "aff": "University of Southern California;Amazon;University of Southern California", "aff_domain": "usc.edu;amazon.com;usc.edu", "position": "Postdoc;Intern;Associate Professor", "bibtex": "@misc{\nfabian2024adapt,\ntitle={Adapt and Diffuse: Sample-adaptive Reconstruction via Latent Diffusion Models},\nauthor={Zalan Fabian and Berk Tinaz and Mahdi Soltanolkotabi},\nyear={2024},\nurl={https://openreview.net/forum?id=39HaKNXpsu}\n}", "github": "", "project": "", "reviewers": "VtJz;Gc37;fcte;Cd1z", "site": "https://openreview.net/forum?id=39HaKNXpsu", "pdf_size": 33178025, "rating": "5;5;6;8", "confidence": "3;4;2;3", "soundness": "2;2;3;3", "contribution": "2;2;2;3", "presentation": "3;2;3;4", "wc_summary": "69;106;151;57", "wc_strengths": "35;9;117;103", "wc_weaknesses": "31;349;120;191", "wc_questions": "26;3;143;84", "wc_review": "161;467;531;435", "wc_reply_reviewers": "24;171;10;13", "wc_reply_authors": "495;995;680;666", "reply_reviewers": "1;1;1;1", "reply_authors": "2;4;2;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 95.75, 36.656343243700675 ], "wc_strengths_avg": [ 66.0, 45.221676218380054 ], "wc_weaknesses_avg": [ 172.75, 116.48256307276209 ], "wc_questions_avg": [ 64.0, 54.32770932038272 ], "wc_review_avg": [ 398.5, 141.40986528527634 ], "wc_reply_reviewers_avg": [ 54.5, 67.46295279633111 ], "wc_reply_authors_avg": [ 709.0, 180.47298966881442 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.28867513459481287, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4530409226247603787&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Southern California;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.usc.edu;https://www.amazon.com", "aff_unique_abbr": "USC;Amazon", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "39VR7gVEJs", "title": "Analyzing Implicit Regularization In Federated Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Backward error analysis is a powerful technique that can check how much the path of the gradient flow is modified under the influence of a finite learning rate. 
Through this technique, it is also possible to find an implicit regularizer that affects the convergence behavior of an optimizer. With a backward error analysis, this paper seeks a more intuitive but quantitative way to understand the convergence behaviour under various federated learning algorithms. We prove that the implicit regularizer for FedAvg disperses the gradient of each client from the average gradient, increasing the gradient variance. We then theoretically present that the implicit regularizer of FedAvg hampers the convergence if the variance of gradients from clients decreases following the gradient of the cost function. In order to verify our analysis, we run experiments on FedAvg with and without the drifting term and confirm that FedAvg without the drifting term shows higher test accuracies. Our analysis also explains the convergence behavior of variance reduction methods such as SCAFFOLD, FedDyn, and FedSAM to show that the implicit regularizers of those methods have a smaller or zero drifting effect when the learning rate is small. Especially, we provide a possible reason FedSAM can perform better than FedAvg but might not perform as well as other stable variance reduction methods under data heterogeneity.", "keywords": "federated learning;implicit regularization;backward error analysis;optimization", "primary_area": "optimization", "supplementary_material": "/attachment/7b869b9fa8957c46f777cbba2f0b448dc6a58700.pdf", "author": "Jinwoo Lim;Sangyoon Yu;Suhyun Kim;Soo-Mook Moon", "authorids": "~Jinwoo_Lim1;~Sangyoon_Yu1;~Suhyun_Kim1;~Soo-Mook_Moon1", "gender": "M;M;;M", "homepage": "https://github.com/jinwoolim8180;;https://kdst.tistory.com/;https://altair.snu.ac.kr/smoon/", "dblp": "212/5596;350/3784;45/6898-1;37/4764", "google_scholar": ";RrZWQZUAAAAJ;;", "orcid": ";;;", "linkedin": ";sangyoon-yu-391501225/;;", "or_profile": "~Jinwoo_Lim1;~Sangyoon_Yu1;~Suhyun_Kim1;~Soo-Mook_Moon1", "aff": "Seoul National University;Seoul National University;Korea Institute of Science and Technology;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr;kist.re.kr;snu.ac.kr", "position": "MS student;MS student;Principal Researcher;Full Professor", "bibtex": "@misc{\nlim2024analyzing,\ntitle={Analyzing Implicit Regularization In Federated Learning},\nauthor={Jinwoo Lim and Sangyoon Yu and Suhyun Kim and Soo-Mook Moon},\nyear={2024},\nurl={https://openreview.net/forum?id=39VR7gVEJs}\n}", "github": "", "project": "", "reviewers": "", "site": "https://openreview.net/forum?id=39VR7gVEJs", "pdf_size": 698499, "rating": "", "confidence": "", "soundness": "", "contribution": "", "presentation": "", "wc_summary": "", "wc_strengths": "", "wc_weaknesses": "", "wc_questions": "", "wc_review": "", "wc_reply_reviewers": "", "wc_reply_authors": "", "reply_reviewers": "", "reply_authors": "", "rating_avg": [ 0, 0 ], "confidence_avg": [ 0, 0 ], "soundness_avg": [ 0, 0 ], "contribution_avg": [ 0, 0 ], "presentation_avg": [ 0, 0 ], "wc_summary_avg": [ 0, 0 ], "wc_strengths_avg": [ 0, 0 ], "wc_weaknesses_avg": [ 0, 0 ], "wc_questions_avg": [ 0, 0 ], "wc_review_avg": [ 0, 0 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 0, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:P1OjMWbWBfkJ:scholar.google.com/&scioq=Analyzing+Implicit+Regularization+In+Federated+Learning&hl=en&as_sdt=0,33", "gs_version_total": 
0, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Seoul National University;Korea Institute of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.snu.ac.kr;https://www.kist.re.kr", "aff_unique_abbr": "SNU;KIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Training Unbiased Diffusion Models From Biased Dataset", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19525", "id": "39cPKijBed", "author_site": "Yeongmin Kim, Byeonghu Na, Minsang Park, JoonHo Jang, Dongjun Kim, Wanmo Kang, Il-chul Moon", "tldr": "", "abstract": "With significant advancements in diffusion models, addressing the potential risks of dataset bias becomes increasingly important. Since generated outputs directly suffer from dataset bias, mitigating latent bias becomes a key factor in improving sample quality and proportion. This paper proposes time-dependent importance reweighting to mitigate the bias for the diffusion models. We demonstrate that the time-dependent density ratio becomes more precise than previous approaches, thereby minimizing error propagation in generative learning. While directly applying it to score-matching is intractable, we discover that using the time-dependent density ratio both for reweighting and score correction can lead to a tractable form of the objective function to regenerate the unbiased data density. Furthermore, we theoretically establish a connection with traditional score-matching, and we demonstrate its convergence to an unbiased distribution. The experimental evidence supports the usefulness of the proposed method, which outperforms baselines including time-independent importance reweighting on CIFAR-10, CIFAR-100, FFHQ, and CelebA with various bias settings. 
Our code is available at https://github.com/alsdudrla10/TIW-DSM.", "keywords": "diffusion model;density ratio estimation;dataset bias", "primary_area": "generative models", "supplementary_material": "", "author": "Yeongmin Kim;Byeonghu Na;Minsang Park;JoonHo Jang;Dongjun Kim;Wanmo Kang;Il-chul Moon", "authorids": "~Yeongmin_Kim1;~Byeonghu_Na1;~Minsang_Park1;~JoonHo_Jang1;~Dongjun_Kim1;~Wanmo_Kang1;~Il-chul_Moon1", "gender": "M;M;M;;M;M;", "homepage": "https://sites.google.com/view/yeongmin-space/%ED%99%88;https://sites.google.com/view/byeonghu-na;;https://aailab.kaist.ac.kr/xe2/members_phdstudent/16877;https://sites.google.com/view/dongjun-kim?pli=1;https://sites.google.com/site/wanmokang/;", "dblp": ";276/5100;;241/9686;03/4394;;", "google_scholar": "SBF13JUAAAAJ;https://scholar.google.co.kr/citations?user=mJoqpmEAAAAJ;https://scholar.google.co.kr/citations?user=PhyT2gQAAAAJ;oYbKry4AAAAJ;https://scholar.google.com/citations?hl=ko;;", "orcid": ";0000-0003-3463-2674;;;;;", "linkedin": ";byeonghu-na-17942120b/;;;;;", "or_profile": "~Yeongmin_Kim1;~Byeonghu_Na1;~Minsang_Park1;~JoonHo_Jang1;~Dongjun_Kim1;~Wanmo_Kang1;~Il-chul_Moon1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Sony AI;;", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;sony.com;;", "position": "PhD student;PhD student;MS student;PhD student;Intern;;", "bibtex": "@inproceedings{\nkim2024training,\ntitle={Training Unbiased Diffusion Models From Biased Dataset},\nauthor={Yeongmin Kim and Byeonghu Na and Minsang Park and JoonHo Jang and Dongjun Kim and Wanmo Kang and Il-chul Moon},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=39cPKijBed}\n}", "github": "", "project": "", "reviewers": "WcK7;EYEW;irEP;6Mt9", "pdf_size": 39868109, "rating": "5;5;6;8", "confidence": "3;4;4;4", "soundness": "3;2;3;3", "contribution": "2;2;3;3", "presentation": "3;3;3;4", "wc_summary": "48;58;69;48", "wc_strengths": "93;66;62;134", "wc_weaknesses": "84;321;168;19", "wc_questions": "1;44;11;257", "wc_review": "226;489;310;458", "wc_reply_reviewers": "0;0;84;82", "wc_reply_authors": "604;1321;1434;583", "reply_reviewers": "0;0;1;1", "reply_authors": "4;7;6;4", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 55.75, 8.671072598012312 ], "wc_strengths_avg": [ 88.75, 28.71737279069936 ], "wc_weaknesses_avg": [ 148.0, 112.98893751159889 ], "wc_questions_avg": [ 78.25, 104.42072351789179 ], "wc_review_avg": [ 370.75, 107.51598718330219 ], "wc_reply_reviewers_avg": [ 41.5, 41.506023659223246 ], "wc_reply_authors_avg": [ 985.5, 394.10055823355543 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 5.25, 1.299038105676658 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.4714045207910316, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10250792016981496700&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=39cPKijBed", "pdf": "https://openreview.net/pdf?id=39cPKijBed", "email": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;sony.com;;", "author_num": 7, "aff_unique_index": "0;0;0;0;1", 
"aff_unique_norm": "Korea Advanced Institute of Science and Technology;Sony", "aff_unique_dep": ";Sony AI", "aff_unique_url": "https://www.kaist.ac.kr;https://www.sony.com", "aff_unique_abbr": "KAIST;Sony AI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "South Korea;Japan" }, { "id": "3APJXDoSyS", "title": "The Closer, The Better: Towards Better Representation Learning for Few-Shot Class-Incremental Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Aiming to incrementally learn new classes with only few samples while preserving the knowledge of base (old) classes, few-shot class-incremental learning (FSCIL) faces several challenges, such as overfitting and catastrophic forgetting.\nTo bypass the issues, many works have employed a non-parametric classifier: representing each class with the average of features obtained with a fixed feature extractor trained on base classes.\nUnder such formulation, representation learning is critical to tackle the unique challenges of FSCIL: (1) the transferability of the learned representation to new knowledge, (2) the discriminability between all classes, regardless of old or new.\nRecent advances in representation learning, such as contrastive learning, have greatly improved the transferability, which is often attributed to the spread of intra-class features.\nHowever, we observe that solely improving the transferability can harm the discriminability of FSCIL models, as too much spread of features can degrade the quality of the feature-mean class representation. \nUpon the observation and further experimental analysis, we claim that not only we need to increase the intra-class distance, but we also need to decrease the inter-class distance.\nTrying to secure the spread of features and discriminability within a more confined space due to small inter-class distances, the learned representation strikes a good balance between the transferability and discriminability.\nThe strong performance, without any weight update while learning new classes, demonstrates the effective discriminability and transferability of our new representation, founded upon our seemingly counter-intuitive claim: the-Closer-the-Better (CnB).", "keywords": "few-shot learning;incremental learning;representation learing", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "", "author": "Junghun Oh;Sungyong Baik;Kyoung Mu Lee", "authorids": "~Junghun_Oh1;~Sungyong_Baik1;~Kyoung_Mu_Lee2", "gender": "M;M;M", "homepage": ";https://dsybaik-hy.github.io/;https://cv.snu.ac.kr/kmlee/", "dblp": "281/8729;243/2775;17/4029", "google_scholar": "fCFkL9EAAAAJ;lQ4gotkAAAAJ;Hofj9kAAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Junghun_Oh1;~Sungyong_Baik1;~Kyoung_Mu_Lee1", "aff": "Seoul National University;Hanyang University;Seoul National University", "aff_domain": "snu.ac.kr;hanyang.ac.kr;snu.ac.kr", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@misc{\noh2024the,\ntitle={The Closer, The Better: Towards Better Representation Learning for Few-Shot Class-Incremental Learning},\nauthor={Junghun Oh and Sungyong Baik and Kyoung Mu Lee},\nyear={2024},\nurl={https://openreview.net/forum?id=3APJXDoSyS}\n}", "github": "", "project": "", "reviewers": "JXg4;fd7A;YAmj;h7Ap", "site": "https://openreview.net/forum?id=3APJXDoSyS", "pdf_size": 3460420, "rating": "3;5;5;5", "confidence": "5;4;4;2", "soundness": "3;3;3;2", "contribution": 
"2;3;2;2", "presentation": "3;2;3;3", "wc_summary": "71;100;153;52", "wc_strengths": "43;63;96;25", "wc_weaknesses": "160;83;200;377", "wc_questions": "70;46;114;4", "wc_review": "344;292;563;458", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 94.0, 38.11167800031901 ], "wc_strengths_avg": [ 56.75, 26.34743820563965 ], "wc_weaknesses_avg": [ 205.0, 107.84015949543101 ], "wc_questions_avg": [ 58.5, 39.80891859872609 ], "wc_review_avg": [ 414.25, 104.78638986051575 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.6622661785325219, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:7_vC3k1vi54J:scholar.google.com/&scioq=The+Closer,+The+Better:+Towards+Better+Representation+Learning+for+Few-Shot+Class-Incremental+Learning&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "Seoul National University;Hanyang University", "aff_unique_dep": ";", "aff_unique_url": "https://www.snu.ac.kr;https://www.hanyang.ac.kr", "aff_unique_abbr": "SNU;HYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "id": "3ARfhjGfdF", "title": "Towards Control-Centric Representations in Reinforcement Learning from Images", "track": "main", "status": "Reject", "tldr": "", "abstract": "Image-based Reinforcement Learning is a practical yet challenging task. A major hurdle lies in extracting control-centric representations while disregarding irrelevant information. While approaches that follow the bisimulation principle exhibit the potential in learning state representations to address this issue, they still grapple with the limited expressive capacity of latent dynamics and the inadaptability to sparse reward environments. To address these limitations, we introduce Rebis, which aims to capture control-centric information by integrating reward-free control information alongside reward-specific knowledge. Rebis utilizes a transformer architecture to implicitly model the dynamics and incorporates block-wise masking to eliminate spatiotemporal redundancy. Moreover, Rebis combines bisimulation-based loss with asymmetric reconstruction loss to prevent feature collapse in environments with sparse rewards. 
Empirical studies on two large benchmarks, including Atari games and DeepMind Control Suit, demonstrate that Rebis has superior performance compared to existing methods, proving its effectiveness.", "keywords": "Bisimulation;Reinforcement Learning", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/d1147544da9ccd27c7350c8730e01c1b33d3a073.zip", "author": "Chen Liu;Hongyu Zang;Xin Li;Yong Heng;Yifei Wang;Zhen Fang;Yisen Wang;Mingzhong Wang", "authorids": "~Chen_Liu16;~Hongyu_Zang1;~Xin_Li31;~Yong_Heng1;~Yifei_Wang1;~Zhen_Fang5;~Yisen_Wang1;~Mingzhong_Wang2", "gender": "F;M;F;M;M;M;M;M", "homepage": "https://github.com/LC990512;https://zanghyu.github.io/;https://cs.bit.edu.cn/szdw/jsml/js/lixin/index.htm;;https://yifeiwang77.com;https://github.com/Lucien-Evans-123;https://yisenwang.github.io/;https://www.usc.edu.au/staff/dr-mingzhong-wang", "dblp": ";212/2592.html;09/1365-33.html;https://dblp.uni-trier.de/pid/333/0444;00/555-1;;172/1346-1;12/5272", "google_scholar": ";2kmSy50AAAAJ;https://scholar.google.com/citations?hl=zh-TW;;-CLy6YsAAAAJ;;uMWPDboAAAAJ;Jj30mIUAAAAJ", "orcid": ";;0000-0003-4257-4347;0000-0002-1721-9995;;;;0000-0002-6533-8104", "linkedin": ";;;;;;;", "or_profile": "~Chen_Liu16;~Hongyu_Zang1;~Xin_Li31;~Yong_Heng1;~Yifei_Wang1;~Zhen_Fang5;~Yisen_Wang1;~Mingzhong_Wang2", "aff": "Beijing Institute of Technology;Meituan;Beijing Institute of Technology;;Massachusetts Institute of Technology;Beijing Institute of Technology;Peking University;University of the Sunshine Coast", "aff_domain": "bit.edu.cn;meituan.com;bit.edu.cn;;mit.edu;bit.edu.cn;pku.edu.cn;usc.edu.au", "position": "MS student;Researcher;Associate Professor;;Postdoc;MS student;Assistant Professor;Assistant Professor", "bibtex": "@misc{\nliu2024towards,\ntitle={Towards Control-Centric Representations in Reinforcement Learning from Images},\nauthor={Chen Liu and Hongyu Zang and Xin Li and Yong Heng and Yifei Wang and Zhen Fang and Yisen Wang and Mingzhong Wang},\nyear={2024},\nurl={https://openreview.net/forum?id=3ARfhjGfdF}\n}", "github": "", "project": "", "reviewers": "hpbt;sEtP;6U7G;TC9E", "site": "https://openreview.net/forum?id=3ARfhjGfdF", "pdf_size": 972714, "rating": "3;5;6;8", "confidence": "5;3;4;4", "soundness": "3;2;3;4", "contribution": "2;2;3;3", "presentation": "3;2;4;3", "wc_summary": "63;51;103;76", "wc_strengths": "66;34;98;69", "wc_weaknesses": "362;74;111;62", "wc_questions": "33;170;39;22", "wc_review": "524;329;351;229", "wc_reply_reviewers": "342;85;45;0", "wc_reply_authors": "2346;1894;980;500", "reply_reviewers": "1;1;1;0", "reply_authors": "5;3;2;1", "rating_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 73.25, 19.3180614969515 ], "wc_strengths_avg": [ 66.75, 22.664675157610354 ], "wc_weaknesses_avg": [ 152.25, 122.43850497290467 ], "wc_questions_avg": [ 66.0, 60.35312750802563 ], "wc_review_avg": [ 358.25, 106.16820380886172 ], "wc_reply_reviewers_avg": [ 118.0, 132.7761273723556 ], "wc_reply_authors_avg": [ 1430.0, 728.3117464382955 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 1.479019945774904 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.39223227027636803, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:9xBPJ_cmW7gJ:scholar.google.com/&scioq=Towards+Control-Centric+Representations+in+Reinforcement+Learning+from+Images&hl=en&as_sdt=0,44", "gs_version_total": 7, "aff_unique_index": "0;1;0;2;0;3;4", "aff_unique_norm": "Beijing Institute of Technology;Meituan;Massachusetts Institute of Technology;Peking University;University of the Sunshine Coast", "aff_unique_dep": ";;;;", "aff_unique_url": "http://www.bit.edu.cn/;https://www.meituan.com;https://web.mit.edu;http://www.pku.edu.cn;https://www.usc.edu.au", "aff_unique_abbr": "BIT;Meituan;MIT;Peking U;USC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0;2", "aff_country_unique": "China;United States;Australia" }, { "id": "3ARp595Ucc", "title": "Beyond Joint Demonstrations: Personalized Expert Guidance for Efficient Multi-Agent Reinforcement Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Multi-Agent Reinforcement Learning (MARL) algorithms face the challenge of efficient exploration due to the exponential increase in the size of the joint state-action space. While demonstration-guided learning has proven beneficial in single-agent settings, its direct applicability to MARL is hindered by the practical difficulty of obtaining joint expert demonstrations.\nIn this work, we introduce a novel concept of personalized expert demonstrations that an agent-specific expert provides. These demonstrations are tailored for an individual agent or, more broadly, for an individual type of agent in a heterogeneous team. It is crucial to emphasize that these demonstrations solely pertain to single-agent behaviors and do not encompass any cooperative elements. Consequently, it is essential to note that these demonstrations may not be inherently optimal when employed within a cooperative setting.\nTo bootstrap the learning from the personalized expert demonstrations, we reformulate the MARL problem in occupancy measure space and propose two innovative algorithms, namely expert-guided MARL (EG-MARL) and Generalized EG-MARL (GEG-MARL). These algorithms involve the acquisition of personalized reward signals through demonstrations to guide agent exploration and the fostering of collaborative behaviors through environmental reward feedbacks.\nOur proposed algorithms are evaluated across both discrete and continuous environments. 
The results underscore the capacity of our methods to learn near-optimal policies even when provided with suboptimal demonstrations, and they excel in solving coordinated tasks that challenge state-of-the-art MARL algorithms.", "keywords": "Multi-agent Reinforcement Learning;Learning from Demonstration;Actor-Critic", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/45330b371de6c68e13d663a4537e6d921738db5f.zip", "author": "Peihong Yu;Amrit Bedi;Alec Koppel;Carl Busart;Priya Narayan;Dinesh Manocha;Pratap Tokekar", "authorids": "~Peihong_Yu1;~Amrit_Bedi1;~Alec_Koppel1;carl.e.busart.civ@army.mil;~Priya_Narayan1;~Dinesh_Manocha3;~Pratap_Tokekar1", "gender": "F;M;M;;;M;M", "homepage": "https://peihongyu.com/;https://sites.google.com/view/amritsinghbedi/home;http://koppel.netlify.app/;;https://scholar.google.com/citations?user=Za8YrUsAAAAJ&hl=en;https://www.cs.umd.edu/people/dmanocha;https://tokekar.com/", "dblp": "201/8446;176/2707.html;149/0076;;;m/DineshManocha;", "google_scholar": "rzUHzRgAAAAJ;91WLA6QAAAAJ;8ClxyjIAAAAJ;;;X08l_4IAAAAJ;FKAovywAAAAJ", "orcid": ";;0000-0003-2447-2873;;;0000-0001-7047-9801;", "linkedin": ";;alec-koppel-9860b697/;;;dinesh-manocha-2311846;prataptokekar/", "or_profile": "~Peihong_Yu1;~Amrit_Bedi1;~Alec_Koppel1;carl.e.busart.civ@army.mil;~Priya_Narayan1;~Dinesh_Manocha3;~Pratap_Tokekar1", "aff": "University of Maryland, College Park;University of Maryland, College Park;J.P. Morgan Chase;;;University of Maryland, College Park;University of Maryland, College Park", "aff_domain": "umd.edu;umd.edu;jpmorgan.com;;;umd.edu;umd.edu", "position": "PhD student;Researcher;Research Team Lead;;;Professor;Associate Professor", "bibtex": "@misc{\nyu2024beyond,\ntitle={Beyond Joint Demonstrations: Personalized Expert Guidance for Efficient Multi-Agent Reinforcement Learning},\nauthor={Peihong Yu and Amrit Bedi and Alec Koppel and Carl Busart and Priya Narayan and Dinesh Manocha and Pratap Tokekar},\nyear={2024},\nurl={https://openreview.net/forum?id=3ARp595Ucc}\n}", "github": "", "project": "", "reviewers": "oHLh;C51y;eCAT;G5rt", "site": "https://openreview.net/forum?id=3ARp595Ucc", "pdf_size": 8786283, "rating": "3;3;3;6", "confidence": "3;4;4;3", "soundness": "2;1;1;3", "contribution": "2;2;1;3", "presentation": "3;2;2;3", "wc_summary": "71;63;93;89", "wc_strengths": "26;60;29;58", "wc_weaknesses": "265;51;210;134", "wc_questions": "71;113;58;54", "wc_review": "433;287;390;335", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "22;22;22;22", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 1.75, 0.82915619758885 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 79.0, 12.409673645990857 ], "wc_strengths_avg": [ 43.25, 15.801503093060482 ], "wc_weaknesses_avg": [ 165.0, 80.59466483583141 ], "wc_questions_avg": [ 74.0, 23.37733945512192 ], "wc_review_avg": [ 361.25, 55.17415608779168 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 22.0, 0.0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2428337089863033995&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "University of Maryland;JPMorgan Chase & Co.", "aff_unique_dep": 
";", "aff_unique_url": "https://www/umd.edu;https://www.jpmorganchase.com", "aff_unique_abbr": "UMD;JPM", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "3BUhO4ISkJ", "title": "InstructEdit: Improving Automatic Masks for Diffusion-based Image Editing With User Instructions", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Recent works have explored text-guided image editing using diffusion models and generated edited images based on text prompts. However, the models struggle to accurately locate the regions to be edited and faithfully perform precise edits. In this work, we propose a framework termed InstructEdit that can do fine-grained editing based on user instructions. Our proposed framework has three components: language processor, segmenter, and image editor. The first component, the language processor, processes the user instruction using a large language model. The goal of this processing is to parse the user instruction and output prompts for the segmenter and captions for the image editor. We adopt ChatGPT and optionally BLIP2 for this step. The second component, the segmenter, uses the segmentation prompt provided by the language processor. We employ a state-of-the-art segmentation framework Grounded Segment Anything to automatically generate a high-quality mask based on the segmentation prompt. The third component, the image editor, uses the captions from the language processor and the masks from the segmenter to compute the edited image. We adopt Stable Diffusion and the mask-guided generation from DiffEdit for this purpose. What are the main achievements, outcomes, results? Experiments show that our method outperforms previous editing methods in fine-grained editing applications where the input image contains a complex object or multiple objects. We improve the mask quality over DiffEdit and thus improve the quality of edited images. 
We also show that our framework can accept multiple forms of user instructions as input.", "keywords": "Text-guided image editing;Language models;Diffusion models", "primary_area": "generative models", "supplementary_material": "/attachment/7862385af9e77b777f4cac4ee55b6cc22640d5eb.zip", "author": "Qian Wang;Biao Zhang;Michael Birsak;Peter Wonka", "authorids": "~Qian_Wang19;~Biao_Zhang5;~Michael_Birsak1;~Peter_Wonka1", "gender": "F;;M;M", "homepage": "https://qianwangx.github.io/;https://1zb.github.io;;http://peterwonka.net", "dblp": ";83/3266-5;147/4629;98/5522", "google_scholar": ";h5KukxEAAAAJ;GF0RLAwAAAAJ;https://scholar.google.com.tw/citations?user=0EKXSXgAAAAJ", "orcid": ";;;0000-0003-0627-9746", "linkedin": ";;michael-birsak;", "or_profile": "~Qian_Wang19;~Biao_Zhang5;~Michael_Birsak1;~Peter_Wonka1", "aff": "King Abdullah University of Science and Technology;KAUST;King Abdullah University of Science and Technology;KAUST", "aff_domain": "kaust.edu.sa;kaust.edu.sa;kaust.edu.sa;kaust.edu.sa", "position": "PhD student;Postdoc;Researcher;Full Professor", "bibtex": "@misc{\nwang2024instructedit,\ntitle={InstructEdit: Improving Automatic Masks for Diffusion-based Image Editing With User Instructions},\nauthor={Qian Wang and Biao Zhang and Michael Birsak and Peter Wonka},\nyear={2024},\nurl={https://openreview.net/forum?id=3BUhO4ISkJ}\n}", "github": "", "project": "", "reviewers": "YLK1;6zrz;pqYf;PLFP", "site": "https://openreview.net/forum?id=3BUhO4ISkJ", "pdf_size": 12686786, "rating": "3;3;3;6", "confidence": "4;4;4;4", "soundness": "2;2;3;3", "contribution": "1;2;2;2", "presentation": "2;3;3;3", "wc_summary": "56;37;109;176", "wc_strengths": "63;27;150;72", "wc_weaknesses": "346;135;227;184", "wc_questions": "134;2;120;18", "wc_review": "599;201;606;450", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 94.5, 53.94673298727181 ], "wc_strengths_avg": [ 78.0, 44.84974916317816 ], "wc_weaknesses_avg": [ 223.0, 78.1184997295775 ], "wc_questions_avg": [ 68.5, 58.98092912119984 ], "wc_review_avg": [ 464.0, 164.12952202452794 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10130802581700303433&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "King Abdullah University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kast.kau.edu.sa", "aff_unique_abbr": "KAUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Saudi Arabia" }, { "id": "3CGpIeU212", "title": "Are training trajectories of deep single-spike and deep ReLU network equivalent?", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Communication by binary and sparse spikes is a key factor for the energy efficiency of biological brains. 
However, training deep spiking neural networks (SNNs) with backpropagation is harder than with artificial neural networks (ANNs), which is puzzling given that recent theoretical results provide exact mapping algorithms from ReLU to time-to-first-spike (TTFS) SNNs. Building upon these results, we analyze in theory and in simulation the learning dynamics of TTFS-SNNs. Our analysis highlights that even when an SNN can be mapped exactly to a ReLU network, it cannot always be robustly trained by gradient descent. The reason for that is the emergence of a specific instance of the vanishing-and-exploding gradient problem leading to a bias in the gradient descent trajectory in comparison with the equivalent ANN. After identifying this issue we derive a generic solution for the network initialization and SNN parameterization which guarantees that the SNN can be trained as robustly as its ANN counterpart. Our theoretical findings are illustrated in practice on image classification datasets. Our method achieves the same accuracy as deep ConvNets on CIFAR10 and enables fine-tuning on the much larger PLACES365 dataset without loss of accuracy compared to the ANN. We argue that the combined perspective of conversion and fine-tuning with robust gradient descent in SNN will be decisive to optimize SNNs for hardware implementations needing low latency and resilience to noise and quantization.", "keywords": "deep spiking neural network training;time-to-first-spike;vanishing gradient problem;ReLU network;network initialization", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Ana Stanojevic;Stanis\u0142aw Wo\u017aniak;Guillaume Bellec;Giovanni Cherubini;Angeliki Pantazi;Wulfram Gerstner", "authorids": "~Ana_Stanojevic1;~Stanis\u0142aw_Wo\u017aniak1;~Guillaume_Bellec1;~Giovanni_Cherubini2;~Angeliki_Pantazi1;~Wulfram_Gerstner1", "gender": "F;M;M;M;F;", "homepage": ";;https://guillaumebellec.github.io;https://researcher.watson.ibm.com/researcher/view.php?person=zurich-cbi;https://research.ibm.com/people/angeliki-pantazi;https://lcnwww.epfl.ch/gerstner/", "dblp": "262/5987.html;173/2997;;07/5927;83/1243;g/WGerstner", "google_scholar": "3DNfrZYAAAAJ;axP7kosAAAAJ;fSXUVvAAAAAJ;J6NdeI4AAAAJ;https://scholar.google.ch/citations?user=BEMNIoQAAAAJ;https://scholar.google.ch/citations?user=vSd2RnEAAAAJ", "orcid": ";;0000-0001-7568-4994;0000-0002-0573-2919;0000-0003-4720-4038;0000-0002-4344-2189", "linkedin": "ana--stanojevic/;;;giovanni-cherubini-a379b29/;;", "or_profile": "~Ana_Stanojevic1;~Stanis\u0142aw_Wo\u017aniak1;~Guillaume_Bellec1;~Giovanni_Cherubini2;~Angeliki_Pantazi1;~Wulfram_Gerstner1", "aff": ";IBM Research;Swiss Federal Institute of Technology Lausanne;International Business Machines;International Business Machines;EPFL - EPF Lausanne", "aff_domain": ";ibm.com;epfl.ch;ibm.com;ibm.com;epfl.ch", "position": ";Researcher;Postdoc;Researcher;Principal Researcher;Full Professor", "bibtex": "@misc{\nstanojevic2024are,\ntitle={Are training trajectories of deep single-spike and deep Re{LU} network equivalent?},\nauthor={Ana Stanojevic and Stanis{\\l}aw Wo{\\'z}niak and Guillaume Bellec and Giovanni Cherubini and Angeliki Pantazi and Wulfram Gerstner},\nyear={2024},\nurl={https://openreview.net/forum?id=3CGpIeU212}\n}", "github": "", "project": "", "reviewers": "", "site": "https://openreview.net/forum?id=3CGpIeU212", "pdf_size": 0, "rating": "", "confidence": "", "soundness": "", "contribution": "", "presentation": "", "wc_summary": "", "wc_strengths": 
"", "wc_weaknesses": "", "wc_questions": "", "wc_review": "", "wc_reply_reviewers": "", "wc_reply_authors": "", "reply_reviewers": "", "reply_authors": "", "rating_avg": [ 0, 0 ], "confidence_avg": [ 0, 0 ], "soundness_avg": [ 0, 0 ], "contribution_avg": [ 0, 0 ], "presentation_avg": [ 0, 0 ], "wc_summary_avg": [ 0, 0 ], "wc_strengths_avg": [ 0, 0 ], "wc_weaknesses_avg": [ 0, 0 ], "wc_questions_avg": [ 0, 0 ], "wc_review_avg": [ 0, 0 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 0, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10709245391331482397&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1;2;2;3", "aff_unique_norm": "IBM;Swiss Federal Institute of Technology Lausanne;International Business Machines Corporation;EPFL", "aff_unique_dep": "IBM Research;;;", "aff_unique_url": "https://www.ibm.com/research;https://www.epfl.ch;https://www.ibm.com;https://www.epfl.ch", "aff_unique_abbr": "IBM;EPFL;IBM;EPFL", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Lausanne", "aff_country_unique_index": "0;1;0;0;1", "aff_country_unique": "United States;Switzerland" }, { "id": "3D0mOtnHGR", "title": "Preprocessing Enhanced Image Compression for Machine Vision", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Recently, more and more images are compressed and sent to the back-end devices for machine analysis tasks (e.g. object detection) instead of being purely watched by humans. \nHowever, most traditional or learned image codecs are designed to minimize the distortion of the human visual system without considering the increased demand from machine vision systems. In this work, we propose a preprocessing enhanced image compression method for machine vision tasks to address this challenge. Instead of relying on the learned image codecs for end-to-end optimization, our framework is built upon the traditional non-differential codecs, which means it is standard compatible and can be easily deployed in practical applications. Specifically, we propose a neural preprocessing module before the encoder to maintain the useful semantic information for the downstream tasks and suppress the irrelevant information for bitrate saving. Furthermore, our neural preprocessing module is quantization adaptive and can be used in different compression ratios. \nMore importantly, to jointly optimize the preprocessing module with the downstream machine vision tasks, we introduce the proxy network for the traditional non-differential codecs in the back-propagation stage. We provide extensive experiments by evaluating our compression method for several representative downstream tasks with different backbone networks. 
Experimental results show our method achieves a better trade-off between the coding bitrate and the performance of the downstream machine vision tasks by saving about 20\\% bitrate.", "keywords": "image compression;machine vision;preprocessing", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Guo Lu;Xingtong Ge;Tianxiong Zhong;Jing Geng;Guosheng Yu", "authorids": "~Guo_Lu2;~Xingtong_Ge1;~Tianxiong_Zhong1;~Jing_Geng1;~Guosheng_Yu1", "gender": "M;M;M;F;M", "homepage": "https://guolusjtu.github.io/guoluhomepage/;https://xingtongge.github.io/;;;http://www.nohomepage.org", "dblp": "76/7805;;322/3730;;", "google_scholar": "R9iwlJcAAAAJ;Rf8UmtoAAAAJ;;;", "orcid": ";;;0000-0003-4076-6134;", "linkedin": ";xingtong-ge/;;;", "or_profile": "~Guo_Lu2;~Xingtong_Ge1;~Tianxiong_Zhong1;~Jing_Geng1;~Guosheng_Yu1", "aff": "Shanghai Jiaotong University;Beijing Institute of Technology;Beijing Institute of Technology;Beijing Institute of Technology;", "aff_domain": "sjtu.edu.cn;bit.edu.cn;bit.edu.cn;bit.edu.cn;", "position": "Assistant Professor;MS student;MS student;Assistant Professor;", "bibtex": "@misc{\nlu2024preprocessing,\ntitle={Preprocessing Enhanced Image Compression for Machine Vision},\nauthor={Guo Lu and Xingtong Ge and Tianxiong Zhong and Jing Geng and Guosheng Yu},\nyear={2024},\nurl={https://openreview.net/forum?id=3D0mOtnHGR}\n}", "github": "", "project": "", "reviewers": "fHZ6;uQPP;maca", "site": "https://openreview.net/forum?id=3D0mOtnHGR", "pdf_size": 12594433, "rating": "3;3;5", "confidence": "5;5;5", "soundness": "2;2;3", "contribution": "1;2;3", "presentation": "2;2;3", "wc_summary": "55;32;41", "wc_strengths": "20;23;47", "wc_weaknesses": "178;200;174", "wc_questions": "5;3;4", "wc_review": "258;258;266", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 5.0, 0.0 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.0, 0.816496580927726 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 42.666666666666664, 9.46337971105226 ], "wc_strengths_avg": [ 30.0, 12.083045973594572 ], "wc_weaknesses_avg": [ 184.0, 11.430952132988164 ], "wc_questions_avg": [ 4.0, 0.816496580927726 ], "wc_review_avg": [ 260.6666666666667, 3.7712361663282534 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 3, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10381605748897511859&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Shanghai Jiao Tong University;Beijing Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.sjtu.edu.cn;http://www.bit.edu.cn/", "aff_unique_abbr": "SJTU;BIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "3DPTnFokLp", "title": "Benchmarking Multimodal Variational Autoencoders: CdSprites+ Dataset and Toolkit", "track": "main", "status": "Desk Reject", "tldr": "", "abstract": "Multimodal Variational Autoencoders (VAEs) have been the subject of intense research in the past years as they can integrate multiple modalities into a 
joint representation and can thus serve as a promising tool for both data classification and generation. Several approaches toward multimodal VAE learning have been proposed so far, their comparison and evaluation have however been rather inconsistent. One reason is that the models differ at the implementation level, another problem is that the datasets commonly used in these cases were not initially designed to evaluate multimodal generative models. This paper addresses both mentioned issues. First, we propose a toolkit for systematic multimodal VAE training and comparison. The toolkit currently comprises 4 existing multimodal VAEs and 6 commonly used benchmark datasets along with instructions on how to easily add a new model or a dataset. Second, we present a disentangled bimodal dataset designed to comprehensively evaluate the joint generation and cross-generation capabilities across multiple difficulty levels. We demonstrate the utility of our dataset by comparing the implemented state-of-the-art models.", "keywords": "variational autoencoders;multimodal generative models;multimodal datasets;benchmark toolkits", "primary_area": "datasets and benchmarks", "supplementary_material": "/attachment/bc058164973cba4ee50609720a7f7173ada9dca6.zip", "author": "Gabriela Sejnova;Michal Vavrecka;Karla Stepanova", "authorids": "~Gabriela_Sejnova1;~Michal_Vavrecka1;~Karla_Stepanova1", "gender": "F;;F", "homepage": ";;http://karlastepanova.cz", "dblp": ";;", "google_scholar": ";AhgkbbIAAAAJ;I-PURrEAAAAJ", "orcid": "0000-0002-0045-6425;;0000-0003-4239-2092", "linkedin": ";;", "or_profile": "~Gabriela_Sejnova1;~Michal_Vavrecka1;~Karla_Stepanova1", "aff": ";;", "aff_domain": ";;", "position": ";;", "bibtex": "@misc{\nsejnova2024benchmarking,\ntitle={Benchmarking Multimodal Variational Autoencoders: CdSprites+ Dataset and Toolkit},\nauthor={Gabriela Sejnova and Michal Vavrecka and Karla Stepanova},\nyear={2024},\nurl={https://openreview.net/forum?id=3DPTnFokLp}\n}", "github": "", "project": "", "reviewers": "8MxM;naTF;EZ4B;2Pvg", "site": "https://openreview.net/forum?id=3DPTnFokLp", "pdf_size": 4416460, "rating": "3;5;5;6", "confidence": "5;4;3;3", "soundness": "3;3;3;3", "contribution": "1;2;2;3", "presentation": "3;2;2;3", "wc_summary": "46;98;112;32", "wc_strengths": "37;50;60;31", "wc_weaknesses": "681;327;204;23", "wc_questions": "58;145;78;3", "wc_review": "822;620;454;89", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "399;448;796;174", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 72.0, 33.734255586866 ], "wc_strengths_avg": [ 44.5, 11.280514172678478 ], "wc_weaknesses_avg": [ 308.75, 240.5871723513122 ], "wc_questions_avg": [ 71.0, 50.788778288121875 ], "wc_review_avg": [ 496.25, 268.82371082179486 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 454.25, 222.72446542757712 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.899228803025897, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15045849436249823363&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3 }, { "id": "3DdCB6MRvu", "title": "3D Tissue Reconstruction and Generation for Single-Cell Spatial Transcriptomics using Neural Radiance Fields", "track": 
"main", "status": "Withdraw", "tldr": "", "abstract": "Single-cell spatial transcriptomics (scST) is a groundbreaking technique that allows for the exploration of gene expression patterns, cell-cell interactions, and tissue organization at the single-cell level. Traditional approaches in scST reconstruction mainly focus on assigning two-dimensional (2D) coordinates to individual cells within a pre-established region. This often requires a large amount of 2D slice data, such as ssDNAs images, which escalates both costs and the complexity involved in studying and reconstructing the tissue's three-dimensional (3D) organization. Here, we introduce a novel method for scST reconstruction, which is a Neural Radiance Fields (NeRF)-based 3D-aware generative model termed STscan, that aims to reconstruct a 3D scST scene using a minimal amount from 2D images (fewer than 10). Additionally, STscan can identify cell types and their expression levels within this 3D environment. To the best of our knowledge, STscan is the first NeRF-based method specifically designed for single-cell ST reconstruction, and it is the first end-to-end solution capable of directly reconstructing in vitro cell-cell environments from ssDNA images. This approach has the potential to significantly reduce both the complexity and cost associated with scST studies.", "keywords": "Single-Cell Spatial Transcriptomics;Neural Radiance Fields;3D reconstruction", "primary_area": "applications to physical sciences (physics, chemistry, biology, etc.)", "supplementary_material": "", "author": "Yantong Cai;Xiaoxue Chen;Lin Li;Guangchuang Yu;Hao Zhao", "authorids": "~Yantong_Cai1;~Xiaoxue_Chen1;~Lin_Li22;~Guangchuang_Yu1;~Hao_Zhao1", "gender": "F;F;;M;M", "homepage": "https://github.com/yangtcai;https://air.tsinghua.edu.cn/;;https://yulab-smu.top/;https://sites.google.com/view/fromandto", "dblp": "369/6081;;;;08/3737-2.html", "google_scholar": ";;;https://scholar.google.com/citations?hl=zh-CN;ygQznUQAAAAJ", "orcid": "0000-0003-2137-4979;;0000-0002-6522-1889;0000-0002-6485-8781;", "linkedin": ";;;;", "or_profile": "~Yantong_Cai1;~Xiaoxue_Chen1;~Lin_Li22;~Guangchuang_Yu1;~Hao_Zhao1", "aff": "Dermatology Hospital, Southern Medical University;Tsinghua University;Southern Methodist University, Southern Methodist University;Southern Medical University;Peking University", "aff_domain": "smu.edu;tsinghua.edu.cn;smu.edu;smu.edu.cn;pku.edu.cn", "position": "Postdoc;PhD student;MS student;Full Professor;Postdoc", "bibtex": "@misc{\ncai2024d,\ntitle={3D Tissue Reconstruction and Generation for Single-Cell Spatial Transcriptomics using Neural Radiance Fields},\nauthor={Yantong Cai and Xiaoxue Chen and Lin Li and Guangchuang Yu and Hao Zhao},\nyear={2024},\nurl={https://openreview.net/forum?id=3DdCB6MRvu}\n}", "github": "", "project": "", "reviewers": "2SGX;Vcft;TxGH", "site": "https://openreview.net/forum?id=3DdCB6MRvu", "pdf_size": 4712870, "rating": "3;3;5", "confidence": "5;3;4", "soundness": "2;2;3", "contribution": "2;2;2", "presentation": "2;2;2", "wc_summary": "71;127;73", "wc_strengths": "31;21;60", "wc_weaknesses": "188;362;144", "wc_questions": "72;5;124", "wc_review": "362;515;401", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 
90.33333333333333, 25.94010194445829 ], "wc_strengths_avg": [ 37.333333333333336, 16.539514973407037 ], "wc_weaknesses_avg": [ 231.33333333333334, 94.12521211426593 ], "wc_questions_avg": [ 67.0, 48.71002634639676 ], "wc_review_avg": [ 426.0, 64.91532946846992 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 3, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:v5UVPhgjLnYJ:scholar.google.com/&scioq=3D+Tissue+Reconstruction+and+Generation+for+Single-Cell+Spatial+Transcriptomics+using+Neural+Radiance+Fields&hl=en&as_sdt=0,10", "gs_version_total": 0, "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "Southern Medical University;Tsinghua University;Southern Methodist University;Peking University", "aff_unique_dep": "Dermatology Hospital;;;", "aff_unique_url": "http://www.fsmu.edu.cn;https://www.tsinghua.edu.cn;https://www.smu.edu;http://www.pku.edu.cn", "aff_unique_abbr": ";THU;SMU;Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "Chain of Thought Empowers Transformers to Solve Inherently Serial Problems", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19524", "id": "3EWTEy9MTM", "author_site": "Zhiyuan Li, Hong Liu, Denny Zhou, Tengyu Ma", "tldr": "", "abstract": "Generating a sequence of intermediate steps, \\emph{a.k.a.}, a chain of thought (CoT), is a highly effective method to improve the accuracy of large language models (LLMs) on arithmetics and symbolic reasoning tasks. However, the mechanism behind CoT remains unclear. \nThis work provides a theoretical understanding of the power of CoT for decoder-only transformers through the lens of expressiveness. Conceptually, CoT empowers the model with the ability to perform inherently serial computation, which is otherwise lacking in transformers, especially when depth is low. Given input length $n$, previous works have constant-depth transformers with finite precision $\\mathsf{poly}(n)$ embedding size can only solve problems in $\\mathsf{TC}^0$ without CoT. We first show an even tighter expressiveness upper bound for constant-depth transformers with constant-bit precision, which can only solve problems in $\\mathsf{AC}^0$, a proper subset of $ \\mathsf{TC}^0$. However, with $T$ steps of CoT, constant-depth transformers using constant-bit precision and $O(\\log n)$ embedding size can solve any problem solvable by boolean circuits of size $T$. 
Empirically, enabling CoT dramatically improves the accuracy for tasks that are hard for parallel computation, including the composition of permutation groups, iterated squaring, and circuit value problems, especially for low-depth transformers.", "keywords": "Chain of thought;language modeling;circuit complexity;deep learning theory", "primary_area": "learning theory", "supplementary_material": "", "author": "Zhiyuan Li;Hong Liu;Denny Zhou;Tengyu Ma", "authorids": "~Zhiyuan_Li2;~Hong_Liu5;~Denny_Zhou1;~Tengyu_Ma1", "gender": "M;M;M;", "homepage": "https://zhiyuanli.ttic.edu;;http://ai.stanford.edu/~tengyuma/;https://dennyzhou.github.io/", "dblp": "l/ZhiyuanLi;;54/9061;178/3277", "google_scholar": "https://scholar.google.com/citations?hl=en;BUc2uq0AAAAJ;i38QlUwAAAAJ;UwLsYw8AAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Zhiyuan_Li2;~Hong_Liu5;~Tengyu_Ma1;~Dengyong_Zhou2", "aff": "Toyota Technological Institute at Chicago;Stanford University;Facebook AI Research;Google DeepMind", "aff_domain": "ttic.edu;stanford.edu;fb.com;google.com", "position": "Assistant Professor;PhD student;Visiting Scientist;Research Scientist", "bibtex": "@inproceedings{\nli2024chain,\ntitle={Chain of Thought Empowers Transformers to Solve Inherently Serial Problems},\nauthor={Zhiyuan Li and Hong Liu and Denny Zhou and Tengyu Ma},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=3EWTEy9MTM}\n}", "github": "", "project": "", "reviewers": "pRpF;qiA8;zX88;woBt;Aro9;wS3v", "pdf_size": 4402629, "rating": "3;5;6;8;8;8", "confidence": "4;4;4;5;2;2", "soundness": "3;3;4;4;3;3", "contribution": "4;3;2;3;3;3", "presentation": "2;3;3;3;4;3", "wc_summary": "41;163;97;109;137;38", "wc_strengths": "253;38;124;137;68;45", "wc_weaknesses": "317;107;712;434;28;296", "wc_questions": "173;115;111;124;1;3", "wc_review": "784;423;1044;804;234;382", "wc_reply_reviewers": "0;0;0;125;0;0", "wc_reply_authors": "615;497;239;749;24;161", "reply_reviewers": "0;0;0;1;0;0", "reply_authors": "1;1;1;2;1;1", "rating_avg": [ 6.333333333333333, 1.8856180831641267 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 3.0, 0.5773502691896257 ], "presentation_avg": [ 3.0, 0.5773502691896257 ], "wc_summary_avg": [ 97.5, 46.03531253288067 ], "wc_strengths_avg": [ 110.83333333333333, 73.64649044968509 ], "wc_weaknesses_avg": [ 315.6666666666667, 222.7724299718337 ], "wc_questions_avg": [ 87.83333333333333, 63.99587660327986 ], "wc_review_avg": [ 611.8333333333334, 284.1879757406276 ], "wc_reply_reviewers_avg": [ 20.833333333333332, 46.58474953124561 ], "wc_reply_authors_avg": [ 380.8333333333333, 258.08552630647245 ], "reply_reviewers_avg": [ 0.16666666666666666, 0.372677996249965 ], "reply_authors_avg": [ 1.1666666666666667, 0.3726779962499649 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3952847075210475, "gs_citation": 109, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11728270641767049962&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=3EWTEy9MTM", "pdf": "https://openreview.net/pdf?id=3EWTEy9MTM", "email": "ttic.edu;stanford.edu;fb.com;google.com", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Toyota Technological Institute at Chicago;Stanford University;Meta;Google", "aff_unique_dep": ";;Facebook AI Research;Google DeepMind", "aff_unique_url": 
"https://www.tti-chicago.org;https://www.stanford.edu;https://research.facebook.com;https://deepmind.com", "aff_unique_abbr": "TTI Chicago;Stanford;FAIR;DeepMind", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Chicago;Stanford;", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "Self-Supervised Heterogeneous Graph Learning: a Homophily and Heterogeneity View", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19523", "id": "3FJOKjooIj", "author_site": "YUJIE MO, Feiping Nie, Ping Hu, Heng Tao Shen, Zheng Zhang, Xinchao Wang, Xiaofeng Zhu", "tldr": "", "abstract": "Self-supervised heterogeneous graph learning has achieved promising results in various real applications, but it still suffers from the following issues: (i) meta-paths can be employed to capture the homophily in the heterogeneous graph, but meta-paths are human-defined, requiring substantial expert knowledge and computational costs; and (ii) the heterogeneity in the heterogeneous graph is usually underutilized, leading to the loss of task-related information. To solve these issues, this paper proposes to capture both homophily and heterogeneity in the heterogeneous graph without pre-defined meta-paths. Specifically, we propose to learn a self-expressive matrix to capture the homophily from the subspace and nearby neighbors. Meanwhile, we propose to capture the heterogeneity by aggregating the information of nodes from different types. We further design a consistency loss and a specificity loss, respectively, to extract the consistent information between homophily and heterogeneity and to preserve their specific task-related information. We theoretically analyze that the learned homophilous representations exhibit the grouping effect to capture the homophily, and considering both homophily and heterogeneity introduces more task-related information. 
Extensive experimental results verify the superiority of the proposed method on different downstream tasks.", "keywords": "Graph representation learning;Heterogeneous graph;Self-supervised learning", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Yujie Mo;Feiping Nie;Ping Hu;Heng Tao Shen;Zheng Zhang;Xinchao Wang;Xiaofeng Zhu", "authorids": "~Yujie_Mo1;~Feiping_Nie2;~Ping_Hu3;~Heng_Tao_Shen3;~Zheng_Zhang7;~Xinchao_Wang1;~Xiaofeng_Zhu7", "gender": ";M;M;M;M;M;M", "homepage": "https://yujiemo.github.io/;https://dblp.org/pid/80/5755.html;http://feinanshan.github.io;;https://sites.google.com/site/seanzhuxf/;https://cfm.uestc.edu.cn/~shenht/;https://sites.google.com/site/sitexinchaowang/", "dblp": "282/0552;;53/5490-1;181/2621-6.html;60/4671-1;s/HTShen;", "google_scholar": "MH4nZY0AAAAJ;;ddrD2TgAAAAJ;tpVOb2EAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.au/citations?user=krryaDkAAAAJ;https://scholar.google.com.tw/citations?user=w69Buq0AAAAJ", "orcid": "0000-0001-7784-6221;;;0000-0003-1470-6998;0000-0001-6840-0578;;", "linkedin": ";;;;;;", "or_profile": "~Yujie_Mo1;~Feiping_Nie2;~Ping_Hu3;~Zheng_Zhang7;~Xiaofeng_Zhu7;~Hengtao_Shen1;~Xinchao_WANG3", "aff": "University of Electronic Science and Technology of China;Northwest Polytechnical University Xi'an;University of Electronic Science and Technology of China;Harbin Institute of Technology;University of Electronic Science and Technology of China;Tongji University;National University of Singapore", "aff_domain": "uestc.edu.cn;nwpu.edu.cn;uestc.edu.cn;hit.edu.cn;uestc.edu.cn;tongji.edu.cn;nus.edu", "position": "PhD student;Full Professor;Full Professor;Full Professor;Full Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nmo2024selfsupervised,\ntitle={Self-Supervised Heterogeneous Graph Learning: a Homophily and Heterogeneity View},\nauthor={Yujie Mo and Feiping Nie and Ping Hu and Heng Tao Shen and Zheng Zhang and Xinchao Wang and Xiaofeng Zhu},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=3FJOKjooIj}\n}", "github": "", "project": "", "reviewers": "BScY;h7xr;6ShR;rAzi;EpeM;Zp3d", "pdf_size": 7690475, "rating": "6;6;6;8;8;8", "confidence": "4;3;4;4;5;3", "soundness": "3;3;2;3;4;3", "contribution": "3;4;2;4;3;3", "presentation": "3;3;3;3;3;4", "wc_summary": "81;59;58;86;75;93", "wc_strengths": "99;46;28;81;139;126", "wc_weaknesses": "73;88;227;75;91;237", "wc_questions": "89;51;187;58;2;6", "wc_review": "342;244;500;300;307;462", "wc_reply_reviewers": "0;0;164;0;0;0", "wc_reply_authors": "354;331;1712;377;213;973", "reply_reviewers": "0;0;2;0;0;0", "reply_authors": "1;1;4;1;1;2", "rating_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.8333333333333335, 0.6871842709362768 ], "soundness_avg": [ 3.0, 0.5773502691896257 ], "contribution_avg": [ 3.1666666666666665, 0.6871842709362768 ], "presentation_avg": [ 3.1666666666666665, 0.3726779962499649 ], "wc_summary_avg": [ 75.33333333333333, 13.072447700751718 ], "wc_strengths_avg": [ 86.5, 39.928060308509856 ], "wc_weaknesses_avg": [ 131.83333333333334, 71.17681426482145 ], "wc_questions_avg": [ 65.5, 62.12017922275069 ], "wc_review_avg": [ 359.1666666666667, 91.45931092871602 ], "wc_reply_reviewers_avg": [ 27.333333333333332, 61.11919138499424 ], "wc_reply_authors_avg": [ 660.0, 530.1647542666965 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.74535599249993 ], 
"reply_authors_avg": [ 1.6666666666666667, 1.1055415967851332 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.24253562503633294, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5048017698044699648&as_sdt=800005&sciodt=0,15&hl=en", "gs_version_total": 2, "openreview": "https://openreview.net/forum?id=3FJOKjooIj", "pdf": "https://openreview.net/pdf?id=3FJOKjooIj", "email": "uestc.edu.cn;nwpu.edu.cn;uestc.edu.cn;hit.edu.cn;uestc.edu.cn;tongji.edu.cn;nus.edu", "author_num": 7, "aff_unique_index": "0;1;0;2;0;3;4", "aff_unique_norm": "University of Electronic Science and Technology of China;Northwest Polytechnical University;Harbin Institute of Technology;Tongji University;National University of Singapore", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.uestc.edu.cn;http://www.nwpu.edu.cn;http://www.hit.edu.cn/;https://www.tongji.edu.cn;https://www.nus.edu.sg", "aff_unique_abbr": "UESTC;NWPU;HIT;Tongji;NUS", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Xi'an;Harbin", "aff_country_unique_index": "0;0;0;0;0;0;1", "aff_country_unique": "China;Singapore" }, { "id": "3GDKJSQnW2", "title": "Pivotal Prompt Tuning for Video Dynamic Editing", "track": "main", "status": "Reject", "tldr": "", "abstract": "Text-conditioned image editing has recently provided high-quality edits on images based on diffusion frameworks. Unfortunately, this success did not carry over to video editing, which continues to be challenging. Video editing is limited to rigid editing such as object overlay and style transfer. This paper proposes pivotal dynamic editing (PDEdit) for performing spatial-temporal non-rigid video editing based only on the target text, which has never been attempted before. PDEdit is capable of changing the motion of an object/person in the video, either at a specific moment or throughout the video, while preserving the temporal consistency of edited motions and a high level of fidelity to the original input video. In contrast to previous works, the proposed method performs editing based only on the input video and target text. It does not require any other auxiliary inputs (e.g., object masks or source video captions). Based on the video diffusion model, PDEdit using the proposed prompt pivoting leverages the target text prompt for editing the input video. The quality and adaptability of the proposed method on numerous input videos from different domains show the proposed to be highly effective. It can produce high-fidelity video edits under a single unified PDEdit framework. The code for this work will be made publicly available.", "keywords": "Video Editing;Multi-modal video generation;Prompt Analysis;Diffusion model", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/8c5f85ae029cba91c9ab3b50f72e63a6c0436ca0.zip", "author": "Sunjae Yoon;GwanHyeong Koo;Ji Woo Hong;Joshua Tian Jin Tee;Chang D. 
Yoo", "authorids": "~Sunjae_Yoon1;~GwanHyeong_Koo1;~Ji_Woo_Hong1;~Joshua_Tian_Jin_Tee1;~Chang_D._Yoo1", "gender": "M;M;M;M;M", "homepage": "https://dbstjswo505.github.io/;https://kookie12.github.io/;https://slsp.kaist.ac.kr/xe/;;https://sanctusfactory.com/family.php", "dblp": "273/3911;358/7119;312/8033;;31/7819", "google_scholar": "2A2lRoUAAAAJ;https://scholar.google.co.kr/citations?user=qDCTLZgAAAAJ;;x90yFvMAAAAJ;gFWgUQEAAAAJ", "orcid": "0000-0001-7458-5273;;0000-0002-3758-0307;;0000-0002-0756-7179", "linkedin": "sunjae-yoon-133294333/;;;;", "or_profile": "~Sunjae_Yoon1;~GwanHyeong_Koo1;~Ji_Woo_Hong1;~Joshua_Tian_Jin_Tee1;~Chang_D._Yoo1", "aff": "Korea Advanced Institute of Science and Technology (KAIST);Korea Advanced Institute of Science and Technology (KAIST);Korea Advanced Institute of Science & Technology;KAIST, Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;ee.kaist.ac.kr;kaist.ac.kr", "position": "PhD student;MS student;PhD student;PhD student;Full Professor", "bibtex": "@misc{\nyoon2024pivotal,\ntitle={Pivotal Prompt Tuning for Video Dynamic Editing},\nauthor={Sunjae Yoon and GwanHyeong Koo and Ji Woo Hong and Joshua Tian Jin Tee and Chang D. Yoo},\nyear={2024},\nurl={https://openreview.net/forum?id=3GDKJSQnW2}\n}", "github": "", "project": "", "reviewers": "q1QA;9TJK;UHtg;1N5p", "site": "https://openreview.net/forum?id=3GDKJSQnW2", "pdf_size": 11715902, "rating": "5;5;5;5", "confidence": "4;4;4;3", "soundness": "3;3;2;1", "contribution": "3;3;2;2", "presentation": "3;3;2;2", "wc_summary": "54;47;46;45", "wc_strengths": "18;27;50;28", "wc_weaknesses": "131;151;87;90", "wc_questions": "39;5;85;16", "wc_review": "242;230;268;179", "wc_reply_reviewers": "148;0;0;51", "wc_reply_authors": "619;613;368;355", "reply_reviewers": "2;0;0;1", "reply_authors": "3;1;1;1", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 48.0, 3.5355339059327378 ], "wc_strengths_avg": [ 30.75, 11.776565713313877 ], "wc_weaknesses_avg": [ 114.75, 27.206387117733954 ], "wc_questions_avg": [ 36.25, 30.70321644388418 ], "wc_review_avg": [ 229.75, 32.36027657483786 ], "wc_reply_reviewers_avg": [ 49.75, 60.42505688867823 ], "wc_reply_authors_avg": [ 488.75, 127.35064781931814 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:LFWtCGLN1ZoJ:scholar.google.com/&scioq=Pivotal+Prompt+Tuning+for+Video+Dynamic+Editing&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "3GunDQNKFJ", "title": "Learning-Retrieval-Revision For Large Language Model Domain Adaptation", "track": "main", "status": "Reject", "tldr": "", "abstract": "While large language models (LLMs) like GPT-4 have recently demonstrated astonishing zero-shot capabilities in general domain tasks, they often generate content with hallucinations in specific 
domains such as Chinese law, hindering their application in these areas. This is typically due to the absence of training data that encompasses such a specific domain, preventing GPT-4 from acquiring in-domain knowledge. A pressing challenge is that it\u2019s not plausible to continue training LLMs of such scale on in-domain data.\n\nThis paper introduces a simple and effective domain adaptation framework for GPT-4 by reformulating generation as an adapt-retrieve-revise process. The initial step is to adapt an affordable 7B LLM to the target domain by continuing learning on public in-domain data. When solving a task, we leverage the adapted LLM to generate a draft answer given a task query. Then, the draft answers will be used to retrieve supporting evidence candidates from an external in-domain knowledge base. Finally, the draft answer and retrieved evidence are concatenated into a whole prompt to let GPT-4 assess the evidence and revise the draft answer to generate the final answer.\n\nOur proposal combines the advantages of the efficiency of adapting a smaller 7B model with the evidence-assessing capability of GPT-4 and effectively prevents GPT-4 from generating hallucinatory content. In the zero-shot setting of four Chinese legal tasks, our method improves accuracy by 33.3% compared to the direct generation by GPT-4. When compared to two stronger retrieval-based baselines, our method outperforms them by 15.4% and 23.9%. Our code will be released.", "keywords": "large language models;domain adaptation;retrieval-based generation", "primary_area": "generative models", "supplementary_material": "", "author": "Zhen Wan;Yating Zhang;Yexiang Wang;Fei Cheng;Sadao Kurohashi", "authorids": "~Zhen_Wan1;~Yating_Zhang1;~Yexiang_Wang1;~Fei_Cheng2;~Sadao_Kurohashi1", "gender": "M;F;;M;M", "homepage": ";;;https://researchmap.jp/chengfei?lang=en;https://nlp.ist.i.kyoto-u.ac.jp/member/kuro/index.html", "dblp": ";29/5889;;06/5591-2.html;42/2149", "google_scholar": "OH_1qwMAAAAJ;;;https://scholar.google.com/citations?hl=en;https://scholar.google.co.jp/citations?user=gpKS5P0AAAAJ", "orcid": ";;;;0000-0001-5398-8399", "linkedin": "zhen-wan-8531251ab/;;;;", "or_profile": "~Zhen_Wan1;~Yating_Zhang1;~Yexiang_Wang1;~Fei_Cheng2;~Sadao_Kurohashi1", "aff": "Kyoto University;;;Kyoto University;Kyoto University", "aff_domain": "kyoto-u.ac.jp;;;kyoto-u.ac.jp;kyoto-u.ac.jp", "position": "PhD student;;;Associate Professor;Full Professor", "bibtex": "@misc{\nwan2024learningretrievalrevision,\ntitle={Learning-Retrieval-Revision For Large Language Model Domain Adaptation},\nauthor={Zhen Wan and Yating Zhang and Yexiang Wang and Fei Cheng and Sadao Kurohashi},\nyear={2024},\nurl={https://openreview.net/forum?id=3GunDQNKFJ}\n}", "github": "", "project": "", "reviewers": "kf7S;CEqV;TieX;n3Gy", "site": "https://openreview.net/forum?id=3GunDQNKFJ", "pdf_size": 756366, "rating": "3;5;5;6", "confidence": "3;4;4;4", "soundness": "2;3;2;3", "contribution": "2;2;3;3", "presentation": "2;2;3;2", "wc_summary": "102;126;83;92", "wc_strengths": "63;101;38;72", "wc_weaknesses": "650;106;167;56", "wc_questions": "21;77;3;40", "wc_review": "836;410;291;260", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "802;709;649;583", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 100.75, 16.052647756678645 ], 
"wc_strengths_avg": [ 68.5, 22.522211259110417 ], "wc_weaknesses_avg": [ 244.75, 237.25026343504868 ], "wc_questions_avg": [ 35.25, 27.42603689926782 ], "wc_review_avg": [ 449.25, 230.20357838226582 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 685.75, 80.56480310905005 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.9271726499455306, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:6C9WMJbxT4oJ:scholar.google.com/&scioq=Learning-Retrieval-Revision+For+Large+Language+Model+Domain+Adaptation&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Kyoto University", "aff_unique_dep": "", "aff_unique_url": "https://www.kyoto-u.ac.jp", "aff_unique_abbr": "Kyoto U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Japan" }, { "title": "On Harmonizing Implicit Subpopulations", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19522", "id": "3GurO0kRue", "author_site": "Feng Hong, Jiangchao Yao, YUEMING LYU, Zhihan Zhou, Ivor Tsang, Ya Zhang, Yanfeng Wang", "tldr": "", "abstract": "Machine learning algorithms learned from data with skewed distributions usually suffer from poor generalization, especially when minority classes matter as much as, or even more than majority ones. This is more challenging on class-balanced data that has some hidden imbalanced subpopulations, since prevalent techniques mainly conduct class-level calibration and cannot perform subpopulation-level adjustments without subpopulation annotations. Regarding implicit subpopulation imbalance, we reveal that the key to alleviating the detrimental effect lies in effective subpopulation discovery with proper rebalancing. We then propose a novel subpopulation-imbalanced learning method called Scatter and HarmonizE (SHE). Our method is built upon the guiding principle of optimal data partition, which involves assigning data to subpopulations in a manner that maximizes the predictive information from inputs to labels. With theoretical guarantees and empirical evidences, SHE succeeds in identifying the hidden subpopulations and encourages subpopulation-balanced predictions. 
Extensive experiments on various benchmark datasets show the effectiveness of SHE.", "keywords": "imbalanced learning;subpopulation imbalance", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Feng Hong;Jiangchao Yao;Yueming Lyu;Zhihan Zhou;Ivor Tsang;Ya Zhang;Yanfeng Wang", "authorids": "~Feng_Hong1;~Jiangchao_Yao1;~Yueming_Lyu1;~Zhihan_Zhou2;~Ivor_Tsang1;~Ya_Zhang1;~Yanfeng_Wang1", "gender": "M;M;M;;F;M;M", "homepage": ";https://sunarker.github.io/;https://yueminglyu.github.io/;;https://annzhanglion.github.io/;https://cmic.sjtu.edu.cn/wangyanfeng/;https://www.a-star.edu.sg/cfar/about-cfar/management/prof-ivor-tsang", "dblp": "68/1260-4;166/5900;;226/5688-2;85/3714-2;55/5407-1.html;35/5873", "google_scholar": "DCTAaNQAAAAJ;w8oDh9QAAAAJ;uQXB6-oAAAAJ;;pbjw9sMAAAAJ;https://scholar.google.com/citations?hl=zh-CN;rJMOlVsAAAAJ", "orcid": ";;;0000-0002-9475-465X;0000-0002-5390-9053;0000-0002-3196-2347;", "linkedin": ";;;;;;", "or_profile": "~Feng_Hong1;~Jiangchao_Yao1;~Yueming_Lyu1;~Zhihan_Zhou2;~Ya_Zhang1;~Yanfeng_Wang1;~Ivor_W_Tsang1", "aff": "Shanghai Jiaotong University;Shanghai Artificial Intelligence Laboratory;Agency for Science, Technology and Research (A*STAR);Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;A*STAR", "aff_domain": "sjtu.edu.cn;pjlab.org.cn;astar.edu.sg;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;cfar.a-star.edu.sg", "position": "PhD student;Researcher;Researcher;PhD student;Professor;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\nhong2024on,\ntitle={On Harmonizing Implicit Subpopulations},\nauthor={Feng Hong and Jiangchao Yao and Yueming Lyu and Zhihan Zhou and Ivor Tsang and Ya Zhang and Yanfeng Wang},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=3GurO0kRue}\n}", "github": "", "project": "", "reviewers": "Dvju;FihB;acbx;TvvJ", "pdf_size": 4553670, "rating": "6;6;6;8", "confidence": "3;3;3;3", "soundness": "3;3;3;3", "contribution": "3;3;3;3", "presentation": "2;2;3;3", "wc_summary": "94;287;87;58", "wc_strengths": "74;185;112;21", "wc_weaknesses": "163;368;115;55", "wc_questions": "189;90;40;213", "wc_review": "520;930;354;347", "wc_reply_reviewers": "499;37;0;0", "wc_reply_authors": "2000;1373;1366;1666", "reply_reviewers": "2;1;0;0", "reply_authors": "6;4;3;4", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 131.5, 90.78683825313006 ], "wc_strengths_avg": [ 98.0, 59.728552636071804 ], "wc_weaknesses_avg": [ 175.25, 117.67832213283805 ], "wc_questions_avg": [ 133.0, 70.77075667251269 ], "wc_review_avg": [ 537.75, 236.81466909801006 ], "wc_reply_reviewers_avg": [ 134.0, 211.2735194007995 ], "wc_reply_authors_avg": [ 1601.25, 260.11283609233897 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 4.25, 1.0897247358851685 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10533268120873124048&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "openreview": "https://openreview.net/forum?id=3GurO0kRue", "pdf": "https://openreview.net/pdf?id=3GurO0kRue", "email": "sjtu.edu.cn;pjlab.org.cn;astar.edu.sg;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;cfar.a-star.edu.sg", 
"author_num": 7, "aff_unique_index": "0;1;2;0;0;0;2", "aff_unique_norm": "Shanghai Jiao Tong University;Shanghai Artificial Intelligence Laboratory;Agency for Science, Technology and Research", "aff_unique_dep": ";;", "aff_unique_url": "https://www.sjtu.edu.cn;http://www.shailab.org/;https://www.a-star.edu.sg", "aff_unique_abbr": "SJTU;Shanghai AI Lab;A*STAR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0;1", "aff_country_unique": "China;Singapore" }, { "id": "3IyC5lQTSi", "title": "Fairness Through Matching for better group fairness", "track": "main", "status": "Reject", "tldr": "", "abstract": "Group unfairness, which refers to socially unacceptable bias favoring certain groups (e.g., white, male), is frequently observed ethical concern in AI.\nVarious algorithms have been developed to mitigate such group unfairness in trained models.\nHowever, a significant limitation of existing algorithms for group fairness is that trained group-fair models can discriminate against specific subsets or not be fair for individuals in the same sensitive group.\nThe primary goal of this research is to develop a method to find a good group-fair model in the sense that it discriminates less against subsets and treats individuals in the same sensitive group more fairly.\nFor this purpose, we introduce a new measure of group fairness called Matched Demographic Parity (MDP). \nAn interesting feature of MDP is that it corresponds a matching function (a function matching two individuals from two different sensitive groups) to each group-fair model. \nThen, we propose a learning algorithm to seek a group-fair model whose corresponding matching function matches similar individuals well.\nTheoretical justifications are fully provided, and experiments are conducted to illustrate the superiority of the proposed algorithm.", "keywords": "Fairness;Matched demographic parity", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/3b874a33fda93b1c3ee3758183d9f2dcb0e149ae.zip", "author": "Kunwoong Kim;Insung Kong;Jongjin Lee;Minwoo Chae;Yongdai Kim", "authorids": "~Kunwoong_Kim1;~Insung_Kong1;~Jongjin_Lee1;~Minwoo_Chae1;~Yongdai_Kim1", "gender": "M;M;M;M;M", "homepage": "https://sites.google.com/view/insungkong/home;;https://sds.postech.ac.kr/;;", "dblp": ";;185/1370;93/734;296/1715", "google_scholar": "NYdp2FQAAAAJ;https://scholar.google.com/citations?view_op=list_works;B3P2AqUAAAAJ;;", "orcid": ";;0000-0002-6495-9558;;", "linkedin": ";;;;", "or_profile": "~Insung_Kong1;~Jongjin_Lee1;~Minwoo_Chae1;~Yongdai_Kim1;~Kun_woong_Kim1", "aff": "Seoul National University;Seoul National University;POSTECH;Seoul National University;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr;postech.ac.kr;snu.ac.kr;snu.ac.kr", "position": "PhD student;PhD student;Associate Professor;Full Professor;PhD student", "bibtex": "@misc{\nkim2024fairness,\ntitle={Fairness Through Matching for better group fairness},\nauthor={Kunwoong Kim and Insung Kong and Jongjin Lee and Minwoo Chae and Yongdai Kim},\nyear={2024},\nurl={https://openreview.net/forum?id=3IyC5lQTSi}\n}", "github": "", "project": "", "reviewers": "diD4;9FFn;ZKn5", "site": "https://openreview.net/forum?id=3IyC5lQTSi", "pdf_size": 5719711, "rating": "5;6;6", "confidence": "4;3;2", "soundness": "3;4;3", "contribution": "2;3;3", "presentation": "2;3;2", "wc_summary": "46;26;59", "wc_strengths": "35;25;59", "wc_weaknesses": "231;168;300", "wc_questions": "8;77;9", 
"wc_review": "320;296;427", "wc_reply_reviewers": "0;14;236", "wc_reply_authors": "1181;679;865", "reply_reviewers": "0;1;1", "reply_authors": "3;1;2", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 43.666666666666664, 13.572848714334887 ], "wc_strengths_avg": [ 39.666666666666664, 14.2672897060218 ], "wc_weaknesses_avg": [ 233.0, 53.907327887774215 ], "wc_questions_avg": [ 31.333333333333332, 32.293790252754306 ], "wc_review_avg": [ 347.6666666666667, 56.94636853117931 ], "wc_reply_reviewers_avg": [ 83.33333333333333, 108.10283170306975 ], "wc_reply_authors_avg": [ 908.3333333333334, 207.218617782176 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:maQ1k8I0q_MJ:scholar.google.com/&scioq=Fairness+Through+Matching+for+better+group+fairness&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Seoul National University;Pohang University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.snu.ac.kr;https://www.postech.ac.kr", "aff_unique_abbr": "SNU;POSTECH", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pohang", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "3J7foqnJkA", "title": "Understanding Parameter Saliency via Extreme Value Theory", "track": "main", "status": "Reject", "tldr": "", "abstract": "Deep neural networks are being increasingly implemented throughout society in recent years. 
It is useful to identify which parameters trigger misclassification in diagnosing undesirable model behaviors.\nThe concept of parameter saliency is proposed and used to diagnose convolutional neural networks (CNNs) by ranking \nconvolution filters that may have caused misclassification on the basis of parameter saliency.\nIt is also shown that fine-tuning the top ranking salient filters efficiently corrects misidentification on ImageNet.\nHowever, there is still a knowledge gap in terms of understanding why parameter saliency ranking can find the filters inducing misidentification.\nIn this work, we attempt to bridge the gap by analyzing parameter saliency ranking from a statistical viewpoint, namely, extreme value theory.\nWe first show that the existing work implicitly assumes that the gradient norm computed for each filter follows a normal distribution.\nThen, we clarify the relationship between parameter saliency and the score based on the peaks-over-threshold (POT) method, which is often used to model extreme values.\nFinally, we reformulate parameter saliency in terms of the POT method, where this reformulation is regarded as statistical anomaly detection and does not require the implicit assumptions of the existing formulation of parameter saliency.\nOur experimental results demonstrate that our reformulation can detect malicious filters as well.\nFurthermore, we show that the existing parameter saliency method exhibits a bias against the depth of layers in deep neural networks.\nIn particular, this bias has the potential to inhibit the discovery of filters that cause misidentification in situations where domain shift occurs.\nIn contrast, parameter saliency based on POT shows less of this bias.", "keywords": "parameter saliency;extreme value theory;XAI", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Shuo Wang;Issei Sato", "authorids": "~Shuo_Wang30;~Issei_Sato2", "gender": "M;", "homepage": "https://www.ml.is.s.u-tokyo.ac.jp/members-en;https://www.ml.is.s.u-tokyo.ac.jp/issei-sato-en", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Shuo_Wang30;~Issei_Sato2", "aff": "The University of Tokyo;The University of Tokyo", "aff_domain": "u-tokyo.ac.jp;u-tokyo.ac.jp", "position": "MS student;Full Professor", "bibtex": "@misc{\nwang2024understanding,\ntitle={Understanding Parameter Saliency via Extreme Value Theory},\nauthor={Shuo Wang and Issei Sato},\nyear={2024},\nurl={https://openreview.net/forum?id=3J7foqnJkA}\n}", "github": "", "project": "", "reviewers": "nAUC;HPzx;8wd7", "site": "https://openreview.net/forum?id=3J7foqnJkA", "pdf_size": 4673009, "rating": "5;6;6", "confidence": "4;3;2", "soundness": "3;3;2", "contribution": "2;3;3", "presentation": "3;2;3", "wc_summary": "61;44;67", "wc_strengths": "85;29;68", "wc_weaknesses": "127;481;163", "wc_questions": "50;32;53", "wc_review": "323;586;351", "wc_reply_reviewers": "0;50;114", "wc_reply_authors": "756;499;1395", "reply_reviewers": "0;1;2", "reply_authors": "2;2;3", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 57.333333333333336, 9.741092797468305 ], "wc_strengths_avg": [ 60.666666666666664, 23.442601296689656 ], "wc_weaknesses_avg": [ 257.0, 159.0723106011854 ], 
"wc_questions_avg": [ 45.0, 9.273618495495704 ], "wc_review_avg": [ 420.0, 117.93501035174697 ], "wc_reply_reviewers_avg": [ 54.666666666666664, 46.65714188512718 ], "wc_reply_authors_avg": [ 883.3333333333334, 376.70884719575616 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Dq8yVgWrnjQJ:scholar.google.com/&scioq=Understanding+Parameter+Saliency+via+Extreme+Value+Theory&hl=en&as_sdt=0,44", "gs_version_total": 4, "aff_unique_index": "0;0", "aff_unique_norm": "University of Tokyo", "aff_unique_dep": "", "aff_unique_url": "https://www.u-tokyo.ac.jp", "aff_unique_abbr": "UTokyo", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Japan" }, { "title": "The Marginal Value of Momentum for Small Learning Rate SGD", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19521", "id": "3JjJezzVkT", "author_site": "Runzhe Wang, Sadhika Malladi, Tianhao Wang, Kaifeng Lyu, Zhiyuan Li", "tldr": "", "abstract": "Momentum is known to accelerate the convergence of gradient descent in strongly convex settings without stochastic gradient noise. In stochastic optimization, such as training neural networks, folklore suggests that momentum may help deep learning optimization by reducing the variance of the stochastic gradient update, but previous theoretical analyses do not find momentum to offer any provable acceleration. Theoretical results in this paper clarify the role of momentum in stochastic settings where the learning rate is small and gradient noise is the dominant source of instability, suggesting that SGD with and without momentum behave similarly in the short and long time horizons. 
Experiments show that momentum indeed has limited benefits for both optimization and generalization in practical training regimes where the optimal learning rate is not very large, including small- to medium-batch training from scratch on ImageNet and fine-tuning language models on downstream tasks.", "keywords": "momentum;SGD;dynamics", "primary_area": "optimization", "supplementary_material": "", "author": "Runzhe Wang;Sadhika Malladi;Tianhao Wang;Kaifeng Lyu;Zhiyuan Li", "authorids": "~Runzhe_Wang2;~Sadhika_Malladi2;~Tianhao_Wang1;~Kaifeng_Lyu2;~Zhiyuan_Li2", "gender": ";F;M;M;M", "homepage": "https://wang-runzhe.github.io;https://www.cs.princeton.edu/~smalladi/;https://tianhaowang.ttic.edu;https://kaifeng.ac/;https://zhiyuanli.ttic.edu", "dblp": ";176/9810;145/3288-2;220/3283;l/ZhiyuanLi", "google_scholar": ";9HCmTcwAAAAJ;m45LD1kAAAAJ;843JJtgAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Runzhe_Wang2;~Sadhika_Malladi2;~Tianhao_Wang1;~Kaifeng_Lyu2;~Zhiyuan_Li2", "aff": "Princeton University;Princeton University;Yale University;Princeton University;Toyota Technological Institute at Chicago", "aff_domain": "princeton.edu;princeton.edu;yale.edu;princeton.edu;ttic.edu", "position": "PhD student;PhD student;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nwang2024the,\ntitle={The Marginal Value of Momentum for Small Learning Rate {SGD}},\nauthor={Runzhe Wang and Sadhika Malladi and Tianhao Wang and Kaifeng Lyu and Zhiyuan Li},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=3JjJezzVkT}\n}", "github": "", "project": "", "reviewers": "EHVB;ReS6;fSFX;RZuq", "pdf_size": 997611, "rating": "5;5;6;6", "confidence": "3;3;3;3", "soundness": "2;3;3;3", "contribution": "2;2;3;3", "presentation": "3;2;3;3", "wc_summary": "89;31;104;92", "wc_strengths": "174;38;98;44", "wc_weaknesses": "411;98;27;85", "wc_questions": "2;32;143;65", "wc_review": "676;199;372;286", "wc_reply_reviewers": "139;0;0;0", "wc_reply_authors": "764;586;269;463", "reply_reviewers": "1;0;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 79.0, 28.275431031197385 ], "wc_strengths_avg": [ 88.5, 54.61455849862745 ], "wc_weaknesses_avg": [ 155.25, 150.05728072972667 ], "wc_questions_avg": [ 60.5, 52.58564442887431 ], "wc_review_avg": [ 383.25, 179.74617520270075 ], "wc_reply_reviewers_avg": [ 34.75, 60.188765563018485 ], "wc_reply_authors_avg": [ 520.5, 180.37530318754838 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11356245156123228102&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=3JjJezzVkT", "pdf": "https://openreview.net/pdf?id=3JjJezzVkT", "email": "princeton.edu;princeton.edu;yale.edu;princeton.edu;ttic.edu", "author_num": 5, "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "Princeton University;Yale University;Toyota Technological Institute at Chicago", "aff_unique_dep": ";;", "aff_unique_url": "https://www.princeton.edu;https://www.yale.edu;https://www.tti-chicago.org", "aff_unique_abbr": 
"Princeton;Yale;TTI Chicago", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chicago", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "3Jl0sjmZx9", "title": "Large Multimodal Model for Real-World Radiology Report Generation", "track": "main", "status": "Reject", "tldr": "", "abstract": "While automatic report generation has demonstrated promising results using deep learning-based methods, deploying these algorithms in real-world scenarios remains challenging. Compared to conventional report generation, real-world report generation requires model to follow the instruction from the radiologists and consider contextual information. Thus, this paper focuses on developing a practical report generation method that supports real-world clinical practice. To tackle the challenges posed by the limited availability of clinical data, we propose a GPT-based unified data generation pipeline designed to produce high-quality data. Consequently, we present a new benchmark dataset MIMIC-R3G, comprising five representative tasks pertinent to real-world medical report generation. We propose Domain-enhanced Multi-modal Model (DeMMo), where an additional medical domain vision encoder is incorporated into the general domain multimodal LLM to enhance its ability on specific domains. This approach aims to harness the specialized capabilities of the medical domain vision encoder while leveraging the robustness and versatility of the general domain multi-modal LLM. Comprehensive experiments demonstrate that our approach attains competitive performance across all real-world tasks compared to existing interactive report generation frameworks and state-of-the-art encoder-decoder style report generation models.", "keywords": "Report Generation;Large Vision Language Model", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Brian Nlong Zhao;XINYANG JIANG;Xufang Luo;Yifan Yang;Bo Li;Zilong Wang;Javier Alvarez-Valle;Matthew P. 
Lungren;Dongsheng Li;Lili Qiu", "authorids": "~Brian_Nlong_Zhao1;~XINYANG_JIANG2;~Xufang_Luo1;~Yifan_Yang9;~Bo_Li23;~Zilong_Wang8;~Javier_Alvarez-Valle1;~Matthew_P._Lungren1;~Dongsheng_Li2;~Lili_Qiu1", "gender": "M;M;F;M;M;M;M;;M;F", "homepage": ";;;https://www.microsoft.com/en-us/research/people/yifanyang/;https://www.brianboli.com/;;;;http://recmind.cn;https://www.cs.utexas.edu/~lili/", "dblp": "205/7046.html;155/6316;218/7350;83/89-4;50/3402-80;42/898-6;281/7037;;254/0830-2.html;", "google_scholar": "IhqFMeUAAAAJ;JiTfWVMAAAAJ;;;1_zc1-IAAAAJ;;https://scholar.google.co.uk/citations?user=ojoRDc4AAAAJ;z1UtMSYAAAAJ;VNg5rA8AAAAJ;https://scholar.google.com.tw/citations?user=16posrQAAAAJ", "orcid": ";;;;;0000-0002-6760-1471;0000-0003-0906-4177;;0000-0003-3103-8442;", "linkedin": ";xinyang-jiang-ab5416b0/;;yifyang/;brianbo1121/;;javieralvarezvalle/;;;", "or_profile": "~Brian_Nlong_Zhao1;~XINYANG_JIANG2;~Xufang_Luo1;~Yifan_Yang9;~Bo_Li23;~Zilong_Wang8;~Javier_Alvarez-Valle1;~Matthew_P._Lungren1;~Dongsheng_Li2;~Lili_Qiu1", "aff": "Microsoft Research Asia;Microsoft;Microsoft Research;Microsoft;Nanyang Technological University;Microsoft Research;Microsoft;Microsoft;Microsoft Research Asia;University of Texas at Austin", "aff_domain": "microsoft.com;microsoft.com;microsoft.com;microsoft.com;ntu.edu.sg;microsoft.com;microsoft.com;microsoft.com;microsoft.com;cs.utexas.edu", "position": "Intern;Senior Researcher;Researcher;Researcher;PhD student;Researcher;Senior Director of Biomedical Imaging;Principal Researcher;Principal Researcher;Full Professor", "bibtex": "@misc{\nzhao2024large,\ntitle={Large Multimodal Model for Real-World Radiology Report Generation},\nauthor={Brian Nlong Zhao and XINYANG JIANG and Xufang Luo and Yifan Yang and Bo Li and Zilong Wang and Javier Alvarez-Valle and Matthew P. 
Lungren and Dongsheng Li and Lili Qiu},\nyear={2024},\nurl={https://openreview.net/forum?id=3Jl0sjmZx9}\n}", "github": "", "project": "", "reviewers": "watP;8G3W;GGyY;NXpC;uSN9", "site": "https://openreview.net/forum?id=3Jl0sjmZx9", "pdf_size": 22146813, "rating": "3;5;5;5;8", "confidence": "5;4;4;4;2", "soundness": "1;2;3;2;3", "contribution": "1;2;2;2;3", "presentation": "1;3;3;3;4", "wc_summary": "50;41;72;85;50", "wc_strengths": "21;36;89;50;60", "wc_weaknesses": "86;97;177;313;31", "wc_questions": "19;115;162;36;44", "wc_review": "176;289;500;484;185", "wc_reply_reviewers": "0;0;0;254;0", "wc_reply_authors": "620;680;845;1646;173", "reply_reviewers": "0;0;0;1;0", "reply_authors": "1;1;2;3;1", "rating_avg": [ 5.2, 1.6 ], "confidence_avg": [ 3.8, 0.9797958971132712 ], "soundness_avg": [ 2.2, 0.7483314773547882 ], "contribution_avg": [ 2.0, 0.6324555320336759 ], "presentation_avg": [ 2.8, 0.9797958971132712 ], "wc_summary_avg": [ 59.6, 16.304600577751057 ], "wc_strengths_avg": [ 51.2, 23.025203582161875 ], "wc_weaknesses_avg": [ 140.8, 97.91915032311096 ], "wc_questions_avg": [ 75.2, 54.38161454021019 ], "wc_review_avg": [ 326.8, 140.703091650468 ], "wc_reply_reviewers_avg": [ 50.8, 101.6 ], "wc_reply_authors_avg": [ 792.8, 481.1259294613002 ], "reply_reviewers_avg": [ 0.2, 0.4 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.9951052080056662, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2585879695080037135&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;1;0;0;0;0;2", "aff_unique_norm": "Microsoft;Nanyang Technological University;University of Texas at Austin", "aff_unique_dep": "Research;;", "aff_unique_url": "https://www.microsoft.com/en-us/research/group/asia;https://www.ntu.edu.sg;https://www.utexas.edu", "aff_unique_abbr": "MSR Asia;NTU;UT Austin", "aff_campus_unique_index": "0;0;2", "aff_campus_unique": "Asia;;Austin", "aff_country_unique_index": "0;1;1;1;2;1;1;1;0;1", "aff_country_unique": "China;United States;Singapore" }, { "id": "3JoQqW35GQ", "title": "Training-free Linear Image Inversion via Flows", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Training-free linear inversion involves the use of a pretrained generative model and---through appropriate modifications to the generation process---solving inverse problems without any finetuning of the generative model. \nWhile recent prior methods have explored the use of diffusion models, they still require the manual tuning of many hyperparameters for different inverse problems. 
\nIn this work, we propose a training-free method for image inversion using pretrained flow models, leveraging the simplicity and efficiency of Flow Matching models, using theoretically-justified weighting schemes and thereby significantly reducing the amount of manual tuning.\nIn particular, we draw inspiration from two main sources: adopting prior gradient correction methods to the flow regime, and a solver scheme based on conditional Optimal Transport paths.\nAs pretrained diffusion models are widely accessible, we also show how to practically adapt diffusion models for our method.\nEmpirically, our approach requires no problem-specific tuning across an extensive suite of noisy linear image inversion problems on high-dimensional datasets, ImageNet-64/128 and AFHQ-256, and we observe that our flow-based method for image inversion significantly improves upon closely-related diffusion-based linear inversion methods.", "keywords": "inverse problems;linear image inversion;continuous normalizing flows;flow matching;diffusion models", "primary_area": "generative models", "supplementary_material": "", "author": "Ashwini Pokle;Matthew J. Muckley;Ricky T. Q. Chen;Brian Karrer", "authorids": "~Ashwini_Pokle1;~Matthew_J._Muckley1;~Ricky_T._Q._Chen1;~Brian_Karrer1", "gender": "F;M;M;M", "homepage": "https://ashwinipokle.github.io/;https://mmuckley.github.io/;;http://www.rtqichen.com", "dblp": "228/5527;158/8226;27/7164;228/6698", "google_scholar": "o_1YtVoAAAAJ;Iz9v6dcAAAAJ;Wewcpo4AAAAJ;7MxQd6UAAAAJ", "orcid": ";0000-0002-6525-8817;;", "linkedin": ";matthew-muckley-33a9b558/;;", "or_profile": "~Ashwini_Pokle1;~Matthew_J._Muckley1;~Brian_Karrer1;~Tian_Qi_Chen2", "aff": "Carnegie Mellon University;Meta;Meta Fundamental AI Research (FAIR);FAIR Labs, Meta AI", "aff_domain": "andrew.cmu.edu;fb.com;meta.com;meta.com", "position": "PhD student;Research Engineer;Researcher;Researcher", "bibtex": "@misc{\npokle2024trainingfree,\ntitle={Training-free Linear Image Inversion via Flows},\nauthor={Ashwini Pokle and Matthew J. Muckley and Ricky T. Q. 
Chen and Brian Karrer},\nyear={2024},\nurl={https://openreview.net/forum?id=3JoQqW35GQ}\n}", "github": "", "project": "", "reviewers": "pako;tnhX;STfT;4dCg", "site": "https://openreview.net/forum?id=3JoQqW35GQ", "pdf_size": 46014074, "rating": "6;8;8;8", "confidence": "5;4;3;3", "soundness": "3;3;4;3", "contribution": "2;3;4;3", "presentation": "3;3;4;2", "wc_summary": "62;63;46;48", "wc_strengths": "50;26;59;74", "wc_weaknesses": "174;63;63;82", "wc_questions": "161;2;15;26", "wc_review": "447;154;183;230", "wc_reply_reviewers": "24;0;0;70", "wc_reply_authors": "1694;108;260;266", "reply_reviewers": "1;0;0;1", "reply_authors": "3;1;1;3", "rating_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 54.75, 7.790218225441442 ], "wc_strengths_avg": [ 52.25, 17.41228014936585 ], "wc_weaknesses_avg": [ 95.5, 45.98097432634502 ], "wc_questions_avg": [ 51.0, 64.07417576528005 ], "wc_review_avg": [ 253.5, 114.96195022702076 ], "wc_reply_reviewers_avg": [ 23.5, 28.578838324886476 ], "wc_reply_authors_avg": [ 582.0, 645.127894296937 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 1.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13056289113879813403&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Carnegie Mellon University;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.cmu.edu;https://meta.com", "aff_unique_abbr": "CMU;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "3K3aWRpRNq", "title": "Reducing Atomic Clashes in Geometric Diffusion Models for 3D Structure-Based Drug Design", "track": "main", "status": "Reject", "tldr": "", "abstract": "In the domain of Three-dimensional Structure-Based Drug Design (3D SBDD), the 3D spatial structures of target pockets serve as inputs for the generation of molecular geometric graphs. The Geometric Diffusion Model (GDM) has been recognized as the state-of-the-art (SOTA) method in 3D SBDD, attributed to its exceptional generation capabilities on geometric graphs. However, the inherent data-driven nature of GDM occasionally neglects critical inter-molecular interactions, such as Van der Waals force and Hydrogen Bonding. Such omissions could produce molecules that violate established physical principles. Particular evidence is that GDMs exhibit atomic clashes during generation due to the overly close proximity of generated molecules to protein structures. To address this, our paper introduces a novel constrained sampling process designed to obviate such undesirable collisions. By integrating a non-convex constraint within the current Langevin Dynamics (LD) of GDM and utilizing the proximal regularization techniques, we force molecular coordinates to obey the imposed physical constraints. Notably, the proposed method requires no modifications to the training process of GDMs. 
Empirical evaluations show a significant reduction in atomic clashes via the proposed method compared to the original LD process of GDMs.", "keywords": "Structure Based Drug Design;Geometric Molecular Generation;Diffusion Models", "primary_area": "generative models", "supplementary_material": "", "author": "Jian Ma;Peilin Zhao;Tingyang Xu;Qifeng Bai", "authorids": "~Jian_Ma8;~Peilin_Zhao2;~Tingyang_Xu1;~Qifeng_Bai1", "gender": "M;;M;M", "homepage": "https://github.com/small-jian-house;;;https://molaical.github.io", "dblp": ";84/8411;157/0940;", "google_scholar": ";https://scholar.google.com.hk/citations?user=HPeX_YcAAAAJ;6gIs5YMAAAAJ;https://scholar.google.com.hk/citations?user=7w7Vwh0AAAAJ", "orcid": ";0000-0001-8543-3953;0009-0002-0106-8376;0000-0002-8498-0936", "linkedin": ";;;", "or_profile": "~Jian_Ma8;~Peilin_Zhao2;~Tingyang_Xu1;~Qifeng_Bai1", "aff": "Lanzhou University;Tencent;Tencent AI Lab;Lanzhou University", "aff_domain": "lzu.edu.cn;tencent.com;tencent.com;lzu.edu.cn", "position": "MS student;Researcher;Researcher;Full Professor", "bibtex": "@misc{\nma2024reducing,\ntitle={Reducing Atomic Clashes in Geometric Diffusion Models for 3D Structure-Based Drug Design},\nauthor={Jian Ma and Peilin Zhao and Tingyang Xu and Qifeng Bai},\nyear={2024},\nurl={https://openreview.net/forum?id=3K3aWRpRNq}\n}", "github": "", "project": "", "reviewers": "2tn2;7c1E;xcbL;F9wF", "site": "https://openreview.net/forum?id=3K3aWRpRNq", "pdf_size": 10540917, "rating": "3;3;3;6", "confidence": "3;4;3;4", "soundness": "2;2;2;2", "contribution": "2;1;2;2", "presentation": "2;2;1;2", "wc_summary": "77;107;68;95", "wc_strengths": "10;33;73;132", "wc_weaknesses": "145;71;235;61", "wc_questions": "1;27;42;125", "wc_review": "233;238;418;413", "wc_reply_reviewers": "49;0;77;20", "wc_reply_authors": "641;329;772;443", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;1", "rating_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.0, 0.0 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_avg": [ 86.75, 15.20485119953497 ], "wc_strengths_avg": [ 62.0, 46.27634384866635 ], "wc_weaknesses_avg": [ 128.0, 69.77822009767804 ], "wc_questions_avg": [ 48.75, 46.40245144386232 ], "wc_review_avg": [ 325.5, 90.03471552684553 ], "wc_reply_reviewers_avg": [ 36.5, 29.159046623646667 ], "wc_reply_authors_avg": [ 546.25, 171.60911252028546 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5797390712744065041&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Lanzhou University;Tencent", "aff_unique_dep": ";Tencent Holdings Limited", "aff_unique_url": "https://www.lzu.edu.cn;https://www.tencent.com", "aff_unique_abbr": "LZU;Tencent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "On Representation Complexity of Model-based and Model-free Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19520", "id": "3K3s9qxSn7", "author_site": "Hanlin Zhu, Baihe Huang, Stuart Russell", "tldr": "", "abstract": "We study the representation complexity of model-based and model-free reinforcement learning (RL) in the context of 
circuit complexity. We prove theoretically that there exists a broad class of MDPs such that their underlying transition and reward functions can be represented by constant depth circuits with polynomial size, while the optimal $Q$-function suffers an exponential circuit complexity in constant-depth circuits. By drawing attention to the approximation errors and building connections to complexity theory, our theory provides unique insights into why model-based algorithms usually enjoy better sample complexity than model-free algorithms from a novel representation complexity perspective: in some cases, the ground-truth rule (model) of the environment is simple to represent, while other quantities, such as $Q$-function, appear complex. We empirically corroborate our theory by comparing the approximation error of the transition kernel, reward function, and optimal $Q$-function in various Mujoco environments, which demonstrates that the approximation errors of the transition kernel and reward function are consistently lower than those of the optimal $Q$-function. To the best of our knowledge, this work is the first to study the circuit complexity of RL, which also provides a rigorous framework for future research.", "keywords": "model-based and model-free RL;representation complexity;circuit complexity;approximation error", "primary_area": "learning theory", "supplementary_material": "/attachment/636c407cfdbdbea5f6bf0356c85553601435ca7d.pdf", "author": "Hanlin Zhu;Baihe Huang;Stuart Russell", "authorids": "~Hanlin_Zhu2;~Baihe_Huang1;~Stuart_Russell1", "gender": "M;;M", "homepage": "https://hanlinzhu.com/;;https://people.eecs.berkeley.edu/~russell/", "dblp": ";279/4131;", "google_scholar": "yDVn5LEAAAAJ;chICXXMAAAAJ;https://scholar.google.com.tw/citations?user=KJGrjCAAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Hanlin_Zhu2;~Baihe_Huang1;~Stuart_Russell1", "aff": "Electrical Engineering & Computer Science Department, University of California Berkeley;University of California, Berkeley;University of California, Berkeley", "aff_domain": "eecs.berkeley.edu;berkeley.edu;berkeley.edu", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nzhu2024on,\ntitle={On Representation Complexity of Model-based and Model-free Reinforcement Learning},\nauthor={Hanlin Zhu and Baihe Huang and Stuart Russell},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=3K3s9qxSn7}\n}", "github": "", "project": "", "reviewers": "kBCH;zTQ7;9wTj", "pdf_size": 6585554, "rating": "5;6;8", "confidence": "2;4;2", "soundness": "2;3;3", "contribution": "2;3;3", "presentation": "3;3;3", "wc_summary": "101;86;77", "wc_strengths": "77;42;34", "wc_weaknesses": "34;111;46", "wc_questions": "14;97;33", "wc_review": "226;336;190", "wc_reply_reviewers": "13;143;24", "wc_reply_authors": "456;959;424", "reply_reviewers": "1;2;1", "reply_authors": "2;2;2", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 2.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 88.0, 9.899494936611665 ], "wc_strengths_avg": [ 51.0, 18.672618098881223 ], "wc_weaknesses_avg": [ 63.666666666666664, 33.82635395992631 ], "wc_questions_avg": [ 48.0, 35.505868059613285 ], "wc_review_avg": [ 250.66666666666666, 62.10385566845982 ], "wc_reply_reviewers_avg": [ 60.0, 
58.86141916966211 ], "wc_reply_authors_avg": [ 613.0, 245.00748287892486 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.18898223650461365, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7439446087400089778&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "openreview": "https://openreview.net/forum?id=3K3s9qxSn7", "pdf": "https://openreview.net/pdf?id=3K3s9qxSn7", "email": "eecs.berkeley.edu;berkeley.edu;berkeley.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "Electrical Engineering & Computer Science Department", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "The Reasonableness Behind Unreasonable Translation Capability of Large Language Model", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19519", "id": "3KDbIWT26J", "author_site": "Tingchen Fu, lemao liu, Deng Cai, Guoping Huang, Shuming Shi, Rui Yan", "tldr": "", "abstract": "Multilingual large language models trained on non-parallel data yield impressive translation capabilities. Existing studies demonstrate that incidental sentence-level bilingualism within pre-training data contributes to the LLM's translation abilities. However, it has also been observed that LLM's translation capabilities persist even when incidental sentence-level bilingualism are excluded from the training corpus.\nIn this study, we comprehensively investigate the unreasonable effectiveness and the underlying mechanism for LLM's translation abilities, specifically addressing the question why large language models learn to translate without parallel data, using the BLOOM model series as a representative example. Through extensive experiments, our findings suggest the existence of unintentional bilingualism in the pre-training corpus, especially word alignment data significantly contributes to the large language model's acquisition of translation ability. Moreover, the translation signal derived from word alignment data is comparable to that from sentence-level bilingualism. Additionally, we study the effects of monolingual data and parameter-sharing in assisting large language model to learn to translate. 
Together, these findings present another piece of the broader puzzle of trying to understand how large language models acquire translation capability.", "keywords": "large language model;translation;unintentional bilingualism", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Tingchen Fu;Lemao Liu;Deng Cai;Guoping Huang;Shuming Shi;Rui Yan", "authorids": "~Tingchen_Fu1;~Lemao_Liu3;~Deng_Cai1;~Guoping_Huang2;~Shuming_Shi1;~Rui_Yan2", "gender": "M;M;M;M;M;M", "homepage": ";https://jcyk.github.io/;;;https://gsai.ruc.edu.cn/english/ruiyan;https://lemaoliu.github.io/homepage/", "dblp": "318/0986;c/DCai-2;165/3047;s/ShumingShi;19/2405-1;41/10887.html", "google_scholar": ";KpbRLYcAAAAJ;xSkkA7UAAAAJ;Lg31AKMAAAAJ;eLw6g-UAAAAJ;", "orcid": ";;;;0000-0002-3356-6823;", "linkedin": "%E5%BB%B7%E7%90%9B-%E4%BB%98-b00435181/;;guoping-huang-473708b9/;;;", "or_profile": "~Tingchen_Fu1;~Deng_Cai1;~Guoping_Huang2;~Shuming_Shi1;~Rui_Yan2;~lemao_liu1", "aff": "Renmin University of China;Tencent AI Lab;;Tencent AI Lab;Renmin University of China;Tencent", "aff_domain": "ruc.edu.cn;tencent.com;;tencent.com;ruc.edu.cn;tencent.com", "position": "MS student;Research Scientist;;Principal Researcher;Associate Professor;Researcher", "bibtex": "@inproceedings{\nfu2024the,\ntitle={The Reasonableness Behind Unreasonable Translation Capability of Large Language Model},\nauthor={Tingchen Fu and Lemao Liu and Deng Cai and Guoping Huang and Shuming Shi and Rui Yan},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=3KDbIWT26J}\n}", "github": "", "project": "", "reviewers": "5xLr;6ach;stf2;ZA8F", "pdf_size": 574040, "rating": "5;6;6;6", "confidence": "3;4;4;3", "soundness": "3;3;3;3", "contribution": "3;3;3;3", "presentation": "3;3;4;3", "wc_summary": "134;26;85;90", "wc_strengths": "87;17;131;26", "wc_weaknesses": "65;31;191;192", "wc_questions": "98;317;190;23", "wc_review": "384;391;597;331", "wc_reply_reviewers": "0;37;0;0", "wc_reply_authors": "580;1886;1043;1185", "reply_reviewers": "0;1;0;0", "reply_authors": "1;4;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 83.75, 38.408169703853375 ], "wc_strengths_avg": [ 65.25, 46.54231945230061 ], "wc_weaknesses_avg": [ 119.75, 72.75085910145667 ], "wc_questions_avg": [ 157.0, 109.68819444224616 ], "wc_review_avg": [ 425.75, 101.55632673546242 ], "wc_reply_reviewers_avg": [ 9.25, 16.021469970012117 ], "wc_reply_authors_avg": [ 1173.5, 468.2576747902804 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15514343039983960443&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=3KDbIWT26J", "pdf": "https://openreview.net/pdf?id=3KDbIWT26J", "email": "ruc.edu.cn;tencent.com;;tencent.com;ruc.edu.cn;tencent.com", "author_num": 6, "aff_unique_index": "0;1;1;0;1", "aff_unique_norm": "Renmin University of China;Tencent", "aff_unique_dep": ";Tencent AI Lab", "aff_unique_url": "http://www.ruc.edu.cn;https://ai.tencent.com", "aff_unique_abbr": "RUC;Tencent AI Lab", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "3KmfUE31sc", "title": "Reconstruction as Sequence for Efficient Unified Unsupervised Anomaly Detection", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Unsupervised anomaly detection is highly desirable in industrial manufacturing processes due to the rarity of anomalies in real-world scenarios. Recent research has been focused on developing a unified framework for achieving multi-class anomaly detection. However, existing advanced feature-reconstruction-based methods often suffer from a lack of sufficient contextual awareness, thereby compromising the quality of the reconstruction. To address this challenge, we introduce a novel Reconstruction as Sequence (RAS) framework, which enhances the contextual correspondence during feature reconstruction through a sequence modelling perspective. In particular, based on the transformer technique, we integrate a specialized RASFormer block into the RAS framework. This block enables the capture of spatial relationships among different image regions and enhances temporal dependencies throughout the reconstruction process. By incorporating the RASFormer block, our RAS method achieves superior contextual awareness capabilities, leading to exceptional performance and faster inference speed. Experimental results show that our proposed RAS method significantly outperforms competing methods while exhibiting a maximal improvement of 29\\% in inference throughput. These results indicate the best trade-off between effectiveness and efficiency, further demonstrating the superiority and practicality of our method.", "keywords": "Anomaly Detection;Reconstruction;Transformer;Unsupervised Learning", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Hui-Yue Yang;Hui Chen;Zijia Lin;Jungong Han;Guiguang Ding", "authorids": "~Hui-Yue_Yang2;~Hui_Chen7;~Zijia_Lin1;~Jungong_Han1;~Guiguang_Ding1", "gender": ";M;M;M;M", "homepage": ";https://huichen24.github.io/;https://sites.google.com/site/linzijia72/;https://jungonghan.github.io/;http://ise.thss.tsinghua.edu.cn/MIG/dgg.html", "dblp": ";;78/9911;98/6127;51/740", "google_scholar": ";erpvWcIAAAAJ;ghUYrHkAAAAJ;hNi1gxAAAAAJ;https://scholar.google.com.tw/citations?user=B7F3yt4AAAAJ", "orcid": "0000-0002-9458-5788;0000-0003-4180-5801;0000-0002-1390-7424;0000-0003-4361-956X;0000-0003-0137-9975", "linkedin": ";;;;", "or_profile": "~Hui-Yue_Yang2;~Hui_Chen7;~Zijia_Lin1;~Jungong_Han1;~Guiguang_Ding1", "aff": "Tsinghua University;Tsinghua University;Kuaishou Technology;University of Sheffield;Tsinghua University", "aff_domain": "tsinghua.edu.cn;mail.tsinghua.edu.cn;kuaishou.com;sheffield.ac.uk;tsinghua.edu.cn", "position": "PhD student;Researcher;NLP expert;Full Professor;Full Professor", "bibtex": "@misc{\nyang2024reconstruction,\ntitle={Reconstruction as Sequence for Efficient Unified Unsupervised Anomaly Detection},\nauthor={Hui-Yue Yang and Hui Chen and Zijia Lin and Jungong Han and Guiguang Ding},\nyear={2024},\nurl={https://openreview.net/forum?id=3KmfUE31sc}\n}", "github": "", "project": "", "reviewers": "xekD;rUeR;5D2p;m5gg", "site": "https://openreview.net/forum?id=3KmfUE31sc", "pdf_size": 1272973, "rating": "3;3;5;6", "confidence": "5;4;4;4", "soundness": "3;2;3;3", "contribution": "2;1;2;3", "presentation": "3;3;2;3", "wc_summary": "45;186;78;70", "wc_strengths": 
"14;37;27;54", "wc_weaknesses": "162;252;87;198", "wc_questions": "9;7;1;8", "wc_review": "230;482;193;330", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 94.75, 54.07113370366854 ], "wc_strengths_avg": [ 33.0, 14.611639196202457 ], "wc_weaknesses_avg": [ 174.75, 59.93903152370749 ], "wc_questions_avg": [ 6.25, 3.112474899497183 ], "wc_review_avg": [ 308.75, 111.87800275299877 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5555555555555555, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:BSrZnEDOxKIJ:scholar.google.com/&scioq=Reconstruction+as+Sequence+for+Efficient+Unified+Unsupervised+Anomaly+Detection&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Tsinghua University;Kuaishou Technology;University of Sheffield", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.kuaishou.com;https://www.sheffield.ac.uk", "aff_unique_abbr": "THU;Kuaishou;Sheffield", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "China;United Kingdom" }, { "id": "3LFy3dUS86", "title": "P2RBOX:A SINGLE POINT IS ALL YOU NEED TRAINING ORIENTED OBJECT DETECTOR", "track": "main", "status": "Reject", "tldr": "", "abstract": "Oriented object detection, a specialized subfield in computer vision, finds applications across diverse scenarios, excelling particularly when dealing with objects of arbitrary orientations. Conversely, point annotation, which treats objects as single points, offers a cost-effective alternative to rotated and horizontal bounding boxes but sacrifices performance due to the loss of size and orientation information. In this study, we introduce the P2RBox network, which leverages point annotations and a mask generator to create mask proposals, followed by filtration through our Inspector Module and Constrainer Module. This process selects high-quality masks, which are subsequently converted into rotated box annotations for training a fully supervised detector. Specifically, we've thoughtfully crafted an Inspector Module rooted in multi-instance learning principles to evaluate the semantic score of masks. We've also proposed a more robust mask quality assessment in conjunction with the Constrainer Module. Furthermore, we've introduced a Symmetry Axis Estimation (SAE) Module inspired by the spectral theorem for symmetric matrices to transform the top-performing mask proposal into rotated bounding boxes. P2RBox performs well with three fully supervised rotated object detectors: RetinaNet, Rotated FCOS, and Oriented R-CNN. By combining with Oriented R-CNN, P2RBox achieves 62.26% on DOTA-v1.0 test dataset. 
As far as we know, this is the first attempt at training an oriented object detector with point supervision.", "keywords": "point-supervised rotated object detector", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/12da55ba9240a7bc4fe02052990810b9ddb2cb5d.pdf", "author": "Guangming Cao;Xuehui Yu;Wenwen Yu;Xumeng Han;Guorong Li;Jianbin Jiao;Zhenjun Han", "authorids": "~Guangming_Cao1;~Xuehui_Yu1;~Wenwen_Yu2;~Xumeng_Han1;~Guorong_Li1;~Jianbin_Jiao1;~Zhenjun_Han1", "gender": "M;;M;F;M;M;M", "homepage": "https://github.com/guangminglftg;http://vision.ucas.ac.cn/;;https://people.ucas.edu.cn/~GuorongLi?language=en;http://lamp.ucas.ac.cn/;https://people.ucas.ac.cn/~hanzhj;https://yinglang.github.io/", "dblp": ";;297/3745;28/4782;;11/2938;243/8603", "google_scholar": ";;https://scholar.google.cz/citations?user=LWu_FiQAAAAJ;AiuGlVQAAAAJ;;0rK4yTcAAAAJ;WYrxoBEAAAAJ", "orcid": ";;0000-0002-1636-463X;0000-0003-3954-2387;;;", "linkedin": ";;;;;;", "or_profile": "~Guangming_Cao1;~Wenwen_Yu2;~Xumeng_Han1;~Guorong_Li1;~Jianbin_Jiao1;~Zhenjun_Han1;~hui_ying2", "aff": "University of Chinese Academy of Sciences;University of Chinese Academy of Sciences;University of Chinese Academy of Sciences;University of Chinese Academy of Sciences;University of Chinese Academy of Sciences;University of Chinese Academy of Sciences;University of Chinese Academy of Sciences", "aff_domain": "ucas.ac.cn;ucas.ac.cn;ucas.ac.cn;ucas.ac.cn;ucas.ac.cn;ucas.ac.cn;mails.ucas.ac.cn", "position": "MS student;MS student;PhD student;Full Professor;Full Professor;Associate Professor;PhD student", "bibtex": "@misc{\ncao2024prboxa,\ntitle={P2{RBOX}:A {SINGLE} {POINT} {IS} {ALL} {YOU} {NEED} {TRAINING} {ORIENTED} {OBJECT} {DETECTOR}},\nauthor={Guangming Cao and Xuehui Yu and Wenwen Yu and Xumeng Han and Guorong Li and Jianbin Jiao and Zhenjun Han},\nyear={2024},\nurl={https://openreview.net/forum?id=3LFy3dUS86}\n}", "github": "", "project": "", "reviewers": "KHmW;4XWe;oNSm;a8aF;JsqB;hRnG", "site": "https://openreview.net/forum?id=3LFy3dUS86", "pdf_size": 7901729, "rating": "3;3;3;3;5;6", "confidence": "4;5;5;3;4;4", "soundness": "2;2;2;3;3;3", "contribution": "1;2;2;3;2;4", "presentation": "2;2;2;3;3;3", "wc_summary": "40;91;75;80;60;68", "wc_strengths": "20;50;66;24;58;97", "wc_weaknesses": "30;232;342;231;186;142", "wc_questions": "15;4;153;27;87;83", "wc_review": "105;377;636;362;391;390", "wc_reply_reviewers": "0;0;59;18;0;49", "wc_reply_authors": "228;645;652;464;577;439", "reply_reviewers": "0;0;1;1;0;1", "reply_authors": "1;1;1;1;1;1", "rating_avg": [ 3.8333333333333335, 1.2133516482134197 ], "confidence_avg": [ 4.166666666666667, 0.6871842709362768 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.3333333333333335, 0.9428090415820634 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 69.0, 16.14517471774978 ], "wc_strengths_avg": [ 52.5, 26.03683288471673 ], "wc_weaknesses_avg": [ 193.83333333333334, 95.20227004763186 ], "wc_questions_avg": [ 61.5, 51.87083316598388 ], "wc_review_avg": [ 376.8333333333333, 153.65265662815236 ], "wc_reply_reviewers_avg": [ 21.0, 24.358434541926815 ], "wc_reply_authors_avg": [ 500.8333333333333, 146.6599430277023 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.1665741511631924, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:1EacpzyPU6UJ:scholar.google.com/&scioq=P2RBOX:A+SINGLE+POINT+IS+ALL+YOU+NEED+TRAINING+ORIENTED+OBJECT+DETECTOR&hl=en&as_sdt=0,47", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "University of Chinese Academy of Sciences", "aff_unique_dep": "", "aff_unique_url": "http://www.ucas.ac.cn", "aff_unique_abbr": "UCAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "3LLkES6nNs", "title": "Infinitely Deep Residual Networks: Unveiling Wide Neural ODEs as Gaussian Processes", "track": "main", "status": "Reject", "tldr": "", "abstract": "While Neural Ordinary Differential Equations (Neural ODEs) have demonstrated practical numerical success, our theoretical understanding of them remains limited. Notably, we still lack convergence results and prediction performance estimates for Neural ODEs trained using gradient-based methods. Inspired by numerical analysis, one might investigate Neural ODEs by studying the limiting behavior of Residual Networks (ResNets) as depth $\\ell$ approaches to infinity. However, a significant challenge arises due to the prevalent use of shared parameters in Neural ODEs. Consequently, the corresponding ResNets possess \\textit{infinite depth} and \\textit{shared weights} across all layers. This characteristic prevents the direct application of methods relying on Stochastic Differential Equations (SDEs) to ResNets.\n\nIn this paper, we analyze Neural ODEs using an infinitely deep ResNet with shared weights. Our analysis is rooted in asymptotic analysis from random matrix theory (RMT). Consequently, we establish the Neural Network and Gaussian Process (NNGP) correspondence for Neural ODEs, regardless of whether the parameters are shared. Remarkably, the resulting Gaussian processes (GPs) exhibit distinct behaviors depending on the use of parameter sharing, setting them apart from other neural network architectures such as feed-forward, convolutional, and recurrent networks. Moreover, we prove that, in the presence of these divergent GPs, NNGP kernels are strictly positive definite when non-polynomial activation functions are applied. These findings lay the foundation for exploring the training and generalization of Neural ODEs, paving the way for future research in this domain. Additionally, we furnish an efficient dynamic programming algorithm for calculating the covariance matrix for given input data. 
Finally, we conduct a series of numerical experiments to support our theoretical findings.", "keywords": "Neural ODE;Gaussian Process;Neural Tangent Kernel;Neural Network and Gaussian Process Correspondence;Kernel Methods", "primary_area": "probabilistic methods (Bayesian methods, variational inference, sampling, UQ, etc.)", "supplementary_material": "/attachment/3c0d9fc08306e107fc3e5b73970b5ebd6b0cc75c.pdf", "author": "Tianxiang Gao;Xiaokai Huo;Hailiang Liu;Hongyang Gao", "authorids": "~Tianxiang_Gao2;~Xiaokai_Huo1;~Hailiang_Liu1;~Hongyang_Gao1", "gender": "M;M;M;M", "homepage": "https://gaotx-cs.github.io/;;https://faculty.sites.iastate.edu/hliu/;https://faculty.sites.iastate.edu/hygao/", "dblp": "118/3814;;;200/7985", "google_scholar": "iNLlIbQAAAAJ;;Wq7IGEIAAAAJ;jGmq0aEAAAAJ", "orcid": ";0000-0001-8131-2310;;0000-0002-9020-9080", "linkedin": ";;;hongyang-gao-74924690/", "or_profile": "~Tianxiang_Gao2;~Xiaokai_Huo1;~Hailiang_Liu1;~Hongyang_Gao1", "aff": "Iowa State University;Iowa State University;Iowa State University;Iowa State University", "aff_domain": "iastate.edu;iastate.edu;iastate.edu;iastate.edu", "position": "PhD student;Postdoc;Full Professor;Assistant Professor", "bibtex": "@misc{\ngao2024infinitely,\ntitle={Infinitely Deep Residual Networks: Unveiling Wide Neural {ODE}s as Gaussian Processes},\nauthor={Tianxiang Gao and Xiaokai Huo and Hailiang Liu and Hongyang Gao},\nyear={2024},\nurl={https://openreview.net/forum?id=3LLkES6nNs}\n}", "github": "", "project": "", "reviewers": "ECfA;igTL;CRfF;TYVT", "site": "https://openreview.net/forum?id=3LLkES6nNs", "pdf_size": 1664231, "rating": "3;3;5;6", "confidence": "3;3;3;3", "soundness": "2;3;3;3", "contribution": "2;2;2;3", "presentation": "1;3;2;2", "wc_summary": "29;21;101;52", "wc_strengths": "36;53;77;30", "wc_weaknesses": "43;73;157;154", "wc_questions": "91;3;277;45", "wc_review": "199;150;612;281", "wc_reply_reviewers": "36;0;201;10", "wc_reply_authors": "623;1050;1643;330", "reply_reviewers": "1;0;2;1", "reply_authors": "1;2;3;1", "rating_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 50.75, 31.163881337214722 ], "wc_strengths_avg": [ 49.0, 18.23458252881047 ], "wc_weaknesses_avg": [ 106.75, 49.90177852541931 ], "wc_questions_avg": [ 104.0, 104.61835403025609 ], "wc_review_avg": [ 310.5, 180.25329400596263 ], "wc_reply_reviewers_avg": [ 61.75, 81.4627982578551 ], "wc_reply_authors_avg": [ 911.5, 493.8747310806658 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:xqrBVCds7aIJ:scholar.google.com/&scioq=Infinitely+Deep+Residual+Networks:+Unveiling+Wide+Neural+ODEs+as+Gaussian+Processes&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Iowa State University", "aff_unique_dep": "", "aff_unique_url": "https://www.iastate.edu", "aff_unique_abbr": "ISU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "CrIBo: Self-Supervised Learning via Cross-Image Object-Level Bootstrapping", "status": "Spotlight", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19518", "id": "3M0GXoUEzP", 
"author_site": "Tim Lebailly, Thomas Stegm\u00fcller, Behzad Bozorgtabar, Jean-Philippe Thiran, Tinne Tuytelaars", "tldr": "", "abstract": "Leveraging nearest neighbor retrieval for self-supervised representation learning has proven beneficial with object-centric images. However, this approach faces limitations when applied to scene-centric datasets, where multiple objects within an image are only implicitly captured in the global representation. Such global bootstrapping can lead to undesirable entanglement of object representations. Furthermore, even object-centric datasets stand to benefit from a finer-grained bootstrapping approach. In response to these challenges, we introduce a novel $\\textbf{Cr}$oss-$\\textbf{I}$mage Object-Level $\\textbf{Bo}$otstrapping method tailored to enhance dense visual representation learning. By employing object-level nearest neighbor bootstrapping throughout the training, CrIBo emerges as a notably strong and adequate candidate for in-context learning, leveraging nearest neighbor retrieval at test time. CrIBo shows state-of-the-art performance on the latter task while being highly competitive in more standard downstream segmentation tasks. Our code and pretrained models are publicly available at https://github.com/tileb1/CrIBo.", "keywords": "self-supervised learning;representation learning", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Tim Lebailly;Thomas Stegm\u00fcller;Behzad Bozorgtabar;Jean-Philippe Thiran;Tinne Tuytelaars", "authorids": "~Tim_Lebailly1;~Thomas_Stegm\u00fcller1;~Behzad_Bozorgtabar1;~Jean-Philippe_Thiran1;~Tinne_Tuytelaars1", "gender": "M;;M;M;", "homepage": ";https://people.epfl.ch/thomas.stegmuller?lang=en;https://behzadbozorgtabar.com/;https://people.epfl.ch/jean-philippe.thiran;", "dblp": "276/0970;313/9959;59/10419;t/JeanPhilippeThiran;", "google_scholar": "tQnods8AAAAJ;;kxAk6AoAAAAJ;mII-l2cAAAAJ;", "orcid": ";;0000-0002-5759-4896;0000-0003-2938-9657;", "linkedin": "tim-lebailly/;;behzad-bozorgtabar-72838560/;;", "or_profile": "~Tim_Lebailly1;~Thomas_Stegm\u00fcller1;~Behzad_Bozorgtabar1;~Jean-Philippe_Thiran1;~Tinne_Tuytelaars1", "aff": "Meta Facebook;EPFL - EPF Lausanne;Swiss Federal Institute of Technology Lausanne;University of Lausanne;", "aff_domain": "meta.com;epfl.ch;epfl.ch;unil.ch;", "position": "Intern;PhD student;Lecturer;Associate Professor;", "bibtex": "@inproceedings{\nlebailly2024cribo,\ntitle={Cr{IB}o: Self-Supervised Learning via Cross-Image Object-Level Bootstrapping},\nauthor={Tim Lebailly and Thomas Stegm{\\\"u}ller and Behzad Bozorgtabar and Jean-Philippe Thiran and Tinne Tuytelaars},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=3M0GXoUEzP}\n}", "github": "", "project": "", "reviewers": "w5sb;4jZr;sFy5;FBUq", "pdf_size": 8393658, "rating": "6;8;8;10", "confidence": "5;5;4;4", "soundness": "3;3;3;4", "contribution": "3;3;3;4", "presentation": "4;3;3;4", "wc_summary": "84;80;89;95", "wc_strengths": "82;93;113;105", "wc_weaknesses": "322;232;116;164", "wc_questions": "58;279;8;81", "wc_review": "546;684;326;445", "wc_reply_reviewers": "129;163;29;37", "wc_reply_authors": "1005;736;175;664", "reply_reviewers": "1;1;2;1", "reply_authors": "3;2;2;1", "rating_avg": [ 8.0, 1.4142135623730951 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 3.25, 0.4330127018922193 ], 
"presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 87.0, 5.612486080160912 ], "wc_strengths_avg": [ 98.25, 11.776565713313877 ], "wc_weaknesses_avg": [ 208.5, 77.41285422977246 ], "wc_questions_avg": [ 106.5, 103.03033533867585 ], "wc_review_avg": [ 500.25, 131.5985847188335 ], "wc_reply_reviewers_avg": [ 89.5, 57.8338136387356 ], "wc_reply_authors_avg": [ 645.0, 299.6422867353672 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.7071067811865476, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11621237308825909642&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "openreview": "https://openreview.net/forum?id=3M0GXoUEzP", "pdf": "https://openreview.net/pdf?id=3M0GXoUEzP", "email": "meta.com;epfl.ch;epfl.ch;unil.ch;", "author_num": 5, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Meta;EPFL;Swiss Federal Institute of Technology Lausanne;University of Lausanne", "aff_unique_dep": "Meta Platforms, Inc.;;;", "aff_unique_url": "https://meta.com;https://www.epfl.ch;https://www.epfl.ch;https://www.unil.ch", "aff_unique_abbr": "Meta;EPFL;EPFL;UNIL", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Lausanne", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "United States;Switzerland" }, { "id": "3NMYMLL92j", "title": "Brain encoding models based on binding multiple modalities across audio, language, and vision", "track": "main", "status": "Reject", "tldr": "", "abstract": "Multimodal associative learning of sensory stimuli (images, text, audio) has created powerful representations for these modalities that work across a multitude of tasks with simple task heads without even (fine)tuning features on target datasets. Such representations are being increasingly used to study neural activity and understand how our brain responds to such stimuli. While previous work has focused on static images, deep understanding of a video involves not just recognizing the individual objects present in each frame, but also requires a detailed semantic description of their interactions over time and their narrative roles. In this paper, we seek to evaluate whether new multimodally aligned features (like ImageBind) are better than previous ones in explaining fMRI responses to external stimuli, thereby allowing for a better understanding of how the brain and its different areas process external stimuli, converting them into meaningful high-level understanding, and actionable signals. In addition, we explore whether generative AI based modality conversion helps to disentangle the semantic part of the visual stimulus allowing for a more granular localization of such processing in the brain. Towards this end, given a dataset of fMRI responses from subjects watching short video clips, we first generate detailed multi-event video captions. Next, we synthesize audio from these generated text captions using a text-to-speech model. Further, we use a joint embedding across different modalities (audio, text and video) using the recently proposed ImageBind model. We use this joint embedding to train encoding models that predict fMRI brain responses. We infer from our experimental findings and computational results that the visual system's primary goal may revolve around converting visual input into comprehensive semantic scene descriptions. 
Further, multimodal feature alignment helps obtain richer representations for all modalities (audio, text and video) leading to improved performance compared to unimodal representations across well-known multimodal processing brain regions.", "keywords": "Multimodal Transformers;fMRI;ImageBind;cognitive neuroscience;brain encoding;movie clips;NLP;language models", "primary_area": "applications to neuroscience & cognitive science", "supplementary_material": "", "author": "Khushbu Pahwa;SUBBA REDDY OOTA;Advaith Malladi;Maneesh Kumar Singh;Manish Gupta;Bapi Raju Surampudi", "authorids": "~Khushbu_Pahwa1;~SUBBA_REDDY_OOTA1;~Advaith_Malladi1;~Maneesh_Kumar_Singh1;~Manish_Gupta1;~Bapi_Raju_Surampudi1", "gender": "F;M;M;M;M;", "homepage": ";https://sites.google.com/view/subbareddyoota300/home?authuser=0;https://advaithmall.github.io/;https://arxiv.org/search/?query=Singh%2C+Maneesh&searchtype=author&abstracts=show&order=-announced_date_first&size=50;https://sites.google.com/view/manishg/;", "dblp": "299/8490;190/1709;372/4611;263/9205-1;g/ManishGupta1.html;", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.co.in/citations?user=4Uz0LngAAAAJ;https://scholar.google.co.in/citations?user=uZZrApkAAAAJ;hdQhiFgAAAAJ;https://scholar.google.co.in/citations?user=eX9PSu0AAAAJ;", "orcid": ";0000-0002-5975-622X;;0000-0002-7414-1813;0000-0002-2843-3110;", "linkedin": "khushbupahwa;subba-reddy-oota-11a91254/;advaith-malladi-30175326b/;maneesh-singh-3523ab9/;manishsgupta/;", "or_profile": "~Khushbu_Pahwa1;~SUBBA_REDDY_OOTA1;~Advaith_Malladi1;~Maneesh_Kumar_Singh1;~Manish_Gupta1;~Bapi_Raju_Surampudi1", "aff": "Rice University;MPI-SWS;International Institute of Information Technology, Hyderabad, International Institute of Information Technology Hyderabad;Spector Inc;Microsoft;", "aff_domain": "rice.edu;mpi-sws.org;research.iiit.ac.in;spector.com;microsoft.com;", "position": "MS student;Visiting Scholar;Undergrad student;Head, AI Research & Technologies;Principal Researcher;", "bibtex": "@misc{\npahwa2024brain,\ntitle={Brain encoding models based on binding multiple modalities across audio, language, and vision},\nauthor={Khushbu Pahwa and SUBBA REDDY OOTA and Advaith Malladi and Maneesh Kumar Singh and Manish Gupta and Bapi Raju Surampudi},\nyear={2024},\nurl={https://openreview.net/forum?id=3NMYMLL92j}\n}", "github": "", "project": "", "reviewers": "77Kr;TnaU;bZKi", "site": "https://openreview.net/forum?id=3NMYMLL92j", "pdf_size": 2841931, "rating": "1;3;8", "confidence": "5;5;5", "soundness": "1;2;3", "contribution": "1;2;3", "presentation": "1;3;3", "wc_summary": "68;45;82", "wc_strengths": "44;31;47", "wc_weaknesses": "214;253;17", "wc_questions": "133;18;53", "wc_review": "459;347;199", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "2014;0;0", "reply_reviewers": "0;0;0", "reply_authors": "3;0;0", "rating_avg": [ 4.0, 2.943920288775949 ], "confidence_avg": [ 5.0, 0.0 ], "soundness_avg": [ 2.0, 0.816496580927726 ], "contribution_avg": [ 2.0, 0.816496580927726 ], "presentation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 65.0, 15.253414918196734 ], "wc_strengths_avg": [ 40.666666666666664, 6.944222218666553 ], "wc_weaknesses_avg": [ 161.33333333333334, 103.2935407252339 ], "wc_questions_avg": [ 68.0, 48.13176359397884 ], "wc_review_avg": [ 335.0, 106.48317550987417 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 671.3333333333334, 949.4087048731378 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 1.4142135623730951 
], "replies_avg": [ 9, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:w356AAv04OEJ:scholar.google.com/&scioq=Brain+encoding+models+based+on+binding+multiple+modalities+across+audio,+language,+and+vision&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Rice University;Max Planck Institute for Software Systems;International Institute of Information Technology, Hyderabad;Spector Inc;Microsoft", "aff_unique_dep": ";;;;Microsoft Corporation", "aff_unique_url": "https://www.rice.edu;https://www.mpi-sws.org;https://iiit Hyderabad.ac.in;;https://www.microsoft.com", "aff_unique_abbr": "Rice;MPI-SWS;IIIT Hyderabad;;Microsoft", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hyderabad", "aff_country_unique_index": "0;1;2;0;0", "aff_country_unique": "United States;Germany;India" }, { "id": "3NXhwkZGjz", "title": "Source-Free Unsupervised Domain Adaptation with Hypothesis Consolidation of Prediction Rationale", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Source-Free Unsupervised Domain Adaptation (SFUDA) is a challenging task where a model needs to be adapted to a new domain without access to target domain labels or source domain data. The primary difficulty in this task is that the model's predictions may be inaccurate, and using these inaccurate predictions for model adaptation can lead to misleading results. To address this issue, this paper proposes a novel approach that considers multiple prediction hypotheses for each sample and investigates the rationale behind each hypothesis. By consolidating these hypothesis rationales, we identify the most likely correct hypotheses, which we then use as a pseudo-labeled set to support a semi-supervised learning procedure for model adaptation. To achieve the optimal performance, we propose a three-step adaptation process: model pre-adaptation, hypothesis consolidation, and semi-supervised learning. 
Extensive experimental results demonstrate that our approach achieves state-of-the-art performance in the SFUDA task and can be easily integrated into existing approaches to improve their performance.", "keywords": "Source-Free Unsupervised Domain Adaptation;Hypothesis Consolidation;Prediction Rationale", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "", "author": "Yangyang Shu;Lingqiao Liu;Xiaofeng Cao;Qi Chen;Bowen Zhang;Ziqin Zhou;Anton van den Hengel", "authorids": "~Yangyang_Shu1;~Lingqiao_Liu3;~Xiaofeng_Cao2;~Qi_Chen4;~Bowen_Zhang3;~Ziqin_Zhou1;~Anton_van_den_Hengel1", "gender": "M;M;M;M;F;;M", "homepage": "https://ganperf.github.io/yangyangshu.github.io/;https://sites.google.com/site/lingqiaoliu83/;https://chenqi008.github.io/;;;;https://xiaofengcaoml.github.io/", "dblp": "201/7247.html;45/7776;66/6320-14;;;v/AntonvandenHengel;117/3982-2.html", "google_scholar": "TpdRFZIAAAAJ;Y2xu62UAAAAJ;OgKU77kAAAAJ;;uS_cqPUAAAAJ;https://scholar.google.com.au/citations?user=nMGZ2ZQAAAAJ;", "orcid": ";;0000-0001-8732-8049;0000-0001-6180-6815;0000-0002-5736-1232;0000-0003-3027-8364;", "linkedin": ";;qi-chen-4b1a72287;;;;", "or_profile": "~Yangyang_Shu1;~Lingqiao_Liu3;~Qi_Chen4;~Bowen_Zhang3;~Ziqin_Zhou1;~Anton_van_den_Hengel1;~Xiaofeng_Cao1", "aff": "University of Adelaide;University of Adelaide;University of Adelaide;University of Adelaide;University of Adelaide;University of Adelaide;Jilin University", "aff_domain": "adelaide.edu.au;adelaide.edu.au;adelaide.edu.au;adelaide.edu.au;adelaide.edu.au;adelaide.edu.au;jlu.edu.cn", "position": "Postdoc;Associate Professor;PhD student;PhD student;PhD student;Professor;Associate Professor", "bibtex": "@misc{\nshu2024sourcefree,\ntitle={Source-Free Unsupervised Domain Adaptation with Hypothesis Consolidation of Prediction Rationale},\nauthor={Yangyang Shu and Lingqiao Liu and Xiaofeng Cao and Qi Chen and Bowen Zhang and Ziqin Zhou and Anton van den Hengel},\nyear={2024},\nurl={https://openreview.net/forum?id=3NXhwkZGjz}\n}", "github": "", "project": "", "reviewers": "MZzB;qsm3;oDf4;Pcxi;8pzv", "site": "https://openreview.net/forum?id=3NXhwkZGjz", "pdf_size": 4559014, "rating": "3;3;5;5;6", "confidence": "4;4;4;5;3", "soundness": "1;2;3;2;3", "contribution": "2;2;2;3;3", "presentation": "1;2;2;3;3", "wc_summary": "334;25;29;84;78", "wc_strengths": "63;9;39;49;41", "wc_weaknesses": "1322;81;152;130;188", "wc_questions": "131;2;38;2;2", "wc_review": "1850;117;258;265;309", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "0;0;0;0;0", "rating_avg": [ 4.4, 1.2 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.2, 0.7483314773547882 ], "contribution_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.2, 0.7483314773547882 ], "wc_summary_avg": [ 110.0, 114.59668407070075 ], "wc_strengths_avg": [ 40.2, 17.735839421916292 ], "wc_weaknesses_avg": [ 374.6, 474.9667777855626 ], "wc_questions_avg": [ 35.0, 49.98399743918047 ], "wc_review_avg": [ 559.8, 648.3176381990544 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.2635231383473649, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:9bEJlcm1BzAJ:scholar.google.com/&scioq=Source-Free+Unsupervised+Domain+Adaptation+with+Hypothesis+Consolidation+of+Prediction+Rationale&hl=en&as_sdt=0,5", "gs_version_total": 4, "aff_unique_index": "0;0;0;0;0;0;1", "aff_unique_norm": "University of Adelaide;Jilin University", "aff_unique_dep": ";", "aff_unique_url": "https://www.adelaide.edu.au;http://www.jlu.edu.cn", "aff_unique_abbr": "Adelaide;JLU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;1", "aff_country_unique": "Australia;China" }, { "title": "Don't Play Favorites: Minority Guidance for Diffusion Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19517", "id": "3NmO9lY4Jn", "author_site": "Soobin Um, Suhyeon Lee, Jong Chul YE", "tldr": "", "abstract": "We explore the problem of generating minority samples using diffusion models. The minority samples are instances that lie on low-density regions of a data manifold. Generating a sufficient number of such minority instances is important, since they often contain some unique attributes of the data. However, the conventional generation process of the diffusion models mostly yields majority samples (that lie on high-density regions of the manifold) due to their high likelihoods, making themselves ineffective and time-consuming for the minority generating task. In this work, we present a novel framework that can make the generation process of the diffusion models focus on the minority samples. We first highlight that Tweedie's denoising formula yields favorable results for majority samples. The observation motivates us to introduce a metric that describes the uniqueness of a given sample. To address the inherent preference of the diffusion models w.r.t. the majority samples, we further develop *minority guidance*, a sampling technique that can guide the generation process toward regions with desired likelihood levels. Experiments on benchmark real datasets demonstrate that our minority guidance can greatly improve the capability of generating high-quality minority samples over existing generative samplers. We showcase that the performance benefit of our framework persists even in demanding real-world scenarios such as medical imaging, further underscoring the practical significance of our work. 
Code is available at https://github.com/soobin-um/minority-guidance.", "keywords": "diffusion models;fairness;generative models;minority generation", "primary_area": "generative models", "supplementary_material": "/attachment/c0ea9b6ed7ee3d7f288e6eeba0e0f3b0b3cb6134.zip", "author": "Soobin Um;Suhyeon Lee;Jong Chul Ye", "authorids": "~Soobin_Um1;~Suhyeon_Lee2;~Jong_Chul_Ye1", "gender": ";M;M", "homepage": "https://sites.google.com/view/soobinum;https://github.com/hyn2028;https://bispl.weebly.com/", "dblp": "339/0076;342/2820;15/5613", "google_scholar": ";V9rMrFQAAAAJ;HNMjoNEAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Soobin_Um1;~Suhyeon_Lee2;~Jong_Chul_Ye1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "position": "PhD student;MS student;Full Professor", "bibtex": "@inproceedings{\num2024dont,\ntitle={Don't Play Favorites: Minority Guidance for Diffusion Models},\nauthor={Soobin Um and Suhyeon Lee and Jong Chul Ye},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=3NmO9lY4Jn}\n}", "github": "", "project": "", "reviewers": "mz51;xCB3;Xf5u;F8nE", "pdf_size": 27130256, "rating": "3;6;6;6", "confidence": "5;4;4;4", "soundness": "2;3;3;3", "contribution": "2;3;3;2", "presentation": "3;3;3;2", "wc_summary": "55;84;96;64", "wc_strengths": "42;36;154;42", "wc_weaknesses": "266;184;157;150", "wc_questions": "89;4;2;71", "wc_review": "452;308;409;327", "wc_reply_reviewers": "289;11;0;13", "wc_reply_authors": "1762;566;428;1091", "reply_reviewers": "3;1;0;1", "reply_authors": "5;2;1;2", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 74.75, 16.145819892467525 ], "wc_strengths_avg": [ 68.5, 49.42418436352794 ], "wc_weaknesses_avg": [ 189.25, 46.09433262343647 ], "wc_questions_avg": [ 41.5, 39.02883549377306 ], "wc_review_avg": [ 374.0, 58.89397252690635 ], "wc_reply_reviewers_avg": [ 78.25, 121.77720435286729 ], "wc_reply_authors_avg": [ 961.75, 524.0736470191952 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.5, 1.5 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9567931568206842834&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 6, "openreview": "https://openreview.net/forum?id=3NmO9lY4Jn", "pdf": "https://openreview.net/pdf?id=3NmO9lY4Jn", "email": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "GIO: Gradient Information Optimization for Training Dataset Selection", "status": "Spotlight", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19516", "id": "3NnfJnbJT2", "author_site": "Dante Everaert, Christopher Potts", "tldr": "", "abstract": "It is often advantageous to train models on a subset of the available train examples, because the examples 
are of variable quality or because one would like to train with fewer examples, without sacrificing performance. We present Gradient Information Optimization (GIO), a scalable, task-agnostic approach to this data selection problem that requires only a small set of (unlabeled) examples representing a target distribution. GIO begins from a natural, information-theoretic objective that is intractable in practice. Our contribution is in showing that it can be made highly scalable through a simple relaxation of the objective and a highly efficient implementation. In experiments with machine translation, spelling correction, and image recognition, we show that GIO delivers outstanding results with very small train sets. These findings are robust to different representation models and hyperparameters for GIO itself. GIO is task- and domain-agnostic and can be applied out-of-the-box to new datasets and domains. We open source a pip-installable implementation of the algorithm as \"pip install grad-info-opt\".", "keywords": "data selection;data-centric AI;information theory;kl divergence;gradient;natural language processing;computer vision", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/c15baefa602a6cff4257f111c50fd9f07d2752fa.zip", "author": "Dante Everaert;Christopher Potts", "authorids": "~Dante_Everaert1;~Christopher_Potts1", "gender": "M;M", "homepage": "https://www.amazon.science/author/dante-everaert;http://web.stanford.edu/~cgpotts/", "dblp": ";13/2617", "google_scholar": ";3j08YoAAAAAJ", "orcid": ";0000-0002-7978-6055", "linkedin": "dante-everaert/;", "or_profile": "~Dante_Everaert1;~Christopher_Potts1", "aff": "Amazon;Stanford University", "aff_domain": "amazon.com;stanford.edu", "position": "Researcher;Full Professor", "bibtex": "@inproceedings{\neveraert2024gio,\ntitle={{GIO}: Gradient Information Optimization for Training Dataset Selection},\nauthor={Dante Everaert and Christopher Potts},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=3NnfJnbJT2}\n}", "github": "", "project": "", "reviewers": "C7wm;u6CK;NSKq;Km99", "pdf_size": 3358561, "rating": "6;6;8;8", "confidence": "3;4;3;3", "soundness": "2;3;3;4", "contribution": "2;3;3;4", "presentation": "1;4;3;3", "wc_summary": "90;43;82;163", "wc_strengths": "18;52;40;127", "wc_weaknesses": "109;186;158;135", "wc_questions": "120;67;396;24", "wc_review": "337;348;676;449", "wc_reply_reviewers": "67;0;242;22", "wc_reply_authors": "887;864;1556;549", "reply_reviewers": "1;0;2;1", "reply_authors": "2;2;4;1", "rating_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 94.5, 43.36184959154764 ], "wc_strengths_avg": [ 59.25, 40.97178907492325 ], "wc_weaknesses_avg": [ 147.0, 28.416544476765644 ], "wc_questions_avg": [ 151.75, 145.0592551338935 ], "wc_review_avg": [ 452.5, 136.22132725825278 ], "wc_reply_reviewers_avg": [ 82.75, 95.06149325568161 ], "wc_reply_authors_avg": [ 964.0, 366.95299426493307 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 11, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=295988787452368742&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "openreview": "https://openreview.net/forum?id=3NnfJnbJT2", "pdf": "https://openreview.net/pdf?id=3NnfJnbJT2", "email": "amazon.com;stanford.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Amazon;Stanford University", "aff_unique_dep": "Amazon.com, Inc.;", "aff_unique_url": "https://www.amazon.com;https://www.stanford.edu", "aff_unique_abbr": "Amazon;Stanford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "3Ok7ccvtf3", "title": "UNLEARNING THE UNWANTED DATA FROM A PERSONALIZED RECOMMENDATION MODEL", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Recommender Systems (RS) learn user behavior by monitoring their activities on the online platform. In a few scenarios, users consume the content but don\u2019t want to get their recommendations because a). They consumed the content by mistake, and those interactions have been utilized in personalizing the model; b) The content was consumed by someone else on their behalf; c) Data acquisition was faulty because of machine failure; d) The user has lost interest in the service, etc. Out of any of these reasons, the user wants the data that was used for generating the recommendation to be unlearned by RS. The constraints with this unlearning are 1) The user\u2019s other data should be intact, 2) Personalized experience should not be affected, and 3) We can not afford training from scratch. To solve the stated problem, a few unlearning strategies have already been proposed, but unlearning the matrix factorization-based model is not much explored. In this work, we propose a solution of unlearning from the faulty recommendation model (m1) by diluting the impact of unwanted data. To do so, we first correct the unwanted data and pre- pare an intermediate tiny model m2, referred to as the rescue model. Further, we apply the convolution fusion function (CFF) on the latent features acquired using m1 , m2 . The performance of the proposed method is evaluated on multiple public datasets. 
We observed that the proposed method outperforms SOTA benchmark models on recommendation tasks.", "keywords": "machine unlearning;recommendation system;matrix factorization", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Narayan Chaturvedi;Brijraj Singh;Niranjan Pedanekar", "authorids": "~Narayan_Chaturvedi1;~Brijraj_Singh1;~Niranjan_Pedanekar1", "gender": "M;;M", "homepage": ";;", "dblp": ";;131/9354", "google_scholar": ";;UksQ7SEAAAAJ", "orcid": ";;", "linkedin": "narayan-chaturvedi-phd-a3a89311/;;", "or_profile": "~Narayan_Chaturvedi1;~Brijraj_Singh1;~Niranjan_Pedanekar1", "aff": ";;Sony", "aff_domain": ";;sony.com", "position": ";;Principal Researcher", "bibtex": "@misc{\nanonymous2024unlearning,\ntitle={{UNLEARNING} {THE} {UNWANTED} {DATA} {FROM} A {PERSONALIZED} {RECOMMENDATION} {MODEL}},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=3Ok7ccvtf3}\n}", "github": "", "project": "", "reviewers": "n57L;o6ud;HcxE;GJxA", "site": "https://openreview.net/forum?id=3Ok7ccvtf3", "pdf_size": 782045, "rating": "3;3;3;5", "confidence": "3;4;3;2", "soundness": "3;1;1;2", "contribution": "3;2;1;3", "presentation": "1;1;2;2", "wc_summary": "165;86;45;51", "wc_strengths": "88;36;18;79", "wc_weaknesses": "161;318;204;168", "wc_questions": "136;12;54;45", "wc_review": "550;452;321;343", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "271;228;370;276", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 1.75, 0.82915619758885 ], "contribution_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 1.5, 0.5 ], "wc_summary_avg": [ 86.75, 47.814093110713706 ], "wc_strengths_avg": [ 55.25, 29.13224158900238 ], "wc_weaknesses_avg": [ 212.75, 62.9181015288923 ], "wc_questions_avg": [ 61.75, 45.630992757116296 ], "wc_review_avg": [ 416.5, 91.65833295451101 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 286.25, 51.8284429632996 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:aBJC3RT9gScJ:scholar.google.com/&scioq=UNLEARNING+THE+UNWANTED+DATA+FROM+A+PERSONALIZED+RECOMMENDATION+MODEL&hl=en&as_sdt=0,21", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Sony Corporation", "aff_unique_dep": "", "aff_unique_url": "https://www.sony.com", "aff_unique_abbr": "Sony", "aff_country_unique_index": "0", "aff_country_unique": "Japan" }, { "id": "3OtVLnXfmS", "title": "GenNBV: Generalizable Next-Best-View Policy for Active 3D Reconstruction", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Even with the recent advances in neural radiance rendering (NeRF) enable high-quality digitization of large-scale scenes, the image-capturing process is time-consuming and labor-intensive. Previous works attempt to automate this process using active 3D reconstruction, with the Next-Best-View (NBV) policy being the most well-known. However, the majority of NBV policies are rule-based and only apply to a predefined limited action space, limiting their generalization ability. In this work, we propose \\emph{GenNBV}, a novel framework that endows the first free-space NBV policy with generalizability through end-to-end training. 
This policy is reinforcement learning (RL)-based and empowers a 3D scanning drone to capture from any viewpoint and interact with the environment across diverse scenarios, even those involving unseen structures during training. We also proposed a novel scene representation using action, geometric, and semantic embeddings, to further boost generalizability. To evaluate this NBV policy, we also establish a benchmark using the Isaac Gym simulator with the Houses3K and OmniObject3D datasets. Experiments demonstrate that our approach achieves a 98.26\\% and 83.61\\% coverage ratio on unseen buildings from these datasets, respectively, outperforming prior solutions.", "keywords": "Active 3D Reconstruction;Next-Best-View Prediction;Generalization;Reinforcement Learning", "primary_area": "applications to robotics, autonomy, planning", "supplementary_material": "/attachment/9dfbf6260f90878614e4964a296792957ed2a888.pdf", "author": "Xiao Chen;Quanyi Li;Tai Wang;Tianfan Xue;Jiangmiao Pang", "authorids": "~Xiao_Chen6;~Quanyi_Li1;~Tai_Wang2;~Tianfan_Xue2;~Jiangmiao_Pang1", "gender": "M;M;M;M;M", "homepage": "https://xiao-chen.tech;https://quanyili.github.io;https://tianfan.info;https://oceanpang.github.io/;https://tai-wang.github.io/", "dblp": ";270/7691;54/8652;231/7630;", "google_scholar": "SW2HLB0AAAAJ;Ty49X3UAAAAJ;RfSQKrIAAAAJ;https://scholar.google.com/citations?authuser=0;JmbbZWIAAAAJ", "orcid": ";;0000-0001-5031-6618;0000-0002-6711-9319;", "linkedin": "xiao-chen-%E9%99%88%E9%AA%81-3b4a731a0/;https://www.linkedin.com/mwlite/in/quanyi-li-2b7985183;tianfan-xue-54016716;;%E6%B3%B0-%E7%8E%8B-2b2738147/", "or_profile": "~Xiao_Chen6;~Quanyi_Li1;~Tianfan_Xue2;~Jiangmiao_Pang1;~Tai_WANG1", "aff": "Shanghai Artificial Intelligence Laboratory;University of Edinburgh;The Chinese University of Hong Kong;Shanghai AI Laboratory ;Shanghai AI Laboratory", "aff_domain": "pjlab.org.cn;ed.ac.uk;cuhk.edu.hk;pjlab.org.cn;pjlab.org.cn", "position": "Intern;MS student;Assistant Professor;Research Scientist;Research Scientist", "bibtex": "@misc{\nchen2024gennbv,\ntitle={Gen{NBV}: Generalizable Next-Best-View Policy for Active 3D Reconstruction},\nauthor={Xiao Chen and Quanyi Li and Tai Wang and Tianfan Xue and Jiangmiao Pang},\nyear={2024},\nurl={https://openreview.net/forum?id=3OtVLnXfmS}\n}", "github": "", "project": "", "reviewers": "k3Gz;NscJ;gNx2;7e87", "site": "https://openreview.net/forum?id=3OtVLnXfmS", "pdf_size": 5341135, "rating": "3;3;5;6", "confidence": "5;3;4;2", "soundness": "1;2;2;3", "contribution": "1;1;2;3", "presentation": "1;2;2;3", "wc_summary": "81;67;76;58", "wc_strengths": "26;47;113;34", "wc_weaknesses": "178;152;225;249", "wc_questions": "1;20;61;6", "wc_review": "286;286;475;347", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 2.0, 0.7071067811865476 ], "contribution_avg": [ 1.75, 0.82915619758885 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 70.5, 8.789197915623474 ], "wc_strengths_avg": [ 55.0, 34.31471987354698 ], "wc_weaknesses_avg": [ 201.0, 38.11167800031901 ], "wc_questions_avg": [ 22.0, 23.569047498785352 ], "wc_review_avg": [ 348.5, 77.16378684331141 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 
-0.6024640760767093, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7471704688960664825&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1;2;3;3", "aff_unique_norm": "Shanghai Artificial Intelligence Laboratory;University of Edinburgh;Chinese University of Hong Kong;Shanghai AI Laboratory", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.shailab.org/;https://www.ed.ac.uk;https://www.cuhk.edu.hk;https://www.shanghai-ai-lab.com", "aff_unique_abbr": "Shanghai AI Lab;Edinburgh;CUHK;SAIL", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "China;United Kingdom" }, { "id": "3OzQhhPLyW", "title": "Meta-Value Learning: a General Framework for Learning with Learning Awareness", "track": "main", "status": "Reject", "tldr": "", "abstract": "Gradient-based learning in multi-agent systems is difficult because the gradient derives from a first-order model which does not account for the interaction between agents\u2019 learning processes. LOLA (Foerster et al., 2018) accounts for this by differentiating through one step of optimization. We propose to judge joint policies by their long-term prospects as measured by the meta-value, a discounted sum over the returns of future optimization iterates. We apply a form of Q-learning to the meta-game of optimization, in a way that avoids the need to explicitly represent the continuous action space of policy updates. The resulting method, MeVa, is consistent and far-sighted, and does not require REINFORCE estimators. We analyze the behavior of our method on a toy game and compare to prior work on repeated matrix games.", "keywords": "multi-agent reinforcement learning;meta-learning", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/413519cca839887d6e9de828a80799fb29af6808.zip", "author": "Tim Cooijmans;Milad Aghajohari;Aaron Courville", "authorids": "~Tim_Cooijmans1;~Milad_Aghajohari1;~Aaron_Courville3", "gender": "M;M;", "homepage": ";;", "dblp": "153/5756;241/5265;56/1688", "google_scholar": "https://scholar.google.ca/citations?user=Ec6vKzwAAAAJ;;https://scholar.google.ca/citations?user=km6CP8cAAAAJ", "orcid": ";;", "linkedin": ";milad-aghajohari-6b9a669b/;", "or_profile": "~Tim_Cooijmans1;~Milad_Aghajohari1;~Aaron_Courville3", "aff": "University of Montreal;Montreal Institute for Learning Algorithms, University of Montreal, Universit\u00e9 de Montr\u00e9al;Universit\u00e9 de Montr\u00e9al", "aff_domain": "umontreal.ca;mila.umontreal.ca; ", "position": "PhD student;Researcher;Assistant Professor", "bibtex": "@misc{\ncooijmans2024metavalue,\ntitle={Meta-Value Learning: a General Framework for Learning with Learning Awareness},\nauthor={Tim Cooijmans and Milad Aghajohari and Aaron Courville},\nyear={2024},\nurl={https://openreview.net/forum?id=3OzQhhPLyW}\n}", "github": "", "project": "", "reviewers": "1uYc;c4Mz;VzC1;ZYT4;zqYk;rsw5", "site": "https://openreview.net/forum?id=3OzQhhPLyW", "pdf_size": 1773301, "rating": "5;5;5;5;5;6", "confidence": "4;4;4;3;5;2", "soundness": "3;2;2;3;3;3", "contribution": "2;2;3;3;3;3", "presentation": "3;3;2;3;3;2", "wc_summary": "133;117;183;84;62;103", "wc_strengths": "24;74;44;103;42;153", "wc_weaknesses": "254;183;98;154;220;86", "wc_questions": "121;41;725;51;97;45", "wc_review": "532;415;1050;392;421;387", "wc_reply_reviewers": "458;12;0;0;0;0", "wc_reply_authors": "1148;532;1076;322;0;314", "reply_reviewers": "2;1;0;0;0;0", "reply_authors": 
"3;2;2;1;0;1", "rating_avg": [ 5.166666666666667, 0.37267799624996495 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 113.66666666666667, 38.41729934403105 ], "wc_strengths_avg": [ 73.33333333333333, 43.786096220401085 ], "wc_weaknesses_avg": [ 165.83333333333334, 60.713580761546986 ], "wc_questions_avg": [ 180.0, 245.48659162297778 ], "wc_review_avg": [ 532.8333333333334, 236.27485172052388 ], "wc_reply_reviewers_avg": [ 78.33333333333333, 169.84862541556103 ], "wc_reply_authors_avg": [ 565.3333333333334, 417.00786030428196 ], "reply_reviewers_avg": [ 0.5, 0.7637626158259734 ], "reply_authors_avg": [ 1.5, 0.9574271077563381 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.7905694150420948, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18278300865798388679&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Montreal;Universit\u00e9 de Montr\u00e9al", "aff_unique_dep": ";", "aff_unique_url": "https://wwwumontreal.ca;https://www.umontreal.ca", "aff_unique_abbr": "UM;UdeM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Montreal", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "id": "3P87ptzvTm", "title": "Optimal Multiple Transport with Applications to Visual Matching, Model Fusion and Beyond", "track": "main", "status": "Reject", "tldr": "", "abstract": "Optimal transport (OT) has wide applications including machine learning. It concerns finding the optimal mapping for Monge OT (or coupling for Kantorovich OT) between two probability measures. This paper generalizes the classic pairwise OT to the so-called Optimal Multiple Transportation (OMT) accepting more than two probability measures as input. We formulate the problem as minimizing the transportation costs between each pair of distributions and meanwhile requiring cycle-consistency of transportation among probability measures. In particular, we present both the Monge and Kantorovich formulations of OMT and obtain the approximate solution with added entropic and cycle-consistency regularization, for which an iterative Sinkhorn-based algorithm (ROMT-Sinkhorn) is proposed. We empirically show the superiority of our approach on two popular tasks: visual multi-point matching (MPM) and multi-model fusion (MMF). In MPM, our OMT solver directly utilizes the cosine distance between learned features of points obtained from off-the-shelf graph matching neural networks as the pairwise cost. We leverage the ROMT-Sinkhorn algorithm to learn multiple matchings. For MMF, we focus on the problem of fusing three models and employ ROMT-Sinkhorn instead of the Sinkhorn algorithm to learn the alignment between layers. Both tasks achieve competitive results with ROMT-Sinkhorn. Furthermore, we showcase the potential of our approach in addressing the travel salesman problem (TSP) by searching for the optimal path on the probability matrix instead of the distance matrix. 
Source code will be made publicly available.", "keywords": "Optimal Transport; Sinkhorn Algorithm; Cycle-Consistency; Visual Matching; Model Fusion", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Liangliang Shi;Jiale Hong;Junchi Yan", "authorids": "~Liangliang_Shi1;~Jiale_Hong1;~Junchi_Yan2", "gender": "M;;", "homepage": ";;", "dblp": "89/8730;;", "google_scholar": "Qf1k8lUAAAAJ;;", "orcid": "0000-0001-7033-4207;;", "linkedin": ";;", "or_profile": "~Liangliang_Shi1;~Jiale_Hong1;~Junchi_Yan2", "aff": "Shanghai Jiaotong University;;", "aff_domain": "sjtu.edu.cn;;", "position": "PhD student;;", "bibtex": "@misc{\nshi2024optimal,\ntitle={Optimal Multiple Transport with Applications to Visual Matching, Model Fusion and Beyond},\nauthor={Liangliang Shi and Jiale Hong and Junchi Yan},\nyear={2024},\nurl={https://openreview.net/forum?id=3P87ptzvTm}\n}", "github": "", "project": "", "reviewers": "aQyc;uVEA;16yx;2Gzs", "site": "https://openreview.net/forum?id=3P87ptzvTm", "pdf_size": 2003963, "rating": "5;5;5;5", "confidence": "4;3;4;4", "soundness": "3;2;3;2", "contribution": "3;2;2;2", "presentation": "3;3;3;3", "wc_summary": "104;142;103;110", "wc_strengths": "24;48;40;60", "wc_weaknesses": "58;246;274;32", "wc_questions": "514;3;49;524", "wc_review": "700;439;466;726", "wc_reply_reviewers": "227;0;137;133", "wc_reply_authors": "1319;1316;1169;774", "reply_reviewers": "1;0;1;1", "reply_authors": "3;3;3;2", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 114.75, 15.958931668504631 ], "wc_strengths_avg": [ 43.0, 13.076696830622021 ], "wc_weaknesses_avg": [ 152.5, 108.34551213594405 ], "wc_questions_avg": [ 272.5, 247.06122722920324 ], "wc_review_avg": [ 582.75, 130.92244841890178 ], "wc_reply_reviewers_avg": [ 124.25, 80.98572405060042 ], "wc_reply_authors_avg": [ 1144.5, 222.33589453797154 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:8wExtdxwTNgJ:scholar.google.com/&scioq=Optimal+Multiple+Transport+with+Applications+to+Visual+Matching,+Model+Fusion+and+Beyond&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "3POV2mBEVl", "title": "Principal Component Analysis for Cross-Sectionally Correlated Pricing Errors", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "We propose a new estimator for factor pricing models which we refer to as Principal Component Analysis for Cross-Sectionally Correlated Pricing Errors (PCA-XC). Our estimator aims to find the factor pricing model that well explains the time-series variation of asset returns and well handles the correlations of cross-section of pricing errors that we present exist in real-world data. The proposed estimator is defined by a new regularized minimization problem in which finding a solution is difficult. This contrasts with other related estimators whose corresponding minimization problem admits an analytic solution. 
To this end, we propose an approximate algorithm that solves our proposed minimization problem based on the alternating least squares method.", "keywords": "Unsupervised Learning;Optimization;Principal Component Analysis;Asset Pricing;Factor Pricing Model", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/ec72d22295ee66fa7ec2a96690c83dbb0e7f47c5.zip", "author": "Hyuksoo Kim;Saejoon Kim", "authorids": "~Hyuksoo_Kim1;~Saejoon_Kim1", "gender": ";", "homepage": ";http://fml.sogang.ac.kr", "dblp": "174/8206;10/1669", "google_scholar": "PtP1JioAAAAJ;https://scholar.google.com/scholar?hl=en", "orcid": "0000-0003-0088-3846;", "linkedin": ";", "or_profile": "~Hyuksoo_Kim1;~Saejoon_Kim1", "aff": "Sogang University;Sogang University", "aff_domain": "sogang.ac.kr;sogang.ac.kr", "position": "PhD student;Full Professor", "bibtex": "@misc{\nkim2024principal,\ntitle={Principal Component Analysis for Cross-Sectionally Correlated Pricing Errors},\nauthor={Hyuksoo Kim and Saejoon Kim},\nyear={2024},\nurl={https://openreview.net/forum?id=3POV2mBEVl}\n}", "github": "", "project": "", "reviewers": "D5h2;dcRp;ihuY;mR2q", "site": "https://openreview.net/forum?id=3POV2mBEVl", "pdf_size": 793880, "rating": "3;3;3;5", "confidence": "5;3;3;3", "soundness": "1;3;3;2", "contribution": "1;2;2;2", "presentation": "1;3;2;2", "wc_summary": "136;161;36;94", "wc_strengths": "2;29;23;73", "wc_weaknesses": "2;90;89;142", "wc_questions": "2;66;6;91", "wc_review": "142;346;154;400", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 106.75, 47.34646238104807 ], "wc_strengths_avg": [ 31.75, 25.839649765428323 ], "wc_weaknesses_avg": [ 80.75, 50.26616655365714 ], "wc_questions_avg": [ 41.25, 38.310409812477864 ], "wc_review_avg": [ 260.5, 114.18734605900953 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:53q6tVa291wJ:scholar.google.com/&scioq=Principal+Component+Analysis+for+Cross-Sectionally+Correlated+Pricing+Errors&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Sogang University", "aff_unique_dep": "", "aff_unique_url": "https://www.sogang.ac.kr", "aff_unique_abbr": "Sogang", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "id": "3PWYAlAQxv", "title": "Neural Networks Trained by Weight Permutation are Universal Approximators", "track": "main", "status": "Reject", "tldr": "", "abstract": "The universal approximation property is fundamental to the success of neural networks, and has traditionally been achieved by networks without any constraints on their parameters. However, recent experimental research proposed an innovative permutation-based training method, which can achieve desired classification performance without modifying the exact values of the weights. 
In this paper, we prove that the permutation training method can guide a ReLU network to approximate one-dimensional continuous functions. Our numerical results under more diverse scenarios also validate the effectiveness of the permutation training method in regression tasks. Moreover, the notable observations during weight permutation suggest that permutation training can provide a novel tool for describing network learning behavior.", "keywords": "Universal approximation property;permutation training;physical neural networks;learning behavior", "primary_area": "learning theory", "supplementary_material": "/attachment/6522417374125b49514926c7ea20bb691c7d2f0d.zip", "author": "Gaohang Chen;Zhonghua Qiao;Yongqiang Cai", "authorids": "~Gaohang_Chen1;~Zhonghua_Qiao1;~Yongqiang_Cai1", "gender": "M;M;M", "homepage": ";https://www.polyu.edu.hk/ama/profile/zhqiao/;", "dblp": ";;228/6809", "google_scholar": ";;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-9133-0376;;0000-0002-2666-0539", "linkedin": ";;", "or_profile": "~Gaohang_Chen1;~Zhonghua_Qiao1;~Yongqiang_Cai1", "aff": "Hong Kong Polytechnic University;;Beijing Normal University", "aff_domain": "polyu.edu.hk;;bnu.edu.cn", "position": "PhD student;;Lecturer", "bibtex": "@misc{\nchen2024neural,\ntitle={Neural Networks Trained by Weight Permutation are Universal Approximators},\nauthor={Gaohang Chen and Zhonghua Qiao and Yongqiang Cai},\nyear={2024},\nurl={https://openreview.net/forum?id=3PWYAlAQxv}\n}", "github": "", "project": "", "reviewers": "5HxL;BYmb;XdGX;S6yU", "site": "https://openreview.net/forum?id=3PWYAlAQxv", "pdf_size": 5895261, "rating": "5;5;6;8", "confidence": "4;3;3;3", "soundness": "3;3;3;3", "contribution": "2;2;3;4", "presentation": "3;3;3;3", "wc_summary": "40;74;50;76", "wc_strengths": "44;53;110;119", "wc_weaknesses": "48;158;48;179", "wc_questions": "72;97;35;92", "wc_review": "204;382;243;466", "wc_reply_reviewers": "107;49;0;15", "wc_reply_authors": "731;962;781;883", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 60.0, 15.427248620541512 ], "wc_strengths_avg": [ 81.5, 33.30540496676178 ], "wc_weaknesses_avg": [ 108.25, 60.705745197633476 ], "wc_questions_avg": [ 74.0, 24.38237068047322 ], "wc_review_avg": [ 323.75, 105.46178217724182 ], "wc_reply_reviewers_avg": [ 42.75, 41.12405014100629 ], "wc_reply_authors_avg": [ 839.25, 89.57224737607068 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.4714045207910316, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4235409972258229367&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Hong Kong Polytechnic University;Beijing Normal University", "aff_unique_dep": ";", "aff_unique_url": "https://www.polyu.edu.hk;https://www.bnu.edu.cn", "aff_unique_abbr": "PolyU;BNU", "aff_campus_unique_index": "0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "3PaVCdeEmW", "title": "Align after Pre-train: Improving Multilingual Generative Models with Cross-lingual Alignment", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Multilingual generative models obtain 
remarkable cross-lingual capabilities through pre-training on large-scale corpora. However, they still exhibit a performance bias toward high-resource languages, and learn isolated distributions of sentence representations across languages. To bridge this gap, we propose a simple yet effective alignment framework exploiting pairs of translation sentences. It aligns the internal sentence representations across different languages via multilingual contrastive learning and aligns model outputs by answering prompts in different languages. Experimental results demonstrate that even with less than 0.1\u2030 of pre-training tokens, our alignment framework significantly boosts the cross-lingual abilities of generative models and mitigates the performance gap. Further analysis reveals that it results in a better internal multilingual representation distribution of multilingual models.", "keywords": "Cross-lingual alignment;In-context learning;Multilingual generative model", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Chong Li;Shaonan Wang;Jiajun Zhang;Chengqing Zong", "authorids": "~Chong_Li6;~Shaonan_Wang1;~Jiajun_Zhang1;~Chengqing_Zong1", "gender": "M;F;M;M", "homepage": ";https://wangshaonan.github.io/;http://www.nlpr.ia.ac.cn/cip/jjzhang.htm;http://www.nlpr.ia.ac.cn/cip/english/zong.htm", "dblp": ";29/8236;71/6950-1.html;38/6093", "google_scholar": "https://scholar.google.com.hk/citations?user=aftZkxsAAAAJ;ydFT-G8AAAAJ;93zngeYAAAAJ;l8lvKOQAAAAJ", "orcid": "0009-0000-0708-7493;;;", "linkedin": ";;;", "or_profile": "~Chong_Li6;~Shaonan_Wang1;~Jiajun_Zhang1;~Chengqing_Zong1", "aff": "Institute of automation, Chinese Academy of Sciences;;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "ia.ac.cn;;ia.ac.cn;ia.ac.cn", "position": "PhD student;;Full Professor;Researcher", "bibtex": "@misc{\nli2024align,\ntitle={Align after Pre-train: Improving Multilingual Generative Models with Cross-lingual Alignment},\nauthor={Chong Li and Shaonan Wang and Jiajun Zhang and Chengqing Zong},\nyear={2024},\nurl={https://openreview.net/forum?id=3PaVCdeEmW}\n}", "github": "", "project": "", "reviewers": "GUFo;ehYp;1odi", "site": "https://openreview.net/forum?id=3PaVCdeEmW", "pdf_size": 1270084, "rating": "3;5;6", "confidence": "4;4;4", "soundness": "2;3;3", "contribution": "2;2;2", "presentation": "3;3;3", "wc_summary": "74;73;95", "wc_strengths": "64;83;111", "wc_weaknesses": "289;171;171", "wc_questions": "7;50;58", "wc_review": "434;377;435", "wc_reply_reviewers": "0;89;139", "wc_reply_authors": "1005;667;793", "reply_reviewers": "0;1;1", "reply_authors": "3;2;3", "rating_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 80.66666666666667, 10.143416036468626 ], "wc_strengths_avg": [ 86.0, 19.30457631409368 ], "wc_weaknesses_avg": [ 210.33333333333334, 55.62573345334174 ], "wc_questions_avg": [ 38.333333333333336, 22.395436042987765 ], "wc_review_avg": [ 415.3333333333333, 27.108834148463284 ], "wc_reply_reviewers_avg": [ 76.0, 57.48623023530649 ], "wc_reply_authors_avg": [ 821.6666666666666, 139.46883363517296 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], 
"authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11602331846743195669&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Chinese Academy of Sciences", "aff_unique_dep": "Institute of Automation", "aff_unique_url": "http://www.ia.cas.cn", "aff_unique_abbr": "CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "SPTNet: An Efficient Alternative Framework for Generalized Category Discovery with Spatial Prompt Tuning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19515", "id": "3QLkwU40EE", "author_site": "Hongjun Wang, Sagar Vaze, Kai Han", "tldr": "", "abstract": "Generalized Category Discovery (GCD) aims to classify unlabelled images from both \u2018seen\u2019 and \u2018unseen\u2019 classes by transferring knowledge from a set of labelled \u2018seen\u2019 class images. A key theme in existing GCD approaches is adapting large-scale pre-trained models for the GCD task. An alternate perspective, however, is to adapt the data representation itself for better alignment with the pre-trained model. As such, in this paper, we introduce a two-stage adaptation approach termed SPTNet, which iteratively optimizes model parameters (i.e., model-finetuning) and data parameters (i.e., prompt learning). Furthermore, we propose a novel spatial prompt tuning method (SPT) which considers the spatial property of image data, enabling the method to better focus on object parts, which can transfer between seen and unseen classes. We thoroughly evaluate our SPTNet on standard benchmarks and demonstrate that our method outperforms existing GCD methods. Notably, we find our method achieves an average accuracy of 61.4% on the SSB, surpassing prior state-of-the-art methods by approximately 10%. The improvement is particularly remarkable as our method yields extra parameters amounting to only 0.117% of those in the backbone architecture. 
Project page: https://visual-ai.github.io/sptnet.", "keywords": "Generalized Category Discovery;Novel Category Discovery", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Hongjun Wang;Sagar Vaze;Kai Han", "authorids": "~Hongjun_Wang2;~Sagar_Vaze1;~Kai_Han1", "gender": "M;M;M", "homepage": "https://whj363636.github.io/;https://sgvaze.github.io/;http://www.kaihan.org/", "dblp": "65/3627-5;226/4705;51/4757-1.html", "google_scholar": "DNi-nB0AAAAJ;lvuOknUAAAAJ;tG8S_vMAAAAJ", "orcid": ";0000-0003-2920-9345;0000-0002-7995-9999", "linkedin": ";sagar-vaze-2356ab171/;kaihancs/", "or_profile": "~Hongjun_Wang2;~Sagar_Vaze1;~Kai_Han1", "aff": "NVIDIA;University of Oxford;The University of Hong Kong", "aff_domain": "nvidia.com;ox.ac.uk;hku.hk", "position": "Intern;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nwang2024sptnet,\ntitle={{SPTN}et: An Efficient Alternative Framework for Generalized Category Discovery with Spatial Prompt Tuning},\nauthor={Hongjun Wang and Sagar Vaze and Kai Han},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=3QLkwU40EE}\n}", "github": "", "project": "", "reviewers": "Pson;bfKG;E8qj", "pdf_size": 25689441, "rating": "6;6;8", "confidence": "4;2;5", "soundness": "2;3;3", "contribution": "4;3;4", "presentation": "3;4;3", "wc_summary": "69;63;109", "wc_strengths": "53;84;49", "wc_weaknesses": "123;174;456", "wc_questions": "53;9;223", "wc_review": "298;330;837", "wc_reply_reviewers": "0;0;126", "wc_reply_authors": "467;347;1264", "reply_reviewers": "0;0;1", "reply_authors": "1;1;2", "rating_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 1.247219128924647 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 3.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 80.33333333333333, 20.417857108151406 ], "wc_strengths_avg": [ 62.0, 15.641824275533422 ], "wc_weaknesses_avg": [ 251.0, 146.4445287472359 ], "wc_questions_avg": [ 95.0, 92.2749514584899 ], "wc_review_avg": [ 488.3333333333333, 246.8904390390379 ], "wc_reply_reviewers_avg": [ 42.0, 59.39696961966999 ], "wc_reply_authors_avg": [ 692.6666666666666, 406.95317775991 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7559289460184545, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9353576079773046885&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=3QLkwU40EE", "pdf": "https://openreview.net/pdf?id=3QLkwU40EE", "email": "nvidia.com;ox.ac.uk;hku.hk", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "NVIDIA;University of Oxford;University of Hong Kong", "aff_unique_dep": "NVIDIA Corporation;;", "aff_unique_url": "https://www.nvidia.com;https://www.ox.ac.uk;https://www.hku.hk", "aff_unique_abbr": "NVIDIA;Oxford;HKU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;2", "aff_country_unique": "United States;United Kingdom;China" }, { "id": "3QR230r11w", "title": "Multi-Fidelity Active Learning with GFlowNets", "track": "main", "status": "Reject", "tldr": "", "abstract": "In the last 
decades, the capacity to generate large amounts of data in science and engineering applications has been growing steadily. Meanwhile, the progress in machine learning has turned it into a suitable tool to process and utilise the available data. Nonetheless, many relevant scientific and engineering problems present challenges where current machine learning methods cannot yet efficiently leverage the available data and resources. For example, in scientific discovery, we are often faced with the problem of exploring very large, structured and high-dimensional spaces, and where querying a high fidelity, black-box objective function is very expensive. Progress in machine learning methods that can efficiently tackle such problems would help accelerate currently crucial areas such as drug and materials discovery. In this paper, we propose a multi-fidelity active learning algorithm with GFlowNets as a sampler, to efficiently discover diverse, high-scoring candidates where multiple approximations of the black-box function are available at lower fidelity and cost. Our evaluation on molecular discovery tasks shows that multi-fidelity active learning with GFlowNets can discover high-scoring candidates at a fraction of the budget of its single-fidelity counterpart while maintaining diversity, unlike RL-based alternatives. These results open new avenues for multi-fidelity active learning to accelerate scientific discovery and engineering design.", "keywords": "gflownets;multi-fidelity;active learning;bayesian optimization;scientific discovery;biological sequence design;molecular modelling;material discovery", "primary_area": "generative models", "supplementary_material": "/attachment/cff757e41ad228294bccf9aef376ad7d7c04aa49.zip", "author": "Alex Hern\u00e1ndez-Garc\u00eda;Nikita Saxena;Moksh Jain;Cheng-Hao Liu;Yoshua Bengio", "authorids": "~Alex_Hern\u00e1ndez-Garc\u00eda1;~Nikita_Saxena1;~Moksh_Jain1;~Cheng-Hao_Liu1;~Yoshua_Bengio1", "gender": ";M;M;M;F", "homepage": "https://alexhernandezgarcia.github.io;https://mj10.github.io;https://pchliu.github.io/;http://yoshuabengio.org;https://nikita-0209.github.io/", "dblp": "213/8573;249/9368;;56/953;", "google_scholar": "f8vQCOAAAAAJ;TD07G_wAAAAJ;iVJGx0cAAAAJ;kukA0LcAAAAJ;https://scholar.google.co.in/citations?user=DkroIXMAAAAJ", "orcid": ";;0000-0001-7923-6806;;", "linkedin": ";;chenghao-peter-liu/;yoshuabengio/?originalSubdomain=ca;nikita-saxena", "or_profile": "~Alex_Hern\u00e1ndez-Garc\u00eda1;~Moksh_Jain1;~Cheng-Hao_Liu1;~Yoshua_Bengio1;~Nikita_Saxena2", "aff": "Universit\u00e9 de Montr\u00e9al;Universit\u00e9 de Montr\u00e9al;Montreal Institute for Learning Algorithms, University of Montreal, Universit\u00e9 de Montr\u00e9al;University of Montreal;Google", "aff_domain": "umontreal.ca;umontreal.ca;mila.umontreal.ca;umontreal.ca;google.com", "position": "Postdoc;PhD student;PhD student intern;Full Professor;Researcher", "bibtex": "@misc{\nhern{\\'a}ndez-garc{\\'\\i}a2024multifidelity,\ntitle={Multi-Fidelity Active Learning with {GF}lowNets},\nauthor={Alex Hern{\\'a}ndez-Garc{\\'\\i}a and Nikita Saxena and Moksh Jain and Cheng-Hao Liu and Yoshua Bengio},\nyear={2024},\nurl={https://openreview.net/forum?id=3QR230r11w}\n}", "github": "", "project": "", "reviewers": "EtrP;Cu6t;TJ42;5Rpa", "site": "https://openreview.net/forum?id=3QR230r11w", "pdf_size": 818909, "rating": "3;5;6;8", "confidence": "4;4;4;3", "soundness": "2;2;3;3", "contribution": "1;2;3;4", "presentation": "2;2;3;4", "wc_summary": "33;67;77;202", "wc_strengths": "218;17;55;228",
"wc_weaknesses": "701;265;87;95", "wc_questions": "202;105;26;48", "wc_review": "1154;454;245;573", "wc_reply_reviewers": "335;849;26;0", "wc_reply_authors": "2849;3026;714;382", "reply_reviewers": "1;3;2;0", "reply_authors": "5;5;2;1", "rating_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 94.75, 64.0327064866073 ], "wc_strengths_avg": [ 129.5, 94.52645132448377 ], "wc_weaknesses_avg": [ 287.0, 249.37120924437127 ], "wc_questions_avg": [ 95.25, 68.04180700128414 ], "wc_review_avg": [ 606.5, 337.2006079472574 ], "wc_reply_reviewers_avg": [ 302.5, 341.9345697644507 ], "wc_reply_authors_avg": [ 1742.75, 1202.132142278876 ], "reply_reviewers_avg": [ 1.5, 1.118033988749895 ], "reply_authors_avg": [ 3.25, 1.7853571071357126 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8006407690254357, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14990110088713039119&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1;1;2", "aff_unique_norm": "Universit\u00e9 de Montr\u00e9al;University of Montreal;Google", "aff_unique_dep": ";Montreal Institute for Learning Algorithms;Google", "aff_unique_url": "https://www.umontreal.ca;https://www.mila.quebec;https://www.google.com", "aff_unique_abbr": "UdeM;MILA;Google", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Montreal;Mountain View", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "Canada;United States" }, { "title": "Universal Backdoor Attacks", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19514", "id": "3QkzYBSWqL", "author_site": "Benjamin Schneider, Nils Lukas, Florian Kerschbaum", "tldr": "", "abstract": "Web-scraped datasets are vulnerable to data poisoning, which can be used for backdooring deep image classifiers during training. Since training on large datasets is expensive, a model is trained once and reused many times. Unlike adversarial examples, backdoor attacks often target specific classes rather than any class learned by the model. One might expect that targeting many classes through a na\u00efve composition of attacks vastly increases the number of poison samples. We show this is not necessarily true and more efficient, \n _universal_ data poisoning attacks exist that allow controlling misclassifications from any source class into any target class with a slight increase in poison samples. Our idea is to generate triggers with salient characteristics that the model can learn. The triggers we craft exploit a phenomenon we call _inter-class poison transferability_, where learning a trigger from one class makes the model more vulnerable to learning triggers for other classes. 
We demonstrate the effectiveness and robustness of our universal backdoor attacks by controlling models with up to 6,000 classes while poisoning only 0.15% of the training dataset.", "keywords": "Backdoor;Data poisoning;Integrity;Image Classification", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "", "author": "Benjamin Schneider;Nils Lukas;Florian Kerschbaum", "authorids": "~Benjamin_Schneider1;~Nils_Lukas1;~Florian_Kerschbaum1", "gender": "M;M;", "homepage": "https://github.com/Ben-Schneider-code;https://nilslukas.github.io;", "dblp": "55/9497;;", "google_scholar": ";https://scholar.google.com/citations?hl=de;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Benjamin_Schneider1;~Nils_Lukas1;~Florian_Kerschbaum1", "aff": "University of Waterloo;University of Waterloo;", "aff_domain": "uwaterloo.ca;uwaterloo.ca;", "position": "MS student;PhD student;", "bibtex": "@inproceedings{\nschneider2024universal,\ntitle={Universal Backdoor Attacks},\nauthor={Benjamin Schneider and Nils Lukas and Florian Kerschbaum},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=3QkzYBSWqL}\n}", "github": "", "project": "", "reviewers": "Xx2A;czTr;EfVD;H32b", "pdf_size": 556822, "rating": "5;5;6;6", "confidence": "3;3;4;4", "soundness": "3;2;2;4", "contribution": "2;2;3;3", "presentation": "2;3;2;4", "wc_summary": "56;54;206;92", "wc_strengths": "36;13;34;40", "wc_weaknesses": "239;20;155;386", "wc_questions": "78;223;59;49", "wc_review": "409;310;454;567", "wc_reply_reviewers": "0;0;157;44", "wc_reply_authors": "1231;1151;778;1659", "reply_reviewers": "0;0;1;1", "reply_authors": "2;3;2;3", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 102.0, 61.91930232165088 ], "wc_strengths_avg": [ 30.75, 10.473180032826706 ], "wc_weaknesses_avg": [ 200.0, 132.79871987334818 ], "wc_questions_avg": [ 102.25, 70.48891756865046 ], "wc_review_avg": [ 435.0, 92.31197105467957 ], "wc_reply_reviewers_avg": [ 50.25, 64.19647575996676 ], "wc_reply_authors_avg": [ 1204.75, 313.06419070216253 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10538171199816605357&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=3QkzYBSWqL", "pdf": "https://openreview.net/pdf?id=3QkzYBSWqL", "email": "uwaterloo.ca;uwaterloo.ca;", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "University of Waterloo", "aff_unique_dep": "", "aff_unique_url": "https://uwaterloo.ca", "aff_unique_abbr": "UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "title": "Grokking as a First Order Phase Transition in Two Layer Networks", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19513", "id": "3ROGsTX3IR", "author_site": "Noa Rubin, Inbar Seroussi, Zohar Ringel", "tldr": "", "abstract": "A key property of deep neural networks (DNNs) is their ability to learn new features during training. This intriguing aspect of deep learning stands out most clearly in recently reported Grokking phenomena. 
While mainly reflected as a sudden increase in test accuracy, Grokking is also believed to be a beyond lazy-learning/Gaussian Process (GP) phenomenon involving feature learning. Here we apply a recent development in the theory of feature learning, the adaptive kernel approach, to two teacher-student models with cubic-polynomial and modular addition teachers. We provide analytical predictions on feature learning and Grokking properties of these models and demonstrate a mapping between Grokking and the theory of phase transitions. We show that after Grokking, the state of the DNN is analogous to the mixed phase following a first-order phase transition. In this mixed phase, the DNN generates useful internal representations of the teacher that are sharply distinct from those before the transition.", "keywords": "Grokking;deep neural networks;Gaussian Process;phase transitions", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Noa Rubin;Inbar Seroussi;Zohar Ringel", "authorids": "~Noa_Rubin1;~Inbar_Seroussi1;~Zohar_Ringel1", "gender": "F;F;M", "homepage": "https://phys.huji.ac.il/people/noa-rubin;https://sites.google.com/view/inbar-seroussi/home;http://old.phys.huji.ac.il/~zohar.ringel/", "dblp": ";;", "google_scholar": ";https://scholar.google.co.il/citations?user=t_Qe0CMAAAAJ;https://scholar.google.co.il/citations?user=8-8VIDgAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Noa_Rubin1;~Inbar_Seroussi1;~Zohar_Ringel1", "aff": "Hebrew University of Jerusalem;Tel Aviv University;Hebrew University of Jerusalem, Israel", "aff_domain": "huji.ac.il;tau.ac.il;huji.ac.il", "position": "MS student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nrubin2024grokking,\ntitle={Grokking as a First Order Phase Transition in Two Layer Networks},\nauthor={Noa Rubin and Inbar Seroussi and Zohar Ringel},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=3ROGsTX3IR}\n}", "github": "", "project": "", "reviewers": "t2DF;iD3W;64Ja;PXdZ;MgBg", "pdf_size": 23077248, "rating": "3;6;6;6;8", "confidence": "3;3;4;4;4", "soundness": "2;3;2;3;3", "contribution": "2;2;3;2;4", "presentation": "1;3;4;3;2", "wc_summary": "68;75;57;55;273", "wc_strengths": "18;7;51;57;79", "wc_weaknesses": "286;89;187;185;184", "wc_questions": "28;3;164;193;494", "wc_review": "400;174;459;490;1030", "wc_reply_reviewers": "0;103;138;42;121", "wc_reply_authors": "1018;531;1229;1099;1313", "reply_reviewers": "0;1;1;1;1", "reply_authors": "2;2;3;3;3", "rating_avg": [ 5.8, 1.6 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "contribution_avg": [ 2.6, 0.8 ], "presentation_avg": [ 2.6, 1.019803902718557 ], "wc_summary_avg": [ 105.6, 84.0180932894814 ], "wc_strengths_avg": [ 42.4, 26.363611285254528 ], "wc_weaknesses_avg": [ 186.2, 62.313401447842665 ], "wc_questions_avg": [ 176.4, 175.15090636362692 ], "wc_review_avg": [ 510.6, 282.2747597643119 ], "wc_reply_reviewers_avg": [ 80.8, 51.79729722678588 ], "wc_reply_authors_avg": [ 1038.0, 273.2310377684058 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 2.6, 0.4898979485566356 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.6634034720037775, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3441025740091630117&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 3, "openreview": 
"https://openreview.net/forum?id=3ROGsTX3IR", "pdf": "https://openreview.net/pdf?id=3ROGsTX3IR", "email": "huji.ac.il;tau.ac.il;huji.ac.il", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Hebrew University of Jerusalem;Tel Aviv University", "aff_unique_dep": ";", "aff_unique_url": "https://www.huji.ac.il;https://www.tau.ac.il", "aff_unique_abbr": "HUJI;TAU", "aff_campus_unique_index": "0", "aff_campus_unique": "Jerusalem;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Israel" }, { "id": "3RfGSbXUt8", "title": "Option Boosting", "track": "main", "status": "Reject", "tldr": "", "abstract": "We introduce a novel approach to enhance stability and knowledge transfer in multi-task hierarchical reinforcement learning, specifically within the options framework. Modern Hierarchical Reinforcement Learning (HRL) algorithms can be prone to instability, due to the multilevel nature of the optimization process. To improve stability, we draw inspiration from boosting methods in supervised learning and propose a method which progressively introduces new options, while older options are kept fixed. In order to encourage generalization, each option policy has limited expressiveness. In order to improve knowledge transfer, we introduce the \\textit{Option Library}, a mechanism to share options across a population of agents. Our approach improves learning stability and allows agents to leverage knowledge from simple tasks in order to explore and perform more complex tasks. We evaluate our algorithm in MiniGrid and CraftingWorld, two pixel-based 2D grid-world environments designed for goal-oriented tasks, which allows compositional solutions.", "keywords": "Hierarchical Reinforcement Learning;Multi-Task Reinforcement Learning", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Manuel Del Verme;Doina Precup", "authorids": "~Manuel_Del_Verme1;~Doina_Precup1", "gender": ";F", "homepage": ";http://cs.mcgill.ca/~dprecup/", "dblp": ";p/DoinaPrecup", "google_scholar": "JcOwyS0AAAAJ;https://scholar.google.com.tw/citations?user=j54VcVEAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Manuel_Del_Verme1;~Doina_Precup1", "aff": "Montreal Institute for Learning Algorithms, University of Montreal, University of Montreal;McGill University", "aff_domain": "mila.umontreal.ca;mcgill.ca", "position": "PhD student;Associate Professor", "bibtex": "@misc{\nverme2024option,\ntitle={Option Boosting},\nauthor={Manuel Del Verme and Doina Precup},\nyear={2024},\nurl={https://openreview.net/forum?id=3RfGSbXUt8}\n}", "github": "", "project": "", "reviewers": "Sp3D;Jjah;ZULE", "site": "https://openreview.net/forum?id=3RfGSbXUt8", "pdf_size": 413492, "rating": "3;3;5", "confidence": "4;4;3", "soundness": "2;3;2", "contribution": "2;2;2", "presentation": "1;2;3", "wc_summary": "55;61;101", "wc_strengths": "35;52;64", "wc_weaknesses": "133;199;53", "wc_questions": "92;111;46", "wc_review": "315;423;264", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.0, 0.816496580927726 ], "wc_summary_avg": [ 72.33333333333333, 20.417857108151406 ], "wc_strengths_avg": [ 50.333333333333336, 11.897712198383164 ], "wc_weaknesses_avg": [ 128.33333333333334, 59.69552374806301 ], 
"wc_questions_avg": [ 83.0, 27.28858125052797 ], "wc_review_avg": [ 334.0, 66.2872536767062 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:5qxNTHtR17AJ:scholar.google.com/&scioq=Option+Boosting&hl=en&as_sdt=0,23", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "University of Montreal;McGill University", "aff_unique_dep": "Montreal Institute for Learning Algorithms;", "aff_unique_url": "https://www.umontreal.ca;https://www.mcgill.ca", "aff_unique_abbr": "UM;McGill", "aff_campus_unique_index": "0", "aff_campus_unique": "Montreal;", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "title": "Generalization error of spectral algorithms", "status": "Spotlight", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19512", "id": "3SJE1WLB4M", "author_site": "Maksim Velikanov, Maxim Panov, Dmitry Yarotsky", "tldr": "", "abstract": "The asymptotically precise estimation of the generalization of kernel methods has recently received attention due to the parallels between neural networks and their associated kernels. However, prior works derive such estimates for training by kernel ridge regression (KRR), whereas neural networks are typically trained with gradient descent (GD). In the present work, we consider the training of kernels with a family of \\emph{spectral algorithms} specified by profile $h(\\lambda)$, and including KRR and GD as special cases. Then, we derive the generalization error as a functional of learning profile $h(\\lambda)$ for two data models: high-dimensional Gaussian and low-dimensional translation-invariant model. \nUnder power-law assumptions on the spectrum of the kernel and target, we use our framework to (i) give full loss asymptotics for both noisy and noiseless observations (ii) show that the loss localizes on certain spectral scales, giving a new perspective on the KRR saturation phenomenon (iii) conjecture, and demonstrate for the considered data models, the universality of the loss w.r.t. 
non-spectral details of the problem, but only in case of noisy observation.", "keywords": "gradient descent;kernel ridge regression;optimal algorithm;generalization;asymptotic error rates;power-laws", "primary_area": "learning theory", "supplementary_material": "", "author": "Maksim Velikanov;Maxim Panov;Dmitry Yarotsky", "authorids": "~Maksim_Velikanov1;~Maxim_Panov1;~Dmitry_Yarotsky1", "gender": "M;M;M", "homepage": ";;http://yarotsky.info", "dblp": "292/3286;30/10085;132/4661", "google_scholar": "OrWWVcUAAAAJ;https://scholar.google.ru/citations?user=BqDhGJQAAAAJ;https://scholar.google.ru/citations?user=wNSSr_gAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Maksim_Velikanov1;~Maxim_Panov1;~Dmitry_Yarotsky1", "aff": "\u00c9cole Polytechnique;Institute for Information Transmission Problems;Skolkovo Institute of Science and Technology", "aff_domain": "polytechnique.fr;iitp.ru;skoltech.ru", "position": "PhD student;Researcher;Full Professor", "bibtex": "@inproceedings{\nvelikanov2024generalization,\ntitle={Generalization error of spectral algorithms},\nauthor={Maksim Velikanov and Maxim Panov and Dmitry Yarotsky},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=3SJE1WLB4M}\n}", "github": "", "project": "", "reviewers": "wTWc;jkPS;v7ue", "pdf_size": 737467, "rating": "8;8;8", "confidence": "3;3;2", "soundness": "3;4;3", "contribution": "3;4;3", "presentation": "3;3;3", "wc_summary": "88;97;26", "wc_strengths": "55;90;22", "wc_weaknesses": "135;51;32", "wc_questions": "207;27;17", "wc_review": "485;265;97", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1091;286;117", "reply_reviewers": "0;0;0", "reply_authors": "2;1;1", "rating_avg": [ 8.0, 0.0 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 70.33333333333333, 31.562988170042175 ], "wc_strengths_avg": [ 55.666666666666664, 27.764885897278397 ], "wc_weaknesses_avg": [ 72.66666666666667, 44.7536466546458 ], "wc_questions_avg": [ 83.66666666666667, 87.3053390247253 ], "wc_review_avg": [ 282.3333333333333, 158.87381435871959 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 498.0, 424.9525463703761 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ONrYOG02T8AJ:scholar.google.com/&scioq=Generalization+error+of+spectral+algorithms&hl=en&as_sdt=0,5", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=3SJE1WLB4M", "pdf": "https://openreview.net/pdf?id=3SJE1WLB4M", "email": "polytechnique.fr;iitp.ru;skoltech.ru", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Ecole Polytechnique;Institute for Information Transmission Problems;Skolkovo Institute of Science and Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.polytechnique.edu;http://www.iitp.ru;https://www.skoltech.ru", "aff_unique_abbr": "X;;Skoltech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "France;Russian Federation" }, { "id": "3SqnZXg24T", "title": "RetinexGAN Enables More Robust Low-Light Image Enhancement Via Retinex Decomposition Based Unsupervised 
Illumination Brightening", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Most existing image enhancement techniques rely heavily on strict supervision of paired images. Moreover, unsupervised enhancement methods also face challenges in achieving a balance between model performance and efficiency when handling real-world low-light images in unknown complex scenarios. Herein, we present a novel low-light image enhancement scheme termed \\textbf{RetinexGAN} that can leverage the supervision of a limited number of low-light/normal image pairs to realize an accurate Retinex decomposition, and based on this, achieve brightening the illumination of unpaired images to reduce dependence on paired datasets and improve generalization ability. The decomposition network is learned with some newly established constraints for complete decoupling between reflectance and illumination. For the first time, we introduce the feature pyramid network (FPN) to adjust the illumination maps of other low-light images without any supervision. Under this flexible framework, a wide range of backbones can be employed to work with illumination map generator, to navigate the balance between performance and efficiency. In addition, a novel attention mechanism is integrated into the FPN for giving the adaptability towards application scenes with different environment like underwater image enhancement (UIE) and dark face detection. Extensive experiments demonstrate that our proposed scheme has a more robust performance with high efficiency facing various images from different low-light environments over state-of-the-art methods.", "keywords": "low-light image enhancement;Retinex decomposition;feature pyramid network (FPN);attention mechanism;unsupervised illumination brightening", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Ruiqi Mao;Rongxin Cui", "authorids": "~Ruiqi_Mao1;~Rongxin_Cui1", "gender": "M;M", "homepage": "https://github.com/Ruiqi-Mao;", "dblp": ";05/7736.html", "google_scholar": ";https://scholar.google.com/citations?hl=en", "orcid": ";0000-0002-8006-3620", "linkedin": ";", "or_profile": "~Ruiqi_Mao1;~Rongxin_Cui1", "aff": "Northwest Polytechnical University Xi'an;Northwestern Polytechnical University Xi'an", "aff_domain": "nwpu.edu.cn;nwpu.edu.cn", "position": "PhD student;Full Professor", "bibtex": "@misc{\nmao2024retinexgan,\ntitle={Retinex{GAN} Enables More Robust Low-Light Image Enhancement Via Retinex Decomposition Based Unsupervised Illumination Brightening},\nauthor={Ruiqi Mao and Rongxin Cui},\nyear={2024},\nurl={https://openreview.net/forum?id=3SqnZXg24T}\n}", "github": "", "project": "", "reviewers": "tFmg;p4zt;TqR9;6g5x", "site": "https://openreview.net/forum?id=3SqnZXg24T", "pdf_size": 30220609, "rating": "1;3;3;3", "confidence": "5;4;5;5", "soundness": "1;3;2;2", "contribution": "1;2;1;2", "presentation": "2;3;2;2", "wc_summary": "59;33;54;63", "wc_strengths": "30;23;22;36", "wc_weaknesses": "353;68;127;202", "wc_questions": "4;4;24;10", "wc_review": "446;128;227;311", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "705;349;514;551", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 2.5, 0.8660254037844386 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 2.0, 0.7071067811865476 ], "contribution_avg": [ 1.5, 0.5 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 52.25, 11.562331079847178 ], "wc_strengths_avg": [ 
27.75, 5.673402858955108 ], "wc_weaknesses_avg": [ 187.5, 106.70168695948533 ], "wc_questions_avg": [ 10.5, 8.170067319184096 ], "wc_review_avg": [ 278.0, 116.63404305776251 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 529.75, 126.57285451470233 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:WEGobSqFmSAJ:scholar.google.com/&scioq=RetinexGAN+Enables+More+Robust+Low-Light+Image+Enhancement+Via+Retinex+Decomposition+Based+Unsupervised+Illumination+Brightening&hl=en&as_sdt=0,7", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Northwest Polytechnical University;Northwestern Polytechnical University", "aff_unique_dep": ";", "aff_unique_url": "http://www.nwpu.edu.cn;http://www.nwpu.edu.cn", "aff_unique_abbr": "NWPU;NWPU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Xi'an", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "3TAhlGaMKD", "title": "Last One Standing: A Comparative Analysis of Security and Privacy of Soft Prompt Tuning, LoRA, and In-Context Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Large Language Models (LLMs) are powerful tools for natural language processing, enabling novel applications and user experiences. However, to achieve optimal performance, LLMs often require adaptation with private data, which poses privacy and security challenges. Several techniques have been proposed to adapt LLMs with private data, such as Low-Rank Adaptation (LoRA), Soft Prompt Tuning (SPT), and In-Context Learning (ICL), but their comparative privacy and security properties have not been systematically investigated. In this work, we fill this gap by evaluating the robustness of LoRA, SPT, and ICL against three types of well-established attacks: membership inference, which exposes data leakage (privacy); backdoor, which injects malicious behavior (security); and model stealing, which can violate intellectual property (privacy and security). 
Our results show that there is no silver bullet for privacy and security in LLM adaptation and each technique has different strengths and weaknesses.", "keywords": "Large Language Models;Privacy;Security;Model Stealing Attack;Membership Inference Attack;Backdoor Attack;Low-Rank Adaptation;Soft Prompt Tuning;In-Context Learning", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "", "author": "Rui Wen;Tianhao Wang;Michael Backes;Yang Zhang;Ahmed Salem", "authorids": "~Rui_Wen3;~Tianhao_Wang3;~Michael_Backes3;~Yang_Zhang15;~Ahmed_Salem2", "gender": "M;M;;M;", "homepage": "https://ruiwen-ai.github.io/;https://tianhao.wang;;https://yangzhangalmo.github.io/;", "dblp": "63/10765-2;https://dblp.uni-trier.de/pid/145/3288-1.html;;06/6785-16;", "google_scholar": "https://scholar.google.com/citations?hl=en;TkgyXGwAAAAJ;;Xeb2888AAAAJ;", "orcid": "0009-0009-0691-7569;;;0000-0003-3612-7348;", "linkedin": ";;;;", "or_profile": "~Rui_Wen3;~Tianhao_Wang3;~Michael_Backes3;~Yang_Zhang15;~Ahmed_Salem2", "aff": "CISPA Helmholtz Center for Information Security;University of Virginia, Charlottesville;;CISPA Helmholtz Center for Information Security;", "aff_domain": "cispa.de;virginia.edu;;cispa.de;", "position": "PhD student;Assistant Professor;;Full Professor;", "bibtex": "@misc{\nwen2024last,\ntitle={Last One Standing: A Comparative Analysis of Security and Privacy of Soft Prompt Tuning, Lo{RA}, and In-Context Learning},\nauthor={Rui Wen and Tianhao Wang and Michael Backes and Yang Zhang and Ahmed Salem},\nyear={2024},\nurl={https://openreview.net/forum?id=3TAhlGaMKD}\n}", "github": "", "project": "", "reviewers": "FB1d;J1hc;JTKU;RTpz", "site": "https://openreview.net/forum?id=3TAhlGaMKD", "pdf_size": 759382, "rating": "1;5;5;8", "confidence": "4;3;3;4", "soundness": "1;3;3;3", "contribution": "1;2;2;3", "presentation": "3;4;3;3", "wc_summary": "47;49;16;113", "wc_strengths": "56;45;27;60", "wc_weaknesses": "210;98;83;140", "wc_questions": "18;174;1;118", "wc_review": "331;366;127;431", "wc_reply_reviewers": "0;18;0;87", "wc_reply_authors": "704;876;255;562", "reply_reviewers": "0;1;0;2", "reply_authors": "2;2;2;3", "rating_avg": [ 4.75, 2.48746859276655 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 56.25, 35.280128968018246 ], "wc_strengths_avg": [ 47.0, 12.786711852544421 ], "wc_weaknesses_avg": [ 132.75, 49.25126901918366 ], "wc_questions_avg": [ 77.75, 71.3175118747142 ], "wc_review_avg": [ 313.75, 113.63400679374111 ], "wc_reply_reviewers_avg": [ 26.25, 35.83556194620087 ], "wc_reply_authors_avg": [ 599.25, 227.73819947474774 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.10050378152592121, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18014859622824349825&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "CISPA Helmholtz Center for Information Security;University of Virginia", "aff_unique_dep": ";", "aff_unique_url": "https://www.cispa.de/;https://www.virginia.edu", "aff_unique_abbr": "CISPA;UVA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Charlottesville", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Germany;United States" }, { "title": "BTR: Binary 
Token Representations for Efficient Retrieval Augmented Language Models", "status": "Spotlight", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19511", "id": "3TO3TtnOFl", "author_site": "Qingqing Cao, Sewon Min, Yizhong Wang, Hannaneh Hajishirzi", "tldr": "", "abstract": "Retrieval augmentation addresses many critical problems in large language models such as hallucination, staleness, and privacy leaks.\nHowever, running retrieval-augmented language models (LMs) is slow and difficult to scale due to processing large amounts of retrieved text. \nWe introduce binary token representations (BTR), which use 1-bit vectors to precompute every token in passages, significantly reducing computation during inference. \nDespite the potential loss of accuracy, our new calibration techniques and training objectives restore performance. Combined with offline and runtime compression, this only requires 127GB of disk space for encoding 3 billion tokens in Wikipedia.\nOur experiments show that on five knowledge-intensive NLP tasks, BTR accelerates state-of-the-art inference by up to 4x and reduces storage by over 100x while maintaining over 95% task performance. Our code is publicly available at https://github.com/csarron/BTR.", "keywords": "language models;question answering;binary representations;retrieval-augmented language models", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Qingqing Cao;Sewon Min;Yizhong Wang;Hannaneh Hajishirzi", "authorids": "~Qingqing_Cao1;~Sewon_Min1;~Yizhong_Wang2;~Hannaneh_Hajishirzi1", "gender": "M;F;M;F", "homepage": "https://awk.ai/;https://www.sewonmin.com;https://yizhong-wang.com;https://homes.cs.washington.edu/~hannaneh/", "dblp": ";203/9401;79/3601;52/1296", "google_scholar": "vLpPyUUAAAAJ;https://scholar.google.ca/citations?user=jU4IZs4AAAAJ;y5zpqdAAAAAJ;LOV6_WIAAAAJ", "orcid": "0000-0002-8564-9241;;;", "linkedin": "qqcao;;;", "or_profile": "~Qingqing_Cao1;~Sewon_Min1;~Yizhong_Wang2;~Hannaneh_Hajishirzi1", "aff": "University of Washington, Seattle;Department of Computer Science, University of Washington;Department of Computer Science, University of Washington;University of Washington", "aff_domain": "uw.edu;cs.washington.edu;cs.washington.edu;uw.edu", "position": "Postdoc;PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\ncao2024btr,\ntitle={{BTR}: Binary Token Representations for Efficient Retrieval Augmented Language Models},\nauthor={Qingqing Cao and Sewon Min and Yizhong Wang and Hannaneh Hajishirzi},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=3TO3TtnOFl}\n}", "github": "", "project": "", "reviewers": "PDbq;U3RY;X7Hc;KKL3", "pdf_size": 597161, "rating": "6;6;6;8", "confidence": "4;3;3;4", "soundness": "3;2;3;3", "contribution": "3;3;3;4", "presentation": "4;3;3;3", "wc_summary": "83;93;55;225", "wc_strengths": "118;87;76;75", "wc_weaknesses": "100;171;98;323", "wc_questions": "124;96;34;53", "wc_review": "425;447;263;676", "wc_reply_reviewers": "91;173;0;122", "wc_reply_authors": "670;532;567;789", "reply_reviewers": "1;1;0;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 114.0, 65.58200972827838 ], "wc_strengths_avg": [ 
89.0, 17.392527130926087 ], "wc_weaknesses_avg": [ 173.0, 91.45764046814242 ], "wc_questions_avg": [ 76.75, 35.336772631353874 ], "wc_review_avg": [ 452.75, 147.1807986797191 ], "wc_reply_reviewers_avg": [ 96.5, 62.93846200853656 ], "wc_reply_authors_avg": [ 639.5, 100.11618250812403 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14156270123239045267&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=3TO3TtnOFl", "pdf": "https://openreview.net/pdf?id=3TO3TtnOFl", "email": "uw.edu;cs.washington.edu;cs.washington.edu;uw.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Washington", "aff_unique_dep": "", "aff_unique_url": "https://www.washington.edu", "aff_unique_abbr": "UW", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Seattle;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learning Planning Abstractions from Language", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19510", "id": "3UWuFoksGb", "author_site": "Weiyu Liu, Geng Chen, Joy Hsu, Jiayuan Mao, Jiajun Wu", "tldr": "", "abstract": "This paper presents a framework for learning state and action abstractions in sequential decision-making domains. Our framework, planning abstraction from language (PARL), utilizes language-annotated demonstrations to automatically discover a symbolic and abstract action space and induce a latent state abstraction based on it. PARL consists of three stages: 1) recovering object-level and action concepts, 2) learning state abstractions, abstract action feasibility, and transition models, and 3) applying low-level policies for abstract actions. During inference, given the task description, PARL first makes abstract action plans using the latent transition and feasibility functions, then refines the high-level plan using low-level policies. 
PARL generalizes across scenarios involving novel object instances and environments, unseen concept compositions, and tasks that require longer planning horizons than settings it is trained on.", "keywords": "Planning and Learning;Learning Abstractions;Compositional Generalization;Robotic Manipulation", "primary_area": "applications to robotics, autonomy, planning", "supplementary_material": "", "author": "Weiyu Liu;Geng Chen;Joy Hsu;Jiayuan Mao;Jiajun Wu", "authorids": "~Weiyu_Liu1;~Geng_Chen3;~Joy_Hsu2;~Jiayuan_Mao1;~Jiajun_Wu1", "gender": "M;M;F;F;M", "homepage": "http://weiyuliu.com/;https://jc043.github.io/;https://web.stanford.edu/~joycj/;http://jiayuanm.com;https://jiajunwu.com", "dblp": "133/0311.html;;258/5012;200/8283;117/4768", "google_scholar": "PHi0YEQAAAAJ;;Zr7RJT4AAAAJ;-xaOIZIAAAAJ;2efgcS0AAAAJ", "orcid": ";;;0000-0003-4798-3748;0000-0002-4176-343X", "linkedin": ";;;;jiajunwu/", "or_profile": "~Weiyu_Liu1;~Geng_Chen3;~Joy_Hsu2;~Jiayuan_Mao1;~Jiajun_Wu1", "aff": "Stanford University;University of California, San Diego;Stanford University;Massachusetts Institute of Technology;Stanford University", "aff_domain": "stanford.edu;ucsd.edu;stanford.edu;mit.edu;stanford.edu", "position": "Postdoc;MS student;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nliu2024learning,\ntitle={Learning Planning Abstractions from Language},\nauthor={Weiyu Liu and Geng Chen and Joy Hsu and Jiayuan Mao and Jiajun Wu},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=3UWuFoksGb}\n}", "github": "", "project": "", "reviewers": "Q2SJ;B6My;NUXY;V9Cw", "pdf_size": 1089365, "rating": "3;5;6;8", "confidence": "3;3;4;3", "soundness": "2;2;3;3", "contribution": "2;2;2;3", "presentation": "2;3;3;3", "wc_summary": "197;209;66;51", "wc_strengths": "103;74;28;107", "wc_weaknesses": "67;486;249;31", "wc_questions": "409;150;70;2", "wc_review": "776;919;413;191", "wc_reply_reviewers": "32;33;96;14", "wc_reply_authors": "1995;644;1573;168", "reply_reviewers": "1;1;2;1", "reply_authors": "4;1;5;1", "rating_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 130.75, 72.56850212041034 ], "wc_strengths_avg": [ 78.0, 31.551545128567 ], "wc_weaknesses_avg": [ 208.25, 180.39869040544613 ], "wc_questions_avg": [ 157.75, 154.22771313872224 ], "wc_review_avg": [ 574.75, 288.2866412097515 ], "wc_reply_reviewers_avg": [ 43.75, 31.09963826156182 ], "wc_reply_authors_avg": [ 1095.0, 724.778241947149 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 1.7853571071357126 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.16012815380508713, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15050195618124764063&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=3UWuFoksGb", "pdf": "https://openreview.net/pdf?id=3UWuFoksGb", "email": "stanford.edu;ucsd.edu;stanford.edu;mit.edu;stanford.edu", "author_num": 5, "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "Stanford University;University of California, San Diego;Massachusetts Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.stanford.edu;https://www.ucsd.edu;https://web.mit.edu", "aff_unique_abbr": "Stanford;UCSD;MIT", 
"aff_campus_unique_index": "0;1;0;0", "aff_campus_unique": "Stanford;San Diego;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Fusion Is Not Enough: Single Modal Attacks on Fusion Models for 3D Object Detection", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19509", "id": "3VD4PNEt5q", "author_site": "Zhiyuan Cheng, Hongjun Choi, Shiwei Feng, James Liang, Guanhong Tao, Dongfang Liu, Michael Zuzak, Xiangyu Zhang", "tldr": "", "abstract": "Multi-sensor fusion (MSF) is widely used in autonomous vehicles (AVs) for perception, particularly for 3D object detection with camera and LiDAR sensors. The purpose of fusion is to capitalize on the advantages of each modality while minimizing its weaknesses. Advanced deep neural network (DNN)-based fusion techniques have demonstrated the exceptional and industry-leading performance. Due to the redundant information in multiple modalities, MSF is also recognized as a general defence strategy against adversarial attacks. \nIn this paper, we attack fusion models from the camera modality that is considered to be of lesser importance in fusion but is more affordable for attackers. We argue that the weakest link of fusion models depends on their most vulnerable modality and propose an attack framework that targets advanced camera-LiDAR fusion-based 3D object detection models through camera-only adversarial attacks. \nOur approach employs a two-stage optimization-based strategy that first thoroughly evaluates vulnerable image areas under adversarial attacks, and then applies dedicated attack strategies for different fusion models to generate deployable patches. The evaluations with six advanced camera-LiDAR fusion models and one camera-only model indicate that our attacks successfully compromise all of them. Our approach can either decrease the mean average precision (mAP) of detection performance from 0.824 to 0.353 or degrade the detection score of a target object from 0.728 to 0.156, demonstrating the efficacy of our proposed attack framework. 
Code is available.", "keywords": "Adversarial Attacks;3D Object Detection;Autonomous Driving", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/2bd5ecf53634e87ceeecc8fd1736137e4820d36d.zip", "author": "Zhiyuan Cheng;Hongjun Choi;Shiwei Feng;James Chenhao Liang;Guanhong Tao;Dongfang Liu;Michael Zuzak;Xiangyu Zhang", "authorids": "~Zhiyuan_Cheng2;~Hongjun_Choi3;~Shiwei_Feng1;~James_Chenhao_Liang1;~Guanhong_Tao1;~Dongfang_Liu1;~Michael_Zuzak1;~Xiangyu_Zhang3", "gender": "M;;M;M;;;Not Specified;M", "homepage": "https://bob-cheng.github.io;https://hongjun9.github.io/;https://www.cs.purdue.edu/homes/feng292/;https://jamesliang819.github.io/;;https://www.rit.edu/directory/dxleec-dongfang-liu;https://mzuzak.github.io/;https://www.cs.purdue.edu/homes/xyzhang", "dblp": "324/1963;;138/9141-2;323/3403;;;;", "google_scholar": "dVchB-gAAAAJ;;https://scholar.google.com/citations?hl=en;cR8m4CcAAAAJ;;uICY0vEAAAAJ;dESHYYcAAAAJ;PXbu1wIAAAAJ", "orcid": "0000-0001-7280-6079;;0000-0001-6959-4327;;;;;", "linkedin": "bobchengzy/;;swfeng98/;;;;michael-zuzak/;", "or_profile": "~Zhiyuan_Cheng2;~Hongjun_Choi3;~Shiwei_Feng1;~James_Chenhao_Liang1;~Guanhong_Tao1;~Dongfang_Liu1;~Michael_Zuzak1;~Xiangyu_Zhang3", "aff": "Purdue University;Daegu Gyeongbuk Institute of Science and Technology;Purdue University;Rochester Institute of Technology;;Rochester Institute of Technology;Rochester Institute of Technology;Purdue University", "aff_domain": "purdue.edu;dgist.ac.kr;cs.purdue.edu;rit.edu;;rit.edu;rit.edu;cs.purdue.edu", "position": "PhD student;Assistant Professor;PhD student;PhD student;;Assistant Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\ncheng2024fusion,\ntitle={Fusion Is Not Enough: Single Modal Attacks on Fusion Models for 3D Object Detection},\nauthor={Zhiyuan Cheng and Hongjun Choi and Shiwei Feng and James Chenhao Liang and Guanhong Tao and Dongfang Liu and Michael Zuzak and Xiangyu Zhang},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=3VD4PNEt5q}\n}", "github": "", "project": "", "reviewers": "qqbx;DXNR;QA2X;nx8r", "pdf_size": 12839495, "rating": "5;6;6;8", "confidence": "4;3;4;4", "soundness": "2;3;3;3", "contribution": "3;2;3;3", "presentation": "3;2;4;3", "wc_summary": "65;50;75;31", "wc_strengths": "44;52;152;36", "wc_weaknesses": "422;145;190;124", "wc_questions": "142;59;191;12", "wc_review": "673;306;608;203", "wc_reply_reviewers": "73;390;0;93", "wc_reply_authors": "2262;1963;1134;867", "reply_reviewers": "1;4;0;2", "reply_authors": "4;7;2;3", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 55.25, 16.588776326179094 ], "wc_strengths_avg": [ 71.0, 47.10626285325551 ], "wc_weaknesses_avg": [ 220.25, 118.89570009045744 ], "wc_questions_avg": [ 101.0, 69.76030389842063 ], "wc_review_avg": [ 447.5, 197.7454171403221 ], "wc_reply_reviewers_avg": [ 139.0, 148.9916105020682 ], "wc_reply_authors_avg": [ 1556.5, 573.7789208397255 ], "reply_reviewers_avg": [ 1.75, 1.479019945774904 ], "reply_authors_avg": [ 4.0, 1.8708286933869707 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.13245323570650439, "gs_citation": 28, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=8293660199150583941&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=3VD4PNEt5q", "pdf": "https://openreview.net/pdf?id=3VD4PNEt5q", "email": "purdue.edu;dgist.ac.kr;cs.purdue.edu;rit.edu;;rit.edu;rit.edu;cs.purdue.edu", "author_num": 8, "aff_unique_index": "0;1;0;2;2;2;0", "aff_unique_norm": "Purdue University;Daegu Gyeongbuk Institute of Science and Technology;Rochester Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.purdue.edu;https://www.dgist.ac.kr;https://www.rit.edu", "aff_unique_abbr": "Purdue;DGIST;RIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Daegu", "aff_country_unique_index": "0;1;0;0;0;0;0", "aff_country_unique": "United States;South Korea" }, { "title": "LEMON: Lossless model expansion", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19508", "id": "3Vw7DQqq7U", "author_site": "Yite Wang, Jiahao Su, Hanlin Lu, Cong Xie, Tianyi Liu, Jianbo Yuan, Haibin Lin, Ruoyu Sun, Hongxia Yang", "tldr": "", "abstract": "Scaling of deep neural networks, especially Transformers, is pivotal for their surging performance and has further led to the emergence of sophisticated reasoning capabilities in foundation models.\nSuch scaling generally requires training large models from scratch with random initialization, failing to leverage the knowledge acquired by their smaller counterparts, which are already resource-intensive to obtain.\nTo tackle this inefficiency, we present $\\textbf{L}$ossl$\\textbf{E}$ss $\\textbf{MO}$del Expansio$\\textbf{N}$ (LEMON), a recipe \nto initialize scaled models using the weights of their smaller but pre-trained counterparts. This is followed by model training with an optimized learning rate scheduler tailored explicitly for the scaled models, substantially reducing the training time compared to training from scratch.\nNotably, LEMON is versatile, ensuring compatibility with various network structures, including models like Vision Transformers and BERT.\nOur empirical results demonstrate that LEMON reduces computational costs by 56.7\\% for Vision Transformers and 33.2\\% for BERT when compared to training from scratch.", "keywords": "model growth;efficient deep learning;continual learning", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Yite Wang;Jiahao Su;Hanlin Lu;Cong Xie;Tianyi Liu;Jianbo Yuan;Haibin Lin;Ruoyu Sun;Hongxia Yang", "authorids": "~Yite_Wang1;~Jiahao_Su1;~Hanlin_Lu1;~Cong_Xie1;~Tianyi_Liu2;~Jianbo_Yuan1;~Haibin_Lin1;~Ruoyu_Sun1;~Hongxia_Yang2", "gender": "M;M;M;M;M;M;;;F", "homepage": "https://yitewang.github.io/;;;https://congxie1108.github.io/;https://sites.google.com/view/tianyiliu/home;;;https://ruoyus.github.io/;https://www4.comp.polyu.edu.hk/~hongxyang/", "dblp": "317/0407;;31/7172;;;134/6790;;30/9879-1;", "google_scholar": "wEGgxUIAAAAJ;z4AEqYkAAAAJ;https://scholar.google.at/citations?user=UbWokRoAAAAJ;pIPJUJMAAAAJ;;https://scholar.google.com/citations?hl=en;;PsfzbCMAAAAJ;iJlC5mMAAAAJ", "orcid": ";;;;;;;;", "linkedin": "yite-wang-261057140/;jiahaosu-umd/;;;;;;;", "or_profile": "~Yite_Wang1;~Jiahao_Su1;~Hanlin_Lu1;~Cong_Xie1;~Tianyi_Liu2;~Jianbo_Yuan1;~Haibin_Lin1;~Ruoyu_Sun1;~Hongxia_Yang2", "aff": "University of Illinois, Urbana Champaign;Amazon;ByteDance;ByteDance Inc.;Amazon;Bytedance;;The Chinese University of Hong Kong;ByteDance Inc.", "aff_domain": 
"illinois.edu;amazon.com;bytedance.com;bytedance.com;amazon.com;bytedance.com;;cuhk.edu.cn;bytedance.com", "position": "PhD student;Researcher;Researcher;Researcher;Researcher;Researcher;;Associate Professor;Principal Researcher", "bibtex": "@inproceedings{\nwang2024lemon,\ntitle={{LEMON}: Lossless model expansion},\nauthor={Yite Wang and Jiahao Su and Hanlin Lu and Cong Xie and Tianyi Liu and Jianbo Yuan and Haibin Lin and Ruoyu Sun and Hongxia Yang},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=3Vw7DQqq7U}\n}", "github": "", "project": "", "reviewers": "JSXE;u5r2;XnGd;pAyq", "pdf_size": 898077, "rating": "6;6;8;8", "confidence": "4;4;4;3", "soundness": "3;3;3;3", "contribution": "3;2;3;3", "presentation": "3;3;4;3", "wc_summary": "185;96;62;119", "wc_strengths": "136;55;77;132", "wc_weaknesses": "125;221;56;155", "wc_questions": "42;6;92;7", "wc_review": "488;378;287;413", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "568;927;652;579", "reply_reviewers": "0;0;0;0", "reply_authors": "1;2;1;1", "rating_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 115.5, 44.95831402532795 ], "wc_strengths_avg": [ 100.0, 34.90701935141412 ], "wc_weaknesses_avg": [ 139.25, 59.29744935492588 ], "wc_questions_avg": [ 36.75, 35.03837182290296 ], "wc_review_avg": [ 391.5, 72.2443769438148 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 681.5, 145.36935715617648 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16616827554209479908&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=3Vw7DQqq7U", "pdf": "https://openreview.net/pdf?id=3Vw7DQqq7U", "email": "illinois.edu;amazon.com;bytedance.com;bytedance.com;amazon.com;bytedance.com;;cuhk.edu.cn;bytedance.com", "author_num": 9, "aff_unique_index": "0;1;2;2;1;2;3;2", "aff_unique_norm": "University of Illinois Urbana-Champaign;Amazon;ByteDance;Chinese University of Hong Kong", "aff_unique_dep": ";Amazon.com, Inc.;;", "aff_unique_url": "https://illinois.edu;https://www.amazon.com;https://www.bytedance.com;https://www.cuhk.edu.hk", "aff_unique_abbr": "UIUC;Amazon;ByteDance;CUHK", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Urbana-Champaign;;Hong Kong SAR", "aff_country_unique_index": "0;0;1;1;0;1;1;1", "aff_country_unique": "United States;China" }, { "id": "3WB5hT27zf", "title": "Partial Optimal Transport for Open-set Semi-supervised Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Semi-supervised learning (SSL) is a machine learning paradigm that leverages both labeled and unlabeled data to improve the performance of learning tasks. However, SSL methods make an assumption that the label spaces of labeled and unlabeled data are identical, which may not hold in open-world applications, where the unlabeled data may contain novel categories that were not present in the labeled training data, essentially outliers. This paper tackles open-set semi-supervised learning (OSSL), where detecting these outliers, or out-of-distribution (OOD) data, is critical. 
In particular, we model the OOD detection problem in OSSL as a partial optimal transport (POT) problem. With the theory of POT, we devise a mass score function (MSF) to measure the likelihood of a sample being an outlier during training. Then, a novel OOD loss is proposed, which allows to adapt the off-the-shelf SSL methods with POT into OSSL settings in an end-to-end training manner.\nFurthermore, we conduct extensive experiments on multiple datasets and OSSL configurations, demonstrating that our method consistently achieves superior or competitive results compared to existing approaches.", "keywords": "Open-set problem;Optimal transport;Semi-supervised learning;Out-of-distribution detection", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Yilong Ren;Chuanwen Feng;Xike Xie;S Kevin Zhou", "authorids": "~Yilong_Ren1;~Chuanwen_Feng1;~Xike_Xie1;~S_Kevin_Zhou1", "gender": "M;M;M;M", "homepage": "https://github.com/ryl0427;https://optstats.github.io;http://staff.ustc.edu.cn/~xkxie;", "dblp": ";;64/1308;57/98", "google_scholar": ";;;8eNm2GMAAAAJ", "orcid": ";;;0000-0002-6881-4444", "linkedin": ";;;s-kevin-zhou-231a094b/", "or_profile": "~Yilong_Ren1;~Chuanwen_Feng1;~Xike_Xie1;~S_Kevin_Zhou1", "aff": "University of Science and Technology of China;;University of Science and Technology of China;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;;ustc.edu.cn;ustc.edu.cn", "position": "MS student;;Research Professor;Full Professor", "bibtex": "@misc{\nren2024partial,\ntitle={Partial Optimal Transport for Open-set Semi-supervised Learning},\nauthor={Yilong Ren and Chuanwen Feng and Xike Xie and S Kevin Zhou},\nyear={2024},\nurl={https://openreview.net/forum?id=3WB5hT27zf}\n}", "github": "", "project": "", "reviewers": "2Syu;RKEQ;uAiP", "site": "https://openreview.net/forum?id=3WB5hT27zf", "pdf_size": 369667, "rating": "5;5;6", "confidence": "4;4;3", "soundness": "3;3;4", "contribution": "2;2;4", "presentation": "3;3;2", "wc_summary": "86;52;46", "wc_strengths": "56;40;30", "wc_weaknesses": "381;268;200", "wc_questions": "90;6;99", "wc_review": "613;366;375", "wc_reply_reviewers": "157;10;13", "wc_reply_authors": "912;807;286", "reply_reviewers": "1;1;1", "reply_authors": "3;2;1", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.9428090415820634 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 61.333333333333336, 17.613126418163876 ], "wc_strengths_avg": [ 42.0, 10.708252269472673 ], "wc_weaknesses_avg": [ 283.0, 74.65029582437478 ], "wc_questions_avg": [ 65.0, 41.88078318274385 ], "wc_review_avg": [ 451.3333333333333, 114.37462811111368 ], "wc_reply_reviewers_avg": [ 60.0, 68.60029154456998 ], "wc_reply_authors_avg": [ 668.3333333333334, 273.72776419079025 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9999999999999997, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:0HmSht6QT7kJ:scholar.google.com/&scioq=Partial+Optimal+Transport+for+Open-set+Semi-supervised+Learning&hl=en&as_sdt=0,23", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Science and Technology of China", 
"aff_unique_dep": "", "aff_unique_url": "http://www.ustc.edu.cn", "aff_unique_abbr": "USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "3WDFykPhM4", "title": "Generalized Supervised Contrastive Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "With the recent promising results of contrastive learning in the self-supervised learning paradigm, supervised contrastive learning has successfully extended these contrastive approaches to supervised contexts, outperforming cross-entropy on various datasets. However, supervised contrastive learning inherently employs label information in a binary form--either positive or negative--using a one-hot target vector. This structure struggles to adapt to methods that exploit label information as a probability distribution, such as CutMix and knowledge distillation. In this paper, we introduce a generalized supervised contrastive loss, which measures cross-entropy between label similarity and latent similarity. This concept enhances the capabilities of supervised contrastive loss by fully utilizing the label distribution and enabling the adaptation of various existing techniques for training modern neural networks. Leveraging this generalized supervised contrastive loss, we construct a tailored framework: the Generalized Supervised Contrastive Learning (GenSCL). Compared to existing contrastive learning frameworks, GenSCL incorporates additional enhancements, including advanced image-based regularization techniques and an arbitrary teacher classifier. When applied to ResNet50 with the Momentum Contrast technique, GenSCL achieves a top-1 accuracy of 77.3% on ImageNet, a 4.1% relative improvement over traditional supervised contrastive learning. Moreover, our method establishes new state-of-the-art accuracies of 98.2% and 87.0% on CIFAR10 and CIFAR100 respectively when applied to ResNet50, marking the highest reported figures for this architecture.", "keywords": "Contrastive Learning;Knowledge Distillation;Representation Learning;Image-Based Regularization;Label Smoothing", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Jaewon Kim;Hyeok Jong Lee;Jooyoung Chang;Sang Min Park", "authorids": "~Jaewon_Kim2;~Hyeok_Jong_Lee1;~Jooyoung_Chang3;~Sang_Min_Park2", "gender": "M;;M;M", "homepage": "https://kiimmm.github.io/;https://hyeok-jong.github.io/;http://biomed.snu.ac.kr/main/tmpl/sub_main.php?m_cd=8&m_id=0103&sp=2&wr_id=73;https://scholar.google.com/citations?user=_Y2v3BMAAAAJ", "dblp": "00/4936;;67/4835;258/4663", "google_scholar": ";;Km1BklMAAAAJ;_Y2v3BMAAAAJ", "orcid": ";;;0000-0002-8586-0645", "linkedin": "jaewon-kim-16ba651b6/;;;jooyoungchang", "or_profile": "~Jaewon_Kim2;~Hyeok_Jong_Lee1;~Sang_Min_Park2;~Jooyoung_Chang2", "aff": "Seoul National University Graduate School;Seoul National University Graduate School;;XAIMED Co. 
Ltd.", "aff_domain": "snu.ac.kr;snu.ac.kr;;ixaimed.com", "position": "PhD student;MS student;;Researcher", "bibtex": "@misc{\nkim2024generalized,\ntitle={Generalized Supervised Contrastive Learning},\nauthor={Jaewon Kim and Hyeok Jong Lee and Jooyoung Chang and Sang Min Park},\nyear={2024},\nurl={https://openreview.net/forum?id=3WDFykPhM4}\n}", "github": "", "project": "", "reviewers": "", "site": "https://openreview.net/forum?id=3WDFykPhM4", "pdf_size": 0, "rating": "", "confidence": "", "soundness": "", "contribution": "", "presentation": "", "wc_summary": "", "wc_strengths": "", "wc_weaknesses": "", "wc_questions": "", "wc_review": "", "wc_reply_reviewers": "", "wc_reply_authors": "", "reply_reviewers": "", "reply_authors": "", "rating_avg": [ 0, 0 ], "confidence_avg": [ 0, 0 ], "soundness_avg": [ 0, 0 ], "contribution_avg": [ 0, 0 ], "presentation_avg": [ 0, 0 ], "wc_summary_avg": [ 0, 0 ], "wc_strengths_avg": [ 0, 0 ], "wc_weaknesses_avg": [ 0, 0 ], "wc_questions_avg": [ 0, 0 ], "wc_review_avg": [ 0, 0 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 0, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1366932886400978211&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Seoul National University;XAIMED Co. Ltd.", "aff_unique_dep": "Graduate School;", "aff_unique_url": "https://www.snu.ac.kr;", "aff_unique_abbr": "SNU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea;" }, { "title": "Continual Learning in the Presence of Spurious Correlations: Analyses and a Simple Baseline", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19507", "id": "3Y7r6xueJJ", "author_site": "Donggyu Lee, Sangwon Jung, Taesup Moon", "tldr": "", "abstract": "Most continual learning (CL) algorithms have focused on tackling the stability-plasticity dilemma, that is, the challenge of preventing the forgetting of past tasks while learning new ones. However, we argue that they have overlooked the impact of knowledge transfer when the training dataset of a certain task is biased \u2014 namely, when the dataset contains some spurious correlations that can overly influence the prediction rule of a model. In that case, how would the dataset bias of a certain task affect the prediction rules of a CL model for future or past tasks? In this work, we carefully design systematic experiments using three benchmark datasets to answer the question from our empirical findings. Specifically, we first show through two-task CL experiments that standard CL methods, which are oblivious of the dataset bias, can transfer bias from one task to another, both forward and backward. Moreover, we find out this transfer is exacerbated depending on whether the CL methods focus on stability or plasticity. We then present that the bias is also transferred and even accumulates in longer task sequences. 
Finally, we offer a standardized experimental setup and a simple, yet strong plug-in baseline method, dubbed as group-class Balanced Greedy Sampling (BGS), which are utilized for the development of more advanced bias-aware CL methods.", "keywords": "continual learning;bias;spurious correlation", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "", "author": "Donggyu Lee;Sangwon Jung;Taesup Moon", "authorids": "~Donggyu_Lee1;~Sangwon_Jung1;~Taesup_Moon1", "gender": "M;M;", "homepage": "https://sites.google.com/view/dqlee/%ED%99%88;https://successful-humor-4db.notion.site/Sangwon-Jung-70109a49767a470092a6ee0d02c78313;https://mindlab-snu.github.io/people/pi/", "dblp": "142/3306;236/3698;05/4084", "google_scholar": "7syHfVAAAAAJ;WdC_a5IAAAAJ;lQlioBoAAAAJ", "orcid": ";;0000-0002-9257-6503", "linkedin": ";;", "or_profile": "~Donggyu_Lee1;~Sangwon_Jung1;~Taesup_Moon1", "aff": "Sungkyunkwan University;Seoul National University;Seoul National University", "aff_domain": "skku.edu;snu.ac.kr;snu.ac.kr", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nlee2024continual,\ntitle={Continual Learning in the Presence of Spurious Correlations: Analyses and a Simple Baseline},\nauthor={Donggyu Lee and Sangwon Jung and Taesup Moon},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=3Y7r6xueJJ}\n}", "github": "", "project": "", "reviewers": "C1FY;A8St;UUYC;6w81", "pdf_size": 3530484, "rating": "5;6;6;8", "confidence": "3;3;4;4", "soundness": "1;3;3;3", "contribution": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "90;62;95;106", "wc_strengths": "1;73;154;88", "wc_weaknesses": "183;190;229;189", "wc_questions": "5;77;48;97", "wc_review": "279;402;526;480", "wc_reply_reviewers": "0;20;113;26", "wc_reply_authors": "574;1019;756;1056", "reply_reviewers": "0;1;1;1", "reply_authors": "2;4;2;3", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 88.25, 16.223054582907622 ], "wc_strengths_avg": [ 79.0, 54.37370688117557 ], "wc_weaknesses_avg": [ 197.75, 18.239723133863627 ], "wc_questions_avg": [ 56.75, 34.585943676586304 ], "wc_review_avg": [ 421.75, 93.57984558653642 ], "wc_reply_reviewers_avg": [ 39.75, 43.37265843823733 ], "wc_reply_authors_avg": [ 851.25, 197.48591721943112 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.6882472016116854, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3613202752424766689&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 0, "openreview": "https://openreview.net/forum?id=3Y7r6xueJJ", "pdf": "https://openreview.net/pdf?id=3Y7r6xueJJ", "email": "skku.edu;snu.ac.kr;snu.ac.kr", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Sungkyunkwan University;Seoul National University", "aff_unique_dep": ";", "aff_unique_url": "https://www.skku.edu;https://www.snu.ac.kr", "aff_unique_abbr": "SKKU;SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "PoSE: Efficient Context Window Extension of LLMs via Positional Skip-wise Training", "status": "Poster", "track": 
"main", "site": "https://iclr.cc/virtual/2024/poster/19506", "id": "3Z1gxuAQrA", "author_site": "Dawei Zhu, Nan Yang, Liang Wang, Yifan Song, Wenhao Wu, Furu Wei, Sujian Li", "tldr": "", "abstract": "Large Language Models (LLMs) are trained with a pre-defined context length, restricting their use in scenarios requiring long inputs. Previous efforts for adapting LLMs to a longer length usually requires fine-tuning with this target length (Full-length fine-tuning), suffering intensive training cost. To decouple train length from target length for efficient context window extension, we propose Positional Skip-wisE (PoSE) training that smartly simulates long inputs using a fixed context window. This is achieved by first dividing the original context window into several chunks, then designing distinct skipping bias terms to manipulate the position indices of each chunk. These bias terms and the lengths of each chunk are altered for every training example, allowing the model to adapt to all positions within target length. Experimental results show that PoSE greatly reduces memory and time overhead compared with Full-length fine-tuning, with minimal impact on performance. Leveraging this advantage, we have successfully extended the LLaMA model to 128k tokens using a 2k training context window. Furthermore, we empirically confirm that PoSE is compatible with all RoPE-based LLMs and position interpolation strategies. Notably, our method can potentially support infinite length, limited only by memory usage in inference. With ongoing progress for efficient inference, we believe PoSE can further scale the context window beyond 128k.", "keywords": "context window extension;efficiency;positional skip-wise training", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Dawei Zhu;Nan Yang;Liang Wang;Yifan Song;Wenhao Wu;Furu Wei;Sujian Li", "authorids": "~Dawei_Zhu2;~Nan_Yang5;~Liang_Wang2;~Yifan_Song2;~Wenhao_Wu7;~Furu_Wei1;~Sujian_Li1", "gender": ";;M;M;M;M;F", "homepage": ";;https://github.com/intfloat;https://yifan-song793.github.io/;;https://www.microsoft.com/en-us/research/people/fuwei/;https://pku-tangent.github.io/", "dblp": ";https://dblp.uni-trier.de/pers/hd/y/Yang_0002:Nan;56/4499;;;72/5870;05/4288", "google_scholar": "oD2HPaYAAAAJ;InAQ3o0AAAAJ;NfJbKJ4AAAAJ;;LZFvCrwAAAAJ;G-V1VpwAAAAJ;https://scholar.google.com.tw/citations?user=RvBDhSwAAAAJ", "orcid": ";;0000-0003-4664-7136;;;;", "linkedin": ";;;;;;", "or_profile": "~Dawei_Zhu2;~Nan_Yang5;~Liang_Wang2;~Yifan_Song2;~Wenhao_Wu7;~Furu_Wei1;~Sujian_Li1", "aff": "Peking University;Microsoft Research Asia;Microsoft Research;Peking University;Peking University;Microsoft Research;Peking University", "aff_domain": "pku.edu.cn;microsoft.com;microsoft.com;pku.edu.cn;pku.edu.cn;microsoft.com;pku.edu.cn", "position": "PhD student;Researcher;Researcher;PhD student;PhD student;Distinguished Scientist;Associate Professor", "bibtex": "@inproceedings{\nzhu2024pose,\ntitle={Po{SE}: Efficient Context Window Extension of {LLM}s via Positional Skip-wise Training},\nauthor={Dawei Zhu and Nan Yang and Liang Wang and Yifan Song and Wenhao Wu and Furu Wei and Sujian Li},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=3Z1gxuAQrA}\n}", "github": "", "project": "", "reviewers": "NXZX;VfnN;T2GT;UVnU", "pdf_size": 496870, "rating": "6;6;6;6", "confidence": "3;4;4;5", "soundness": "2;3;3;4", 
"contribution": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "35;174;71;45", "wc_strengths": "27;91;59;84", "wc_weaknesses": "67;131;121;113", "wc_questions": "5;178;18;4", "wc_review": "134;574;269;246", "wc_reply_reviewers": "29;18;0;15", "wc_reply_authors": "320;938;955;416", "reply_reviewers": "1;1;0;1", "reply_authors": "1;2;2;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 81.25, 55.137895317104736 ], "wc_strengths_avg": [ 65.25, 25.083610186733488 ], "wc_weaknesses_avg": [ 108.0, 24.515301344262525 ], "wc_questions_avg": [ 51.25, 73.38724344189527 ], "wc_review_avg": [ 305.75, 163.0772439673911 ], "wc_reply_reviewers_avg": [ 15.5, 10.35615758860399 ], "wc_reply_authors_avg": [ 657.25, 291.2965628015545 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 75, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4361802516457215180&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=3Z1gxuAQrA", "pdf": "https://openreview.net/pdf?id=3Z1gxuAQrA", "email": "pku.edu.cn;microsoft.com;microsoft.com;pku.edu.cn;pku.edu.cn;microsoft.com;pku.edu.cn", "author_num": 7, "aff_unique_index": "0;1;1;0;0;1;0", "aff_unique_norm": "Peking University;Microsoft", "aff_unique_dep": ";Research", "aff_unique_url": "http://www.pku.edu.cn;https://www.microsoft.com/en-us/research/group/asia", "aff_unique_abbr": "Peking U;MSR Asia", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;0;1;0;0;1;0", "aff_country_unique": "China;United States" }, { "id": "3ZDEwhAlCO", "title": "ILPO-NET: convolution network for the recognition of arbitrary volumetric patterns", "track": "main", "status": "Reject", "tldr": "", "abstract": "Modern spatial data analysis is built on the effective recognition of spatial patterns and learning their hierarchy. Applications to real-world volumetric data require techniques that ensure invariance not only to shifts but also to pattern rotations. While traditional methods can readily achieve translational invariance, rotational invariance possesses multiple challenges and remains an active area of research.\nHere, we present ILPO-Net (Invariant to Local Patterns Orientation Network), a novel approach to handling arbitrarily shaped patterns with the convolutional operation inherently invariant to local spatial pattern orientations. Our architecture seamlessly integrates the new convolution operator and, when benchmarked on diverse volumetric datasets such as MedMNIST and CATH, demonstrates superior performance over the baselines with significantly reduced parameter counts\u2014up to 1000 times fewer in the case of MedMNIST. 
Beyond these demonstrations, ILPO-Net's rotational invariance paves the way for other applications across multiple disciplines.", "keywords": "Volumetric data;3DCNN;pattern recognition;rotational invariance;SO(3) invariance;SE(3) invariance", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/89e246cac8b5cbcf4173f5ec0bc45a7adb4016e6.zip", "author": "Dmitrii Zhemchuzhnikov;Sergei Grudinin", "authorids": "~Dmitrii_Zhemchuzhnikov1;~Sergei_Grudinin1", "gender": "M;M", "homepage": ";https://grulab.imag.fr/", "dblp": ";", "google_scholar": "hf04h7IAAAAJ;8-TQzFIAAAAJ", "orcid": ";0000-0002-1903-7220", "linkedin": ";", "or_profile": "~Dmitrii_Zhemchuzhnikov1;~Sergei_Grudinin1", "aff": "Universit\u00e9 Grenoble Alpes;CNRS", "aff_domain": "univ-grenoble-alpes.fr;cnrs.fr", "position": "PhD student;Principal Researcher", "bibtex": "@misc{\nzhemchuzhnikov2024ilponet,\ntitle={{ILPO}-{NET}: convolution network for the recognition of arbitrary volumetric patterns},\nauthor={Dmitrii Zhemchuzhnikov and Sergei Grudinin},\nyear={2024},\nurl={https://openreview.net/forum?id=3ZDEwhAlCO}\n}", "github": "", "project": "", "reviewers": "ckuL;KeGD;PtnA;sNRn", "site": "https://openreview.net/forum?id=3ZDEwhAlCO", "pdf_size": 8867013, "rating": "3;5;5;5", "confidence": "3;3;2;4", "soundness": "3;3;2;2", "contribution": "1;3;2;3", "presentation": "2;3;2;2", "wc_summary": "115;49;61;51", "wc_strengths": "47;75;51;39", "wc_weaknesses": "359;53;60;275", "wc_questions": "140;96;4;65", "wc_review": "661;273;176;430", "wc_reply_reviewers": "0;10;0;0", "wc_reply_authors": "1158;478;133;957", "reply_reviewers": "0;1;0;0", "reply_authors": "3;2;1;2", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 69.0, 26.94438717061496 ], "wc_strengths_avg": [ 53.0, 13.416407864998739 ], "wc_weaknesses_avg": [ 186.75, 133.61582054532315 ], "wc_questions_avg": [ 76.25, 49.499368682842814 ], "wc_review_avg": [ 385.0, 183.3207571444107 ], "wc_reply_reviewers_avg": [ 2.5, 4.330127018922194 ], "wc_reply_authors_avg": [ 681.5, 401.62700357421187 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:JLQ4nKfx_2QJ:scholar.google.com/&scioq=ILPO-NET:+convolution+network+for+the+recognition+of+arbitrary+volumetric+patterns&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Universit\u00e9 Grenoble Alpes;Centre National de la Recherche Scientifique", "aff_unique_dep": ";", "aff_unique_url": "https://www.univ-grenoble-alpes.fr;https://www.cnrs.fr", "aff_unique_abbr": "UGA;CNRS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "id": "3ZWdgOvmAA", "title": "LumiNet: The Bright Side of Perceptual Knowledge Distillation", "track": "main", "status": "Reject", "tldr": "", "abstract": "In knowledge distillation research, feature-based methods have dominated due to their ability to effectively tap into extensive teacher models. In contrast, logit-based approaches are considered to be less adept at extracting hidden 'dark knowledge' from teachers. 
To bridge this gap, we present LumiNet, a novel knowledge-transfer algorithm designed to enhance logit-based distillation. We introduce a perception matrix that aims to recalibrate logits through adjustments based on the model's representation capability. By meticulously analyzing intra-class dynamics, LumiNet reconstructs more granular inter-class relationships, enabling the student model to learn a richer breadth of knowledge. Both teacher and student models are mapped onto this refined matrix, with the student's goal being to minimize representational discrepancies. Rigorous testing on benchmark datasets (CIFAR-100, ImageNet, and MSCOCO) attests to LumiNet's efficacy, revealing its competitive edge over leading feature-based methods. Moreover, in exploring the realm of transfer learning, we assess how effectively the student model, trained using our method, adapts to downstream tasks. Notably, when applied to Tiny ImageNet, the transferred features exhibit remarkable performance, further underscoring LumiNet's versatility and robustness in diverse settings. With LumiNet, we hope to steer the research discourse towards a renewed interest in the latent capabilities of logit-based knowledge distillation.", "keywords": "Knowledge Distillation;Model Compression;Transfer Learning;Computer Vision", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "/attachment/cd7faf2661b6984e380d9fe3d317671e7cd02ec6.pdf", "author": "Md. Ismail Hossain;M M Lutfe Elahi;Sameera Ramasinghe;Ali Cheraghian;Fuad Rahman;Nabeel Mohammed;Shafin Rahman", "authorids": "~Md._Ismail_Hossain1;~M_M_Lutfe_Elahi1;~Sameera_Ramasinghe1;~Ali_Cheraghian2;~Fuad_Rahman1;~Nabeel_Mohammed1;~Shafin_Rahman1", "gender": "M;;M;M;M;M;M", "homepage": "https://sites.google.com/view/ismailhossain31415;;;https://alichr.github.io/;;http://ece.northsouth.edu/people/dr-nabeel-mohammed/;https://sites.google.com/site/rshafin", "dblp": "28/10457;;181/4514;;11/478;127/2798;95/10398", "google_scholar": "8f7b5YIAAAAJ;;https://scholar.google.com.au/citations?user=-j0m9aMAAAAJ;QT0EXIkAAAAJ;D0riK2QAAAAJ;https://scholar.google.com.au/citations?hl=en;Pe8C-SUAAAAJ", "orcid": ";;;;0000-0002-8670-7124;0000-0002-7661-3570;0000-0001-7169-0318", "linkedin": "md-ismail-hossain-771423167/;;;ali-cheraghian-006aa26b/;https://www.linkedin.com/pub/fuad-rahman/1/30/6ba;;rshafin/", "or_profile": "~Md._Ismail_Hossain1;~M_M_Lutfe_Elahi1;~Sameera_Ramasinghe1;~Ali_Cheraghian2;~Fuad_Rahman1;~Nabeel_Mohammed1;~Shafin_Rahman1", "aff": "North South University;;Amazon;CSIRO;University of Arizona;North South University;North South University", "aff_domain": "northsouth.edu;;amazon.com;data61.csiro.au;arizona.edu;northsouth.edu;northsouth.edu", "position": "Researcher;;Researcher;Researcher;Adjunct Professor;Associate Professor;Assistant Professor", "bibtex": "@misc{\nhossain2024luminet,\ntitle={LumiNet: The Bright Side of Perceptual Knowledge Distillation},\nauthor={Md. 
Ismail Hossain and M M Lutfe Elahi and Sameera Ramasinghe and Ali Cheraghian and Fuad Rahman and Nabeel Mohammed and Shafin Rahman},\nyear={2024},\nurl={https://openreview.net/forum?id=3ZWdgOvmAA}\n}", "github": "", "project": "", "reviewers": "8pKb;tkRa;7mGH;3qnL", "site": "https://openreview.net/forum?id=3ZWdgOvmAA", "pdf_size": 2931974, "rating": "5;5;5;5", "confidence": "3;5;3;5", "soundness": "2;2;2;2", "contribution": "2;2;2;2", "presentation": "3;2;2;2", "wc_summary": "60;53;27;52", "wc_strengths": "21;21;39;32", "wc_weaknesses": "50;87;185;75", "wc_questions": "16;11;243;2", "wc_review": "147;172;494;161", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "627;597;1131;624", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;2;1", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 4.0, 1.0 ], "soundness_avg": [ 2.0, 0.0 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 48.0, 12.509996003196804 ], "wc_strengths_avg": [ 28.25, 7.660776723022281 ], "wc_weaknesses_avg": [ 99.25, 51.275603360662664 ], "wc_questions_avg": [ 68.0, 101.16076314461057 ], "wc_review_avg": [ 243.5, 144.89737747799302 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 744.75, 223.30738344264392 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11737768826256174744&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1;2;3;0;0", "aff_unique_norm": "North South University;Amazon;Commonwealth Scientific and Industrial Research Organisation;University of Arizona", "aff_unique_dep": ";Amazon.com, Inc.;;", "aff_unique_url": "https://www.northsouth.edu/;https://www.amazon.com;https://www.csiro.au;https://www.arizona.edu", "aff_unique_abbr": "NSU;Amazon;CSIRO;UA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;1;0;0", "aff_country_unique": "Bangladesh;United States;Australia" }, { "id": "3Zm6wR5Mvc", "title": "LangNav: Language as a Perceptual Representation for Navigation", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "We explore the use of language as a perceptual representation for vision-and-language navigation. Our approach uses off-the-shelf vision systems (for image captioning and object detection) to convert an agent's egocentric panoramic view at each time step into natural language descriptions. We then finetune a pretrained language model to select an action, based on the current view and the trajectory history, that would best fulfill the navigation instructions. In contrast to the standard setup which adapts a pretrained language model to work directly with continuous visual features from pretrained vision models, our approach instead uses (discrete) language as the perceptual representation. We explore two use cases of our language-based navigation ours approach on the R2R vision-and-language navigation benchmark: generating synthetic trajectories from a prompted large language model (GPT-4) with which to finetune a smaller language model; and sim-to-real transfer where we transfer a policy learned on a simulated environment (ALFRED) to a real-world environment (R2R). 
Our approach is found to improve upon strong baselines that rely on visual features in settings where only a few gold trajectories (10-100) are available, demonstrating the potential of using language as a perceptual representation for learning navigation agents.", "keywords": "Language Models;Vision-and-Language Navigation;Learn from Synthetic Data;Sim-to-Real", "primary_area": "applications to robotics, autonomy, planning", "supplementary_material": "/attachment/e323f53fccd0c1df76c4e891a3b9910cdd58dc9d.pdf", "author": "Bowen Pan;Rameswar Panda;SouYoung Jin;Rogerio Feris;Aude Oliva;Phillip Isola;Yoon Kim", "authorids": "~Bowen_Pan2;~Rameswar_Panda1;~SouYoung_Jin2;~Rogerio_Feris1;~Aude_Oliva1;~Phillip_Isola1;~Yoon_Kim1", "gender": "M;M;F;M;;M;", "homepage": "http://people.csail.mit.edu/bpan/;https://rpand002.github.io/;http://souyoungjin.com;http://rogerioferis.com;;http://web.mit.edu/phillipi/;https://people.csail.mit.edu/yoonkim/", "dblp": "188/1139;126/0986;225/4723;;;36/9988;", "google_scholar": "x9Tpbq8AAAAJ;_ySuu6gAAAAJ;_B-_CzYAAAAJ;xt3XLjcAAAAJ;;ROILf3EAAAAJ;n_ts4eYAAAAJ", "orcid": ";;;;;0000-0002-1411-6704;", "linkedin": ";;;;;phillip-isola-a9955b20/;", "or_profile": "~Bowen_Pan2;~Rameswar_Panda1;~SouYoung_Jin2;~Rogerio_Feris1;~Aude_Oliva1;~Phillip_Isola1;~Yoon_Kim1", "aff": "Massachusetts Institute of Technology;MIT-IBM Watson AI Lab;Dartmouth College;International Business Machines;;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;ibm.com;dartmouth.edu;ibm.com;;mit.edu;mit.edu", "position": "PhD student;Research Scientist;Assistant Professor;Research Manager;;Associate Professor;Assistant Professor", "bibtex": "@misc{\npan2024langnav,\ntitle={LangNav: Language as a Perceptual Representation for Navigation},\nauthor={Bowen Pan and Rameswar Panda and SouYoung Jin and Rogerio Feris and Aude Oliva and Phillip Isola and Yoon Kim},\nyear={2024},\nurl={https://openreview.net/forum?id=3Zm6wR5Mvc}\n}", "github": "", "project": "", "reviewers": "z42V;PNyi;Sthf;MJeR;2DEF", "site": "https://openreview.net/forum?id=3Zm6wR5Mvc", "pdf_size": 2144034, "rating": "3;3;5;6;8", "confidence": "4;4;3;4;5", "soundness": "2;2;2;4;3", "contribution": "2;1;2;2;2", "presentation": "2;3;2;4;3", "wc_summary": "93;150;115;43;107", "wc_strengths": "78;31;71;65;185", "wc_weaknesses": "178;722;215;76;709", "wc_questions": "131;122;228;70;594", "wc_review": "480;1025;629;254;1595", "wc_reply_reviewers": "117;324;103;370;145", "wc_reply_authors": "1089;2154;1723;693;2811", "reply_reviewers": "1;1;1;1;1", "reply_authors": "3;5;4;2;6", "rating_avg": [ 5.0, 1.8973665961010275 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.6, 0.8 ], "contribution_avg": [ 1.8, 0.4000000000000001 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 101.6, 34.811492355255325 ], "wc_strengths_avg": [ 86.0, 52.069184744914146 ], "wc_weaknesses_avg": [ 380.0, 277.7228834647948 ], "wc_questions_avg": [ 229.0, 189.50461735799473 ], "wc_review_avg": [ 796.6, 471.6670859833236 ], "wc_reply_reviewers_avg": [ 211.8, 112.16309553502882 ], "wc_reply_authors_avg": [ 1694.0, 752.0845697127418 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 4.0, 1.4142135623730951 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15739144564300280860&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;1;2;0;0", 
"aff_unique_norm": "Massachusetts Institute of Technology;Dartmouth College;International Business Machines Corporation", "aff_unique_dep": ";;", "aff_unique_url": "https://web.mit.edu;https://www.dartmouth.edu;https://www.ibm.com", "aff_unique_abbr": "MIT;Dartmouth;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Evaluating Language Model Agency Through Negotiations", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19505", "id": "3ZqKxMHcAg", "author_site": "Tim R. Davidson, Veniamin Veselovsky, Michal Kosinski, Robert West", "tldr": "", "abstract": "We introduce an approach to evaluate language model (LM) agency using negotiation games. This approach better reflects real-world use cases and addresses some of the shortcomings of alternative LM benchmarks. Negotiation games enable us to study multi-turn, and cross-model interactions, modulate complexity, and side-step accidental evaluation data leakage. We use our approach to test six widely used and publicly accessible LMs, evaluating performance and alignment in both self-play and cross-play settings. Noteworthy findings include: (i) only closed-source models tested here were able to complete these tasks; (ii) cooperative bargaining games proved to be most challenging to the models; and (iii) even the most powerful models sometimes \"lose\" to weaker opponents.", "keywords": "language model evaluation;dynamic evaluation;alignment;cooperative AI;agency;evolving benchmarks;multi-agent interactions", "primary_area": "datasets and benchmarks", "supplementary_material": "", "author": "Tim Ruben Davidson;Veniamin Veselovsky;Michal Kosinski;Robert West", "authorids": "~Tim_Ruben_Davidson1;~Veniamin_Veselovsky1;~Michal_Kosinski1;~Robert_West1", "gender": ";M;Non-Binary;M", "homepage": ";https://vminvsky.com/;https://www.michalkosinski.com;https://dlab.epfl.ch/people/west/", "dblp": ";;03/10818;20/7441-1", "google_scholar": ";hFPL6gwAAAAJ;01-XV0YAAAAJ;ZiFn598AAAAJ", "orcid": ";;0000-0003-2936-4775;", "linkedin": ";;kosinskimichal/;", "or_profile": "~Tim_Ruben_Davidson1;~Veniamin_Veselovsky1;~Michal_Kosinski1;~Robert_West1", "aff": ";Department of Computer Science, Princeton University;Stanford University;EPFL - EPF Lausanne", "aff_domain": ";cs.princeton.edu;stanford.edu;epfl.ch", "position": ";PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\ndavidson2024evaluating,\ntitle={Evaluating Language Model Agency Through Negotiations},\nauthor={Tim Ruben Davidson and Veniamin Veselovsky and Michal Kosinski and Robert West},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=3ZqKxMHcAg}\n}", "github": "", "project": "", "reviewers": "pM8H;ZZpj;yVDW", "pdf_size": 1040448, "rating": "3;5;8", "confidence": "2;4;4", "soundness": "2;2;3", "contribution": "2;2;3", "presentation": "3;2;4", "wc_summary": "97;56;75", "wc_strengths": "32;13;63", "wc_weaknesses": "429;10;75", "wc_questions": "143;215;45", "wc_review": "701;294;258", "wc_reply_reviewers": "0;0;11", "wc_reply_authors": "2140;1454;504", "reply_reviewers": "0;0;1", "reply_authors": "3;2;1", "rating_avg": [ 5.333333333333333, 2.0548046676563256 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 
3.0, 0.816496580927726 ], "wc_summary_avg": [ 76.0, 16.753109164172084 ], "wc_strengths_avg": [ 36.0, 20.607442021431645 ], "wc_weaknesses_avg": [ 171.33333333333334, 184.12012986694916 ], "wc_questions_avg": [ 134.33333333333334, 69.67224858020748 ], "wc_review_avg": [ 417.6666666666667, 200.8852629957929 ], "wc_reply_reviewers_avg": [ 3.6666666666666665, 5.185449728701348 ], "wc_reply_authors_avg": [ 1366.0, 670.78660292724 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8029550685469663, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11870300425326261130&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "openreview": "https://openreview.net/forum?id=3ZqKxMHcAg", "pdf": "https://openreview.net/pdf?id=3ZqKxMHcAg", "email": ";cs.princeton.edu;stanford.edu;epfl.ch", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Princeton University;Stanford University;EPFL", "aff_unique_dep": "Department of Computer Science;;", "aff_unique_url": "https://www.princeton.edu;https://www.stanford.edu;https://www.epfl.ch", "aff_unique_abbr": "Princeton;Stanford;EPFL", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Stanford;Lausanne", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;Switzerland" }, { "id": "3a505tMjGE", "title": "AVOID: Alleviating VAE's Overestimation in Unsupervised OOD Detection", "track": "main", "status": "Reject", "tldr": "", "abstract": "Deep generative models (DGMs) aim at characterizing the distribution of the training set by maximizing the marginal likelihood of inputs \nin an unsupervised manner, making them a promising option for unsupervised out-of-distribution (OOD) detection.\nHowever, recent works have reported that DGMs often assign higher likelihoods to OOD data than in-distribution (ID) data, i.e., **overestimation**, leading to their failures in OOD detection.\nAlthough several pioneer works have tried to analyze this phenomenon, and some VAE-based methods have also attempted to alleviate this issue by modifying their score functions for OOD detection, the root cause of the overestimation in VAE has never been revealed to our best knowledge.\nTo fill this gap, this paper will provide a thorough theoretical analysis on the overestimation issue of VAE, and reveal that this phenomenon arises from two aspects: 1) the improper design of prior distribution; 2) the gap of dataset entropy-mutual integration (sum of dataset entropy and mutual information terms) between ID and OOD datasets.\nBased on these findings, we propose a novel score function to **A**lleviate **V**AE's **O**verestimation **I**n unsupervised OOD **D**etection, named ``**AVOID**'', which contains two novel techniques, specifically post-hoc prior and dataset entropy-mutual calibration.\nExperimental results verify our theoretical analysis, demonstrating that the proposed method is effective in alleviating overestimation and improving unsupervised OOD detection performance.", "keywords": "Unsupervised Out-of-Distribution Detection;VAE", "primary_area": "probabilistic methods (Bayesian methods, variational inference, sampling, UQ, etc.)", "supplementary_material": "/attachment/5e71d3c32b265b8b2f53cffa81e9cd685502950f.zip", "author": "Yewen Li;Chaojie Wang;Xiaobo Xia;Hongxin Wei;Tongliang Liu;Bo An;Lei Feng;Xinrun Wang", "authorids": 
"~Yewen_Li1;~Chaojie_Wang1;~Xiaobo_Xia1;~Hongxin_Wei1;~Tongliang_Liu1;~Bo_An2;~Lei_Feng1;~Xinrun_Wang1", "gender": "M;M;M;M;M;M;M;M", "homepage": "https://scholar.google.com/citations?user=W5796yEAAAAJ&hl=zh-CN;https://chaojiewang94.github.io/;https://xiaoboxia.github.io/;https://hongxin001.github.io/;https://tongliang-liu.github.io/;https://personal.ntu.edu.sg/boan/;https://lfeng1995.github.io/;https://rainwangphy.github.io/", "dblp": "55/2231;134/9314-1;242/8072;150/6350;150/6667;42/6178-1.html;76/847-6;199/6413", "google_scholar": "W5796yEAAAAJ;https://scholar.google.com/citations?hl=en;jRsugY0AAAAJ;cABH034AAAAJ;https://scholar.google.com.au/citations?user=EiLdZ_YAAAAJ;PEEpuNwAAAAJ;https://scholar.google.com.sg/citations?user=KomQOFkAAAAJ;ROANfPUAAAAJ", "orcid": "0009-0008-0073-123X;;;;;0000-0002-7064-7438;0000-0003-2839-5799;", "linkedin": ";;;;;;;", "or_profile": "~Yewen_Li1;~Chaojie_Wang1;~Xiaobo_Xia1;~Hongxin_Wei1;~Tongliang_Liu1;~Bo_An2;~Lei_Feng1;~Xinrun_Wang1", "aff": "Nanyang Technological University;Skywork AI;The University of Sydney;Southern University of Science and Technology;Mohamed bin Zayed University of Artificial Intelligence;Nanyang Technological University;Singapore University of Technology and Design;Nanyang Technological University", "aff_domain": "ntu.edu.sg;kunlun-inc.com;sydney.edu.au;sustech.edu.cn;mbzuai.ac.ae;ntu.edu.sg;sutd.edu.sg;ntu.edu.sg", "position": "PhD student;Researcher;PhD student;Assistant Professor;Affiliated Associate Professor;Full Professor;Assistant Professor;Postdoc", "bibtex": "@misc{\nli2024avoid,\ntitle={{AVOID}: Alleviating {VAE}'s Overestimation in Unsupervised {OOD} Detection},\nauthor={Yewen Li and Chaojie Wang and Xiaobo Xia and Hongxin Wei and Tongliang Liu and Bo An and Lei Feng and Xinrun Wang},\nyear={2024},\nurl={https://openreview.net/forum?id=3a505tMjGE}\n}", "github": "", "project": "", "reviewers": "F6GH;jWXz;G5ek;ms1o", "site": "https://openreview.net/forum?id=3a505tMjGE", "pdf_size": 7588508, "rating": "6;6;6;6", "confidence": "4;4;4;4", "soundness": "3;3;2;3", "contribution": "3;3;3;3", "presentation": "3;3;4;3", "wc_summary": "99;159;278;106", "wc_strengths": "91;106;100;107", "wc_weaknesses": "764;212;506;212", "wc_questions": "196;110;271;62", "wc_review": "1150;587;1155;487", "wc_reply_reviewers": "307;0;215;0", "wc_reply_authors": "2054;650;1682;793", "reply_reviewers": "1;0;1;0", "reply_authors": "4;1;4;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 160.5, 71.69553682064178 ], "wc_strengths_avg": [ 101.0, 6.363961030678928 ], "wc_weaknesses_avg": [ 423.5, 230.3318258513139 ], "wc_questions_avg": [ 159.75, 80.18844991643122 ], "wc_review_avg": [ 844.75, 309.7792560840703 ], "wc_reply_reviewers_avg": [ 130.5, 134.49256485025484 ], "wc_reply_authors_avg": [ 1294.75, 590.3132113547858 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.5, 1.5 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:H902VqbwE_IJ:scholar.google.com/&scioq=AVOID:+Alleviating+VAE%27s+Overestimation+in+Unsupervised+OOD+Detection&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;4;0;5;0", "aff_unique_norm": "Nanyang Technological University;Skywork AI;University of Sydney;Southern University of Science and Technology;Mohamed bin 
Zayed University of Artificial Intelligence;Singapore University of Technology and Design", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.ntu.edu.sg;https://www.skywork.ai;https://www.sydney.edu.au;https://www.sustech.edu.cn;https://mbzuai.ac.ae;https://www.sutd.edu.sg", "aff_unique_abbr": "NTU;Skywork AI;USYD;SUSTech;MBZUAI;SUTD", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;3;4;0;0;0", "aff_country_unique": "Singapore;United States;Australia;China;United Arab Emirates" }, { "id": "3aSbJhaVDi", "title": "Exploiting Open-World Data for Adaptive Continual Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Continual learning (CL), which involves learning from sequential tasks without forgetting, is mainly explored in supervised learning settings where all data are labeled. However, high-quality labeled data may not be readily available at a large scale due to high labeling costs, making the application of existing CL methods in real-world scenarios challenging. In this paper, we delve into a more practical facet of CL: open-world continual learning, where the training data comes from the open-world dataset and is partially labeled and non-i.i.d. Building on the insight that task shifts in continual learning can be viewed as transitions from in-distribution (ID) data to out-of-distribution (OOD) data, we propose OpenACL, a method that explicitly leverages unlabeled OOD data to enhance continual learning. Specifically, OpenACL considers novel classes within OOD data as potential classes for upcoming tasks and mines the underlying pattern in unlabeled open-world data to empower the model's adaptability to upcoming tasks. Furthermore, learning from extensive unlabeled data also helps to tackle the issue of catastrophic forgetting. 
Extensive experiments validate the effectiveness of OpenACL and show the benefit of learning from open-world data.", "keywords": "Continual Learning;Incremental Learning;Semi-supervised Learning;Open-world", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "", "author": "Xuwei Tan;Tian Xie;Zhiqun Zuo;Xueru Zhang", "authorids": "~Xuwei_Tan1;~Tian_Xie4;~Zhiqun_Zuo1;~Xueru_Zhang2", "gender": "M;M;M;F", "homepage": "https://engineering.osu.edu/people/tan.1206;https://www.linkedin.com/in/tianxie1999/;https://github.com/zuozhiqun;https://xueruzhang.github.io/", "dblp": "308/6151;;258/4850;", "google_scholar": "IpTmmIYAAAAJ;;;PNBO_a4AAAAJ", "orcid": ";;;", "linkedin": ";tianxie1999/;;", "or_profile": "~Xuwei_Tan1;~Tian_Xie4;~Zhiqun_Zuo1;~Xueru_Zhang2", "aff": "Ohio State University, Columbus;Ohio State University, Columbus;Ohio State University, Columbus;Ohio State University", "aff_domain": "osu.edu;osu.edu;osu.edu;osu.edu", "position": "PhD student;PhD student;PhD student;Assistant Professor", "bibtex": "@misc{\ntan2024exploiting,\ntitle={Exploiting Open-World Data for Adaptive Continual Learning},\nauthor={Xuwei Tan and Tian Xie and Zhiqun Zuo and Xueru Zhang},\nyear={2024},\nurl={https://openreview.net/forum?id=3aSbJhaVDi}\n}", "github": "", "project": "", "reviewers": "58yd;9HZN;rvMJ;66AU", "site": "https://openreview.net/forum?id=3aSbJhaVDi", "pdf_size": 584142, "rating": "5;5;6;6", "confidence": "3;3;4;3", "soundness": "2;2;2;3", "contribution": "2;2;2;3", "presentation": "3;2;3;3", "wc_summary": "51;76;98;112", "wc_strengths": "59;51;38;99", "wc_weaknesses": "239;193;15;57", "wc_questions": "113;3;259;21", "wc_review": "462;323;410;289", "wc_reply_reviewers": "362;0;42;63", "wc_reply_authors": "2193;949;1335;260", "reply_reviewers": "2;0;1;2", "reply_authors": "6;3;4;3", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 84.25, 23.09085316743407 ], "wc_strengths_avg": [ 61.75, 22.77471185328148 ], "wc_weaknesses_avg": [ 126.0, 92.65527507918802 ], "wc_questions_avg": [ 99.0, 101.36074190730847 ], "wc_review_avg": [ 371.0, 68.61122357165772 ], "wc_reply_reviewers_avg": [ 116.75, 143.40044455998034 ], "wc_reply_authors_avg": [ 1184.25, 698.1910107556528 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 4.0, 1.224744871391589 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:8lxZ6ZIiCwsJ:scholar.google.com/&scioq=Exploiting+Open-World+Data+for+Adaptive+Continual+Learning&hl=en&as_sdt=0,5", "gs_version_total": 2, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Ohio State University", "aff_unique_dep": "", "aff_unique_url": "https://www.osu.edu", "aff_unique_abbr": "OSU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Columbus;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Why is SAM Robust to Label Noise?", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19504", "id": "3aZCPl3ZvR", "author_site": "Christina Baek, J Kolter, Aditi Raghunathan", "tldr": "", "abstract": "Sharpness-Aware Minimization (SAM) is most known for achieving state-of the-art performances on natural image and language tasks. 
However, its most pronounced improvements (of tens of percent) is rather in the presence of label noise. Understanding SAM's label noise robustness requires a departure from characterizing the robustness of minimas lying in ``flatter'' regions of the loss landscape. In particular, the peak performance under label noise occurs with early stopping, far before the loss converges. We decompose SAM's robustness into two effects: one induced by changes to the logit term and the other induced by changes to the network Jacobian. The first can be observed in linear logistic regression where SAM provably up-weights the gradient contribution from clean examples. Although this explicit up-weighting is also observable in neural networks, when we intervene and modify SAM to remove this effect, surprisingly, we see no visible degradation in performance. We infer that SAM's effect in deeper networks is instead explained entirely by the effect SAM has on the network Jacobian. We theoretically derive the implicit regularization induced by this Jacobian effect in two layer linear networks. Motivated by our analysis, we see that cheaper alternatives to SAM that explicitly induce these regularization effects largely recover the benefits in deep networks trained on real-world datasets.", "keywords": "generalization;sharpness;robustness;SAM", "primary_area": "optimization", "supplementary_material": "", "author": "Christina Baek;J Zico Kolter;Aditi Raghunathan", "authorids": "~Christina_Baek2;~J_Zico_Kolter1;~Aditi_Raghunathan1", "gender": ";F;M", "homepage": "https://kebaek.github.io;https://www.cs.cmu.edu/~aditirag/;http://www.zicokolter.com", "dblp": "202/7238;166/1409;67/2526", "google_scholar": ";Ch9iRwQAAAAJ;UXh1I6UAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Christina_Baek2;~Aditi_Raghunathan1;~Zico_Kolter1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cmu.edu;cmu.edu;cmu.edu", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nbaek2024why,\ntitle={Why is {SAM} Robust to Label Noise?},\nauthor={Christina Baek and J Zico Kolter and Aditi Raghunathan},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=3aZCPl3ZvR}\n}", "github": "", "project": "", "reviewers": "Cbxh;WpDM;M3rX;ATLV", "pdf_size": 1203210, "rating": "6;6;6;6", "confidence": "4;4;3;3", "soundness": "3;3;2;3", "contribution": "2;3;2;3", "presentation": "2;3;3;3", "wc_summary": "117;120;69;116", "wc_strengths": "59;50;29;44", "wc_weaknesses": "24;33;52;651", "wc_questions": "29;43;116;6", "wc_review": "229;246;266;817", "wc_reply_reviewers": "22;15;25;25", "wc_reply_authors": "633;487;660;1318", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;3", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 105.5, 21.12463017427761 ], "wc_strengths_avg": [ 45.5, 10.920164833920778 ], "wc_weaknesses_avg": [ 190.0, 266.3503332079763 ], "wc_questions_avg": [ 48.5, 41.14911906711977 ], "wc_review_avg": [ 389.5, 247.16441896033498 ], "wc_reply_reviewers_avg": [ 21.75, 4.085033659592048 ], "wc_reply_authors_avg": [ 774.5, 320.6169833305778 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, 
"gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16289533126675671960&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=3aZCPl3ZvR", "pdf": "https://openreview.net/pdf?id=3aZCPl3ZvR", "email": "cmu.edu;cmu.edu;cmu.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "3b8CgMO5ix", "title": "Model guidance via explanations turns image classifiers into segmentation models", "track": "main", "status": "Reject", "tldr": "", "abstract": "Heatmaps generated on inputs of image classification networks via explainable AI methods like Grad-CAM and LRP have been observed to resemble segmentations of input images in many cases. Consequently, heatmaps have also been leveraged for achieving weakly supervised segmentation with image-level supervision.\nOn the other hand, losses can be imposed on differentiable heatmaps, which has been shown to serve for (1) improving heatmaps to be more human-interpretable, (2) regularization of networks towards better generalization, (3) training diverse ensembles of networks, and (4) for explicitly ignoring confounding input features. Due to the latter use case, the paradigm of imposing losses on heatmaps is often referred to as \"Right for the right reasons\". \nWe unify these two lines of research by investigating semi-supervised segmentation as a novel use case for the Right for the Right Reasons paradigm. \nFirst, we show formal parallels between differentiable heatmap architectures and standard encoder-decoder architectures for image segmentation. \nSecond, we show that such differentiable heatmap architectures yield competitive results when trained with standard segmentation losses. 
\nThird, we show that such architectures allow for training with weak supervision in the form of image-level labels and small numbers of pixel-level labels, outperforming comparable encoder-decoder models.", "keywords": "few-shot learning;semantic segmentation;interpretation", "primary_area": "visualization or interpretation of learned representations", "supplementary_material": "", "author": "Xiaoyan Yu;Jannik Franzen;Wojciech Samek;Marina MC H\u00f6hne;Dagmar Kainmueller", "authorids": "~Xiaoyan_Yu2;~Jannik_Franzen1;~Wojciech_Samek1;~Marina_MC_H\u00f6hne1;~Dagmar_Kainmueller2", "gender": "F;M;M;;F", "homepage": ";;http://iphome.hhi.de/samek/;;", "dblp": ";;79/9736;22/4586;167/3225", "google_scholar": ";;7aQwO08AAAAJ;mg0LpUwAAAAJ;https://scholar.google.de/citations?user=araOPxQAAAAJ", "orcid": "0000-0001-8196-663X;;;;", "linkedin": ";jannik-franzen-5931b121a;;;marina-marie-claire-h\u00f6hne-8949bb77/", "or_profile": "~Xiaoyan_Yu2;~Jannik_Franzen1;~Wojciech_Samek1;~Dagmar_Kainm\u00fcller1;~Marina_H\u00f6hne1", "aff": "Max Delbr\u00fcck Center for Molecular Medicine;Max Delbr\u00fcck Center for Molecular Medicine;Fraunhofer HHI;Max Delbr\u00fcck Center for Molecular Medicine;Universit\u00e4t Potsdam", "aff_domain": "mdc-berlin.de;mdc-berlin.de;hhi.fraunhofer.de;mdc-berlin.de;uni-potsdam.de", "position": "PhD student;PhD student;Assistant Professor;Group Leader;Full Professor", "bibtex": "@misc{\nyu2024model,\ntitle={Model guidance via explanations turns image classifiers into segmentation models},\nauthor={Xiaoyan Yu and Jannik Franzen and Wojciech Samek and Marina MC H{\\\"o}hne and Dagmar Kainmueller},\nyear={2024},\nurl={https://openreview.net/forum?id=3b8CgMO5ix}\n}", "github": "", "project": "", "reviewers": "K7Hs;nMm3;a33j;43DH", "site": "https://openreview.net/forum?id=3b8CgMO5ix", "pdf_size": 12292941, "rating": "3;5;6;8", "confidence": "4;4;4;4", "soundness": "2;3;2;4", "contribution": "2;2;2;3", "presentation": "1;1;1;3", "wc_summary": "12;29;60;77", "wc_strengths": "25;16;66;194", "wc_weaknesses": "78;62;59;91", "wc_questions": "6;1;259;91", "wc_review": "121;108;444;453", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "190;74;700;363", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;2;1", "rating_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 1.5, 0.8660254037844386 ], "wc_summary_avg": [ 44.5, 25.46075411294803 ], "wc_strengths_avg": [ 75.25, 71.1033578672625 ], "wc_weaknesses_avg": [ 72.5, 12.893796958227627 ], "wc_questions_avg": [ 89.25, 104.32730946401331 ], "wc_review_avg": [ 281.5, 167.09353667931026 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 331.75, 236.17406186963038 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Lz_rfX9EBVkJ:scholar.google.com/&scioq=Model+guidance+via+explanations+turns+image+classifiers+into+segmentation+models&hl=en&as_sdt=0,33", "gs_version_total": 5, "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "Max Delbr\u00fcck Center for Molecular Medicine;Fraunhofer Heinrich Hertz Institute;University of Potsdam", "aff_unique_dep": ";;", "aff_unique_url": "https://www.mdc-berlin.de;https://www.hhi.fraunhofer.de/;https://www.uni-potsdam.de", "aff_unique_abbr": "MDC;HHI;UP", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Germany" }, { "id": "3bV46RKWVA", "title": "Diversity-Aware Agnostic Ensemble of Sharpness Minimizers", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "There has long been a variety of theoretical and empirical evidence supporting the success of ensemble learning. Deep ensembles in particular leverage training randomness and expressivity of individual neural networks to gain prediction diversity and ultimately a boost in generalization performance, robustness and uncertainty estimation. In respect of generalization ability, it is found that minimizers pursuing wider local minima result in models being more robust to shifts between training and testing sets. A natural research question arises out of these two approaches as to whether better generalization ability can be achieved if ensemble learning and loss sharpness minimization is integrated. Our work takes the lead in investigating this connection and proposes DASH - a learning algorithm that promotes diversity and flatness within deep ensembles. More concretely, DASH encourages base learners to move divergently towards low-loss regions of minimal sharpness. We provide a theoretical backbone for our method along with empirical evidence demonstrating an improvement in ensemble generalization ability.", "keywords": "SAM;Ensemble learning;Sharpness-Aware Minimization", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Anh Tuan Bui;Vy Vo;Tung Pham;He Zhao;Dinh Phung;Trung Le", "authorids": "~Anh_Tuan_Bui2;~Vy_Vo2;~Tung_Pham1;~He_Zhao1;~Dinh_Phung2;~Trung_Le2", "gender": "M;F;M;;;M", "homepage": "https://tuananhbui89.github.io/;https://isvy08.github.io/;;;;", "dblp": "120/0106;176/4660;38/10862-1;;;", "google_scholar": "jEjMZ7oAAAAJ;3CpFpFkAAAAJ;KcUuEKsAAAAJ;;;https://scholar.google.com/citations?hl=en", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Anh_Tuan_Bui2;~Vy_Vo2;~Tung_Pham1;~He_Zhao1;~Dinh_Phung2;~Trung_Le2", "aff": "Monash University;Monash University;VinAI Research;;;Monash University", "aff_domain": "monash.edu;monash.edu;vinai.io;;;monash.edu", "position": "Postdoc;PhD student;Researcher;;;Assistant Professor", "bibtex": "@misc{\nbui2024diversityaware,\ntitle={Diversity-Aware Agnostic Ensemble of Sharpness Minimizers},\nauthor={Anh Tuan Bui and Vy Vo and Tung Pham and He Zhao and Dinh Phung and Trung Le},\nyear={2024},\nurl={https://openreview.net/forum?id=3bV46RKWVA}\n}", "github": "", "project": "", "reviewers": "XYNF;XaPX;UbbB;xVVh", "site": "https://openreview.net/forum?id=3bV46RKWVA", "pdf_size": 2582464, "rating": "3;3;3;6", "confidence": "4;4;3;2", "soundness": "2;2;2;3", "contribution": "2;1;2;2", "presentation": "2;2;1;3", "wc_summary": "76;44;40;25", "wc_strengths": "59;18;45;66", "wc_weaknesses": "596;483;214;91", "wc_questions": "75;116;243;33", "wc_review": "806;661;542;215", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 46.25, 18.579222265746218 ], "wc_strengths_avg": [ 47.0, 18.371173070873837 ], "wc_weaknesses_avg": [ 346.0, 202.31040507101952 ], "wc_questions_avg": [ 116.75, 
78.57599824373852 ], "wc_review_avg": [ 556.0, 217.94609425268442 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.8703882797784892, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13721089435499981022&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Monash University;VinAI Research", "aff_unique_dep": ";", "aff_unique_url": "https://www.monash.edu;https://www.vinai.io/", "aff_unique_abbr": "Monash;VinAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Australia;Vietnam" }, { "id": "3bmjHYX42n", "title": "Leveraging Human Revisions for Improving Text-to-Layout Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "Learning from human feedback has shown success in aligning large, pretrained models with human values. However, prior works have mostly focused on using high-level labels, such as preferences between pairs of model outputs. On the other hand, many domains could benefit from more involved, detailed feedback, such as corrections, explanations, and reasoning of human users. Our work proposes using nuanced feedback through the form of human revisions for stronger alignment. In this paper, we ask expert designers to fix layouts generated from a generative layout model that is pretrained on a large-scale dataset of mobile screens. Then, we train a reward model based on how human designers revise these generated layouts. With the learned reward model, we optimize our model with reinforcement learning from human feedback (RLHF). 
Our method, Revision-Aware Reward Models (RARE), allows a generative model to produce more modern, designer-aligned layouts, showing the potential for utilizing human corrections and stronger forms of feedback in improving generative models.", "keywords": "human feedback;reinforcement learning;generative models;UI layouts", "primary_area": "generative models", "supplementary_material": "/attachment/a4e99ce13cf35663b57423d0acf0103f202a8dd6.pdf", "author": "Amber Xie;Chin-Yi Cheng;Forrest Huang;Yang Li", "authorids": "~Amber_Xie1;~Chin-Yi_Cheng1;~Forrest_Huang1;~Yang_Li2", "gender": ";M;M;M", "homepage": ";;https://forresthuang.com;http://yangl.org", "dblp": ";;224/0243;37/4190-58", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;IgHGybQAAAAJ;ZZdB48QAAAAJ", "orcid": ";;;", "linkedin": ";chinyi/;;yang-li-127a2a41/", "or_profile": "~Amber_Xie1;~Chin-Yi_Cheng1;~Forrest_Huang1;~Yang_Li2", "aff": ";Google;Apple;Google", "aff_domain": ";google.com;apple.com;google.com", "position": ";Researcher;Research Scientist;Research Scientist", "bibtex": "@misc{\nxie2024leveraging,\ntitle={Leveraging Human Revisions for Improving Text-to-Layout Models},\nauthor={Amber Xie and Chin-Yi Cheng and Forrest Huang and Yang Li},\nyear={2024},\nurl={https://openreview.net/forum?id=3bmjHYX42n}\n}", "github": "", "project": "", "reviewers": "d2Zb;otPj;RQ9B;Qjey", "site": "https://openreview.net/forum?id=3bmjHYX42n", "pdf_size": 1482729, "rating": "5;5;5;6", "confidence": "3;4;3;3", "soundness": "3;2;2;2", "contribution": "2;3;2;2", "presentation": "3;2;2;2", "wc_summary": "63;36;48;121", "wc_strengths": "37;19;38;161", "wc_weaknesses": "157;204;131;258", "wc_questions": "47;131;12;33", "wc_review": "304;390;229;573", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "534;901;547;870", "reply_reviewers": "0;0;0;0", "reply_authors": "1;2;1;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 67.0, 32.61134771824066 ], "wc_strengths_avg": [ 63.75, 56.65410399962213 ], "wc_weaknesses_avg": [ 187.5, 48.386465049639654 ], "wc_questions_avg": [ 55.75, 45.19610049550735 ], "wc_review_avg": [ 374.0, 128.24000935745443 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 713.0, 172.9089355701434 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:jN_E4kKTPNQJ:scholar.google.com/&scioq=Leveraging+Human+Revisions+for+Improving+Text-to-Layout+Models&hl=en&as_sdt=0,47", "gs_version_total": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Google;Apple", "aff_unique_dep": "Google;Apple Inc.", "aff_unique_url": "https://www.google.com;https://www.apple.com", "aff_unique_abbr": "Google;Apple", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Take a Step Back: Evoking Reasoning via Abstraction in Large Language Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19503", "id": "3bq3jsvcQ1", "author_site": "Huaixiu Steven Zheng, Swaroop Mishra, Xinyun Chen, Heng-Tze Cheng, Ed H. 
Chi, Quoc V Le, Denny Zhou", "tldr": "", "abstract": "We present STEP-BACK PROMPTING, a simple prompting technique that enables LLMs to do abstractions to derive high-level concepts and first principles from instances containing specific details. Using the concepts and principles to guide reasoning, LLMs significantly improve their abilities in following a correct reasoning path towards the solution. We conduct experiments of STEP-BACK PROMPTING with PaLM-2L, GPT-4 and Llama2-70B models, and observe substantial performance gains on various challenging reasoning-intensive tasks including STEM, Knowledge QA, and Multi-Hop Reasoning. For instance, STEP-BACK PROMPTING improves PaLM-2L performance on MMLU (Physics and Chemistry) by 7% and 11% respectively, TimeQA by 27%, and MuSiQue by 7%.", "keywords": "Prompting;Large Language Models;Reasoning;Abstraction", "primary_area": "generative models", "supplementary_material": "", "author": "Huaixiu Steven Zheng;Swaroop Mishra;Xinyun Chen;Heng-Tze Cheng;Ed H. Chi;Quoc V Le;Denny Zhou", "authorids": "~Huaixiu_Steven_Zheng1;~Swaroop_Mishra1;~Xinyun_Chen1;~Heng-Tze_Cheng1;~Ed_H._Chi1;~Quoc_V_Le1;~Denny_Zhou1", "gender": "M;M;;M;;M;F", "homepage": ";https://swarooprm.github.io/;https://www.linkedin.com/in/hengtze;;https://dennyzhou.github.io/;http://edchi.net;https://jungyhuk.github.io/", "dblp": "307/3201;249/2784;30/8739;29/6166;178/3277;13/310;", "google_scholar": "PyK4x4wAAAAJ;-7LK2SwAAAAJ;;;UwLsYw8AAAAJ;VuWl-KUAAAAJ;d4W1UT0AAAAJ", "orcid": ";;;;;0000-0003-3230-5338;", "linkedin": ";;;;;edchi/;", "or_profile": "~Huaixiu_Steven_Zheng1;~Swaroop_Mishra1;~Heng-Tze_Cheng1;~Quoc_V_Le1;~Dengyong_Zhou2;~Ed_Chi1;~Xinyun_Chen2", "aff": "Google;Google;;Google;Google DeepMind;Google;Google", "aff_domain": "google.com;google.com;;google.com;google.com;google.com;google.com", "position": "Software Engineer;Researcher;;Scientist;Research Scientist;Researcher;Researcher", "bibtex": "@inproceedings{\nzheng2024take,\ntitle={Take a Step Back: Evoking Reasoning via Abstraction in Large Language Models},\nauthor={Huaixiu Steven Zheng and Swaroop Mishra and Xinyun Chen and Heng-Tze Cheng and Ed H. 
Chi and Quoc V Le and Denny Zhou},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=3bq3jsvcQ1}\n}", "github": "", "project": "", "reviewers": "XiDg;FiKx;xaDx", "pdf_size": 870565, "rating": "8;8;8", "confidence": "4;3;3", "soundness": "3;4;4", "contribution": "3;4;3", "presentation": "3;3;3", "wc_summary": "81;58;90", "wc_strengths": "83;43;67", "wc_weaknesses": "312;116;153", "wc_questions": "133;124;6", "wc_review": "609;341;316", "wc_reply_reviewers": "325;57;35", "wc_reply_authors": "812;586;483", "reply_reviewers": "1;1;1", "reply_authors": "2;2;1", "rating_avg": [ 8.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 76.33333333333333, 13.474255287605157 ], "wc_strengths_avg": [ 64.33333333333333, 16.438437341250605 ], "wc_weaknesses_avg": [ 193.66666666666666, 85.02679316283518 ], "wc_questions_avg": [ 87.66666666666667, 57.86382481501048 ], "wc_review_avg": [ 422.0, 132.62227062852855 ], "wc_reply_reviewers_avg": [ 139.0, 131.82817099037166 ], "wc_reply_authors_avg": [ 627.0, 137.40693820425034 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 157, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3437259973551531575&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=3bq3jsvcQ1", "pdf": "https://openreview.net/pdf?id=3bq3jsvcQ1", "email": "google.com;google.com;;google.com;google.com;google.com;google.com", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "3bqesUzZPH", "title": "FTA: Stealthy and Adaptive Backdoor Attack with Flexible Triggers on Federated Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Current backdoor attacks against federated learning (FL) strongly rely on universal triggers or semantic patterns, which can be easily detected and filtered by certain defense mechanisms such as norm clipping, trigger inversion and etc.\nIn this work, we propose a novel generator-assisted backdoor attack, FTA, against FL defenses.\nIn this method, we build a generative trigger function that can learn to manipulate the benign samples with naturally imperceptible trigger patterns (stealthy) and simultaneously make poisoned samples include similar hidden features of the attacker-chosen label. \nMoreover, our trigger generator repeatedly produces triggers for each sample (flexibility) in each FL iteration (adaptivity), allowing it to adjust to changes of hidden features between global models of different rounds.\nInstead of using universal and predefined triggers of existing works, we break this wall by providing three desiderate (i.e., stealthy, flexibility and adaptivity), which helps our attack avoid the presence of backdoor-related hidden features or backdoor routing. 
\nExtensive experiments confirmed the effectiveness (above 98\\% attack success rate) and stealthiness of our attack compared to prior attacks on decentralized learning frameworks with eight well-studied defenses.", "keywords": "federated learning;backdoor attack;trigger generator;robustness", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/410e83506017bdee675d58b231c1a0687905b1ca.zip", "author": "Yanqi Qiao;Dazhuang Liu;Congwen Chen;Rui Wang;Kaitai Liang", "authorids": "~Yanqi_Qiao1;~Dazhuang_Liu1;~Congwen_Chen1;~Rui_Wang24;~Kaitai_Liang1", "gender": "M;;M;M;M", "homepage": "https://www.tudelft.nl/en/eemcs/the-faculty/departments/intelligent-systems/cybersecurityeemcs/people/yanqi-qiao;;;https://www.tudelft.nl/en/eemcs/the-faculty/departments/intelligent-systems/cybersecurityeemcs/people/rui-wang;https://sites.google.com/view/kaitailiang", "dblp": "322/9220;;314/0597;;", "google_scholar": ";;;https://scholar.google.com/citations?hl=en;", "orcid": ";;;;", "linkedin": ";;https://linkedin.com/in/congwen-chen-144750231;;", "or_profile": "~Yanqi_Qiao1;~Dazhuang_Liu1;~Congwen_Chen1;~Rui_Wang24;~Kaitai_Liang1", "aff": "Delft University of Technology;;;;Delft University of Technology", "aff_domain": "tudelft.nl;;;;tudelft.nl", "position": "PhD student;;;;Assistant Professor", "bibtex": "@misc{\nqiao2024fta,\ntitle={{FTA}: Stealthy and Adaptive Backdoor Attack with Flexible Triggers on Federated Learning},\nauthor={Yanqi Qiao and Dazhuang Liu and Congwen Chen and Rui Wang and Kaitai Liang},\nyear={2024},\nurl={https://openreview.net/forum?id=3bqesUzZPH}\n}", "github": "", "project": "", "reviewers": "cGsZ;gADe;nnMX", "site": "https://openreview.net/forum?id=3bqesUzZPH", "pdf_size": 10340369, "rating": "5;5;6", "confidence": "4;4;3", "soundness": "3;3;3", "contribution": "2;2;2", "presentation": "2;3;2", "wc_summary": "15;46;31", "wc_strengths": "37;31;16", "wc_weaknesses": "403;103;195", "wc_questions": "158;9;144", "wc_review": "613;189;386", "wc_reply_reviewers": "298;0;23", "wc_reply_authors": "1311;456;663", "reply_reviewers": "2;0;1", "reply_authors": "3;2;2", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 30.666666666666668, 12.657891697365017 ], "wc_strengths_avg": [ 28.0, 8.831760866327848 ], "wc_weaknesses_avg": [ 233.66666666666666, 125.48926470242606 ], "wc_questions_avg": [ 103.66666666666667, 67.18300049533033 ], "wc_review_avg": [ 396.0, 173.24164241505756 ], "wc_reply_reviewers_avg": [ 107.0, 135.38340617175604 ], "wc_reply_authors_avg": [ 810.0, 364.20049423360206 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.9999999999999997, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15899732668944491480&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Delft University of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.tudelft.nl", "aff_unique_abbr": "TU Delft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Netherlands" }, { "id": "3cE6NKYy8x", "title": "Towards Fair Graph Anomaly 
Detection: Problem, New Datasets, and Evaluation", "track": "main", "status": "Reject", "tldr": "", "abstract": "The Fair Graph Anomaly Detection (FairGAD) problem aims to accurately detect anomalous nodes in an input graph while ensuring fairness and avoiding biased predictions against individuals from sensitive subgroups such as gender or political leanings. Fairness in graphs is particularly crucial in anomaly detection areas such as misinformation detection, where decision outcomes can significantly affect individuals. Despite this need, existing works lack realistic datasets that encompass actual graph structures, anomaly labels, and sensitive attributes for research in FairGAD. To bridge this gap, we present two novel graph datasets constructed from the globally prominent social media platforms Reddit and Twitter. These datasets comprise 1.2 million and 400 thousand edges associated with 9 thousand and 47 thousand nodes, respectively, and leverage political leanings as sensitive attributes and misinformation spreaders as anomaly labels. We demonstrate that our FairGAD datasets significantly differ from the synthetic datasets used by the research community. These new datasets offer significant values for FairGAD by providing realistic data that captures the intricacies of social networks. Using our datasets, we investigate the performance-fairness trade-off in three existing GAD methods on five state-of-the-art fairness methods, which sheds light on their effectiveness and limitations in addressing the FairGAD problem.", "keywords": "graph anomaly detection;fairness;fair graph anomaly detection;benchmark datasets", "primary_area": "datasets and benchmarks", "supplementary_material": "", "author": "Neng Kai Nigel Neo;Yeon-Chang Lee;Yiqiao Jin;Sang-Wook Kim;Srijan Kumar", "authorids": "~Neng_Kai_Nigel_Neo1;~Yeon-Chang_Lee1;~Yiqiao_Jin1;~Sang-Wook_Kim1;~Srijan_Kumar1", "gender": ";;M;M;M", "homepage": ";https://sites.google.com/view/yclee/;https://ahren09.github.io/;https://bigdas.hanyang.ac.kr/;https://faculty.cc.gatech.edu/~srijan/", "dblp": ";167/5996;207/6631.html;64/5810;131/9628", "google_scholar": ";https://scholar.google.co.kr/citations?user=GzzNEJAAAAAJ;eY85qm4AAAAJ;https://scholar.google.co.kr/citations?user=ed2vz_oAAAAJ;kqfLNK8AAAAJ", "orcid": ";0000-0002-8769-0678;0000-0002-6974-5970;0000-0002-6345-9084;0000-0002-5796-3532", "linkedin": ";;ahren-jin/;;srijankr/", "or_profile": "~Neng_Kai_Nigel_Neo1;~Yeon-Chang_Lee1;~Yiqiao_Jin1;~Sang-Wook_Kim1;~Srijan_Kumar1", "aff": ";Ulsan National Institute of Science and Technology;Georgia Institute of Technology;Hanyang University;Georgia Institute of Technology", "aff_domain": ";unist.ac.kr;gatech.edu;hanyang.ac.kr;gatech.edu", "position": ";Assistant Professor;PhD student;Full Professor;Assistant Professor", "bibtex": "@misc{\nneo2024towards,\ntitle={Towards Fair Graph Anomaly Detection: Problem, New Datasets, and Evaluation},\nauthor={Neng Kai Nigel Neo and Yeon-Chang Lee and Yiqiao Jin and Sang-Wook Kim and Srijan Kumar},\nyear={2024},\nurl={https://openreview.net/forum?id=3cE6NKYy8x}\n}", "github": "", "project": "", "reviewers": "QKDb;9VrT;jyni;rPbE", "site": "https://openreview.net/forum?id=3cE6NKYy8x", "pdf_size": 1887519, "rating": "3;5;6;8", "confidence": "1;3;3;4", "soundness": "2;2;3;3", "contribution": "2;2;3;4", "presentation": "3;3;3;4", "wc_summary": "144;173;96;53", "wc_strengths": "20;76;62;68", "wc_weaknesses": "273;189;455;193", "wc_questions": "91;286;69;2", "wc_review": "528;724;682;316", "wc_reply_reviewers": 
"0;116;175;0", "wc_reply_authors": "1663;1555;2205;658", "reply_reviewers": "0;1;1;0", "reply_authors": "4;4;4;1", "rating_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 2.75, 1.0897247358851685 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 116.5, 45.828484592008934 ], "wc_strengths_avg": [ 56.5, 21.650635094610966 ], "wc_weaknesses_avg": [ 277.5, 107.8181339107666 ], "wc_questions_avg": [ 112.0, 105.67166129100082 ], "wc_review_avg": [ 562.5, 159.93357996368368 ], "wc_reply_reviewers_avg": [ 72.75, 75.68148716826329 ], "wc_reply_authors_avg": [ 1520.25, 555.4148787168021 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.25, 1.299038105676658 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.9544271444636667, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14943895517549817533&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Ulsan National Institute of Science and Technology;Georgia Institute of Technology;Hanyang University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.unist.ac.kr;https://www.gatech.edu;https://www.hanyang.ac.kr", "aff_unique_abbr": "UNIST;Georgia Tech;HYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "South Korea;United States" }, { "title": "Identifying Representations for Intervention Extrapolation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19502", "id": "3cuJwmPxXj", "author_site": "Sorawit (James) Saengkyongam, Elan Rosenfeld, Pradeep K Ravikumar, Niklas Pfister, Jonas Peters", "tldr": "", "abstract": "The premise of identifiable and causal representation learning is to improve the current representation learning paradigm in terms of generalizability or robustness. Despite recent progress in questions of identifiability, more theoretical results demonstrating concrete advantages of these methods for downstream tasks are needed. In this paper, we consider the task of intervention extrapolation: predicting how interventions affect an outcome, even when those interventions are not observed at training time, and show that identifiable representations can provide an effective solution to this task even if the interventions affect the outcome non-linearly. Our setup includes an outcome variable $Y$, observed features $X$, which are generated as a non-linear transformation of latent features $Z$, and exogenous action variables $A$, which influence $Z$. The objective of intervention extrapolation is then to predict how interventions on $A$ that lie outside the training support of $A$ affect $Y$. Here, extrapolation becomes possible if the effect of $A$ on $Z$ is linear and the residual when regressing Z on A has full support. As $Z$ is latent, we combine the task of intervention extrapolation with identifiable representation learning, which we call $\\texttt{Rep4Ex}$: we aim to map the observed features $X$ into a subspace that allows for non-linear extrapolation in $A$. We show that the hidden representation is identifiable up to an affine transformation in $Z$-space, which, we prove, is sufficient for intervention extrapolation. The identifiability is characterized by a novel constraint describing the linearity assumption of $A$ on $Z$. 
Based on this insight, we propose a flexible method that enforces the linear invariance constraint and can be combined with any type of autoencoder. We validate our theoretical findings through a series of synthetic experiments and show that our approach can indeed succeed in predicting the effects of unseen interventions.", "keywords": "causality;extrapolation;exogenous variables;causal representation learning;identifiable representation learning;control functions;instrumental variables;invariance", "primary_area": "causal reasoning", "supplementary_material": "/attachment/ca0776cd9e91d0b874238a4bfb5ae74e4b210e3f.zip", "author": "Sorawit Saengkyongam;Elan Rosenfeld;Pradeep Kumar Ravikumar;Niklas Pfister;Jonas Peters", "authorids": "~Sorawit_Saengkyongam1;~Elan_Rosenfeld1;~Pradeep_Kumar_Ravikumar1;~Niklas_Pfister1;~Jonas_Peters2", "gender": ";M;M;;M", "homepage": "https://sorawitj.github.io/;;http://www.cs.cmu.edu/~pradeepr/;https://niklaspfister.github.io/;https://people.math.ethz.ch/~jopeters/", "dblp": ";236/4508;94/3594;222/3117;48/97", "google_scholar": "e3aDv1QAAAAJ;f0j0K8QAAAAJ;https://scholar.google.com.tw/citations?user=Q4DTPw4AAAAJ;u2G6pzcAAAAJ;https://scholar.google.de/citations?user=kBQ4VvEAAAAJ", "orcid": ";;;0000-0001-6203-9777;", "linkedin": ";;;;", "or_profile": "~Sorawit_Saengkyongam1;~Elan_Rosenfeld1;~Pradeep_Kumar_Ravikumar1;~Niklas_Pfister1;~Jonas_Peters2", "aff": "ETH Z\u00fcrich;Carnegie Mellon University;Carnegie Mellon University;University of Copenhagen;ETHZ - ETH Zurich", "aff_domain": "math.ethz.ch;andrew.cmu.edu;cmu.edu;ku.dk;ethz.ch", "position": "PhD student;PhD student;Full Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nsaengkyongam2024identifying,\ntitle={Identifying Representations for Intervention Extrapolation},\nauthor={Sorawit Saengkyongam and Elan Rosenfeld and Pradeep Kumar Ravikumar and Niklas Pfister and Jonas Peters},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=3cuJwmPxXj}\n}", "github": "", "project": "", "reviewers": "wfVE;dGv9;k2zA;iVVC", "pdf_size": 4071912, "rating": "8;8;8;8", "confidence": "4;4;4;4", "soundness": "4;3;3;3", "contribution": "4;3;3;4", "presentation": "4;4;3;3", "wc_summary": "268;107;135;86", "wc_strengths": "137;53;32;90", "wc_weaknesses": "375;64;487;65", "wc_questions": "231;196;63;98", "wc_review": "1011;420;717;339", "wc_reply_reviewers": "4;0;35;0", "wc_reply_authors": "1009;1025;1295;547", "reply_reviewers": "1;0;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 8.0, 0.0 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 149.0, 70.86959855960805 ], "wc_strengths_avg": [ 78.0, 39.89360851063739 ], "wc_weaknesses_avg": [ 247.75, 187.47983224869816 ], "wc_questions_avg": [ 147.0, 68.76408946535975 ], "wc_review_avg": [ 621.75, 265.15973959106236 ], "wc_reply_reviewers_avg": [ 9.75, 14.669270602180601 ], "wc_reply_authors_avg": [ 969.0, 268.8382413273826 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13348287505911038180&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "openreview": "https://openreview.net/forum?id=3cuJwmPxXj", "pdf": "https://openreview.net/pdf?id=3cuJwmPxXj", 
"email": "math.ethz.ch;andrew.cmu.edu;cmu.edu;ku.dk;ethz.ch", "author_num": 5, "aff_unique_index": "0;1;1;2;0", "aff_unique_norm": "ETH Zurich;Carnegie Mellon University;University of Copenhagen", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ethz.ch;https://www.cmu.edu;https://www.ku.dk", "aff_unique_abbr": "ETHZ;CMU;UCPH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;2;0", "aff_country_unique": "Switzerland;United States;Denmark" }, { "title": "Privately Aligning Language Models with Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19501", "id": "3d0OmYTNui", "author_site": "Fan Wu, Huseyin Inan, Arturs Backurs, Varun Chandrasekaran, Janardhan Kulkarni, Robert Sim", "tldr": "", "abstract": "Positioned between pre-training and user deployment, aligning large language models (LLMs) through reinforcement learning (RL) has emerged as a prevailing strategy for training instruction following-models such as ChatGPT. In this work, we initiate the study of privacy-preserving alignment of LLMs through Differential Privacy (DP) in conjunction with RL. Following the influential work of Ziegler et al. (2020), we study two dominant paradigms: (i) alignment via RL without human in the loop (e.g., positive review generation) and (ii) alignment via RL from human feedback (RLHF) (e.g., summarization in a human-preferred way). We give a new DP framework to achieve alignment via RL, and prove its correctness. Our experimental results validate the effectiveness of our approach, offering competitive utility while ensuring strong privacy protections.", "keywords": "Large Language Models;RLHF;Alignment;Differential Privacy", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/8533995075a0f8cc73da220bc27a7b4b58895d3f.pdf", "author": "Fan Wu;Huseyin A Inan;Arturs Backurs;Varun Chandrasekaran;Janardhan Kulkarni;Robert Sim", "authorids": "~Fan_Wu6;~Huseyin_A_Inan1;~Arturs_Backurs1;~Varun_Chandrasekaran1;~Janardhan_Kulkarni2;~Robert_Sim1", "gender": "F;;M;M;M;", "homepage": ";http://www.mit.edu/~backurs/;http://pages.cs.wisc.edu/~chandrasekaran/;;;", "dblp": "07/6378-11;74/10669;;54/1978;47/1233;41/11141", "google_scholar": "qd8WzBMAAAAJ;UNHdIKoAAAAJ;Sl7nSOsAAAAJ;_fxnybwAAAAJ;uT8sPt8AAAAJ;BGN4egcAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;simra/;", "or_profile": "~Fan_Wu6;~Arturs_Backurs1;~Varun_Chandrasekaran1;~Janardhan_Kulkarni2;~Robert_Sim1;~Huseyin_Atahan_Inan1", "aff": "University of Illinois, Urbana Champaign;Microsoft;University of Illinois Urbana-Champaign;Microsoft Research, Redmond;Microsoft;Microsoft", "aff_domain": "illinois.edu;microsoft.com;illinois.edu;microsoft.com;microsoft.com;microsoft.com", "position": "PhD student;Researcher;Assistant Professor;Researcher;Principal Researcher;Researcher", "bibtex": "@inproceedings{\nwu2024privately,\ntitle={Privately Aligning Language Models with Reinforcement Learning},\nauthor={Fan Wu and Huseyin A Inan and Arturs Backurs and Varun Chandrasekaran and Janardhan Kulkarni and Robert Sim},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=3d0OmYTNui}\n}", "github": "", "project": "", "reviewers": "JeaX;vWsq;A7MT", "pdf_size": 1164572, "rating": "6;6;8", "confidence": "2;3;3", "soundness": "3;3;3", "contribution": "2;3;3", "presentation": "3;3;4", "wc_summary": "37;49;33", "wc_strengths": "44;108;46", 
"wc_weaknesses": "132;157;74", "wc_questions": "4;41;28", "wc_review": "217;355;181", "wc_reply_reviewers": "17;82;0", "wc_reply_authors": "649;1003;619", "reply_reviewers": "1;1;0", "reply_authors": "1;3;1", "rating_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 39.666666666666664, 6.79869268479038 ], "wc_strengths_avg": [ 66.0, 29.709706606876257 ], "wc_weaknesses_avg": [ 121.0, 34.76588366008646 ], "wc_questions_avg": [ 24.333333333333332, 15.326085243430198 ], "wc_review_avg": [ 251.0, 74.9933330370107 ], "wc_reply_reviewers_avg": [ 33.0, 35.33647784749729 ], "wc_reply_authors_avg": [ 757.0, 174.37889780589853 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.49999999999999983, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10110203762954942936&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=3d0OmYTNui", "pdf": "https://openreview.net/pdf?id=3d0OmYTNui", "email": "illinois.edu;microsoft.com;illinois.edu;microsoft.com;microsoft.com;microsoft.com", "author_num": 6, "aff_unique_index": "0;1;0;1;1;1", "aff_unique_norm": "University of Illinois Urbana-Champaign;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://illinois.edu;https://www.microsoft.com", "aff_unique_abbr": "UIUC;Microsoft", "aff_campus_unique_index": "0;0;2", "aff_campus_unique": "Urbana-Champaign;;Redmond", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Efficient-3Dim: Learning a Generalizable Single-image Novel-view Synthesizer in One Day", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19500", "id": "3eFMnZ3N4J", "author_site": "Yifan Jiang, Hao Tang, Jen-Hao Chang, Liangchen Song, Zhangyang Wang, Liangliang Cao", "tldr": "", "abstract": "The task of novel view synthesis aims to generate unseen perspectives of an object or scene from a limited set of input images. Nevertheless, synthesizing novel views from a single image remains a significant challenge. Previous approaches tackle this problem by adopting mesh prediction, multi-plane image construction, or more advanced techniques such as neural radiance fields. Recently, a pre-trained diffusion model that is specifically designed for 2D image synthesis has demonstrated its capability in producing photorealistic novel views, if sufficiently optimized with a 3D finetuning task. Despite greatly improved fidelity and generalizability, training such a powerful diffusion model requires a vast volume of training data and model parameters, resulting in a notoriously long time and high computational costs. To tackle this issue, we propose Efficient-3DiM, a highly efficient yet effective framework to learn a single-image novel-view synthesizer. Motivated by our in-depth analysis of the diffusion model inference process, we propose several pragmatic strategies to reduce training overhead to a manageable scale, including a crafted timestep sampling strategy, a superior 3D feature extractor, and an enhanced training scheme. 
When combined, our framework can reduce the total training time from 10 days to less than 1 day, significantly accelerating the training process on the same computational platform (an instance with 8 Nvidia A100 GPUs). Comprehensive experiments are conducted to demonstrate the efficiency and generalizability of our proposed method.", "keywords": "Novel View Synthesis;3D from Single Image;Efficient Training", "primary_area": "generative models", "supplementary_material": "", "author": "Yifan Jiang;Hao Tang;Jen-Hao Rick Chang;Liangchen Song;Zhangyang Wang;Liangliang Cao", "authorids": "~Yifan_Jiang2;~Hao_Tang16;~Jen-Hao_Rick_Chang1;~Liangchen_Song1;~Zhangyang_Wang1;~Liangliang_Cao1", "gender": "M;M;M;;M;M", "homepage": "https://yifanjiang19.github.io/;;https://rick-chang.github.io;;https://vita-group.github.io;http://llcao.net", "dblp": "81/7246-1;;169/4938;;119/4026;95/6915", "google_scholar": "PMeFEOIAAAAJ;xW-IxnwAAAAJ;F5Z9kN4AAAAJ;;pxFyKAIAAAAJ;S-hBSfIAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;liangliangcao/", "or_profile": "~Yifan_Jiang2;~Hao_Tang16;~Jen-Hao_Rick_Chang1;~Liangchen_Song1;~Zhangyang_Wang1;~Liangliang_Cao1", "aff": "University of Texas, Austin;Apple;Apple;;University of Texas at Austin;Apple", "aff_domain": "utexas.edu;apple.com;apple.com;;utexas.edu;apple.com", "position": "PhD student;Researcher;Researcher;;Associate Professor;Principal Researcher", "bibtex": "@inproceedings{\njiang2024efficientdim,\ntitle={Efficient-3Dim: Learning a Generalizable Single-image Novel-view Synthesizer in One Day},\nauthor={Yifan Jiang and Hao Tang and Jen-Hao Rick Chang and Liangchen Song and Zhangyang Wang and Liangliang Cao},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=3eFMnZ3N4J}\n}", "github": "", "project": "", "reviewers": "mro2;TYx2;FP6T;fp2s", "pdf_size": 15811605, "rating": "5;8;8;8", "confidence": "4;4;4;5", "soundness": "3;4;4;3", "contribution": "2;3;4;3", "presentation": "3;3;3;3", "wc_summary": "62;61;66;35", "wc_strengths": "69;121;107;153", "wc_weaknesses": "159;118;85;115", "wc_questions": "72;4;7;5", "wc_review": "362;304;265;308", "wc_reply_reviewers": "55;0;0;23", "wc_reply_authors": "581;403;339;442", "reply_reviewers": "1;0;0;1", "reply_authors": "2;1;1;2", "rating_avg": [ 7.25, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "contribution_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 56.0, 12.267844146385297 ], "wc_strengths_avg": [ 112.5, 30.14548058996572 ], "wc_weaknesses_avg": [ 119.25, 26.328454189336675 ], "wc_questions_avg": [ 22.0, 28.88771365130858 ], "wc_review_avg": [ 309.75, 34.52806829233283 ], "wc_reply_reviewers_avg": [ 19.5, 22.544400635190993 ], "wc_reply_authors_avg": [ 441.25, 88.66897710022373 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "openreview": "https://openreview.net/forum?id=3eFMnZ3N4J", "pdf": "https://openreview.net/pdf?id=3eFMnZ3N4J", "email": "utexas.edu;apple.com;apple.com;;utexas.edu;apple.com", "author_num": 6, "aff_unique_index": "0;1;1;0;1", "aff_unique_norm": "University of Texas at Austin;Apple", "aff_unique_dep": ";Apple Inc.", "aff_unique_url": "https://www.utexas.edu;https://www.apple.com", "aff_unique_abbr": "UT Austin;Apple", "aff_campus_unique_index": 
"0;0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "3edHHvu5GX", "title": "Adaptive Visual Scene Understanding: Incremental Scene Graph Generation", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Scene graph generation (SGG) involves analyzing images to extract meaningful information about objects and their relationships. Given the dynamic nature of the visual world, it becomes crucial for AI systems to detect new objects and establish their new relationships with existing objects. To address the lack of continual learning methodologies in SGG, we introduce the comprehensive Continual ScenE Graph Generation (CSEGG) dataset along with 3 learning scenarios and 8 evaluation metrics. Our research investigates the continual learning \nperformances of existing SGG methods on the retention of previous object entities and relationships as they learn new ones. Moreover, we also explore how continual object detection enhances generalization in classifying known relationships on unknown objects. We conduct extensive experiments benchmarking and analyzing the most recent transformer-based SGG methods in continual learning settings, and gain valuable insights into the CSEGG problem. We invite the research community to explore this emerging field of study.", "keywords": "Scene Graph Generation;Scene Understanding;Long-tailed Learning;Incremental Learning", "primary_area": "datasets and benchmarks", "supplementary_material": "/attachment/aa9d3fe778d16b45efbfb849e65985ec2a02b66c.zip", "author": "Naitik Khandelwal;Xiao Liu;Mengmi Zhang", "authorids": "~Naitik_Khandelwal1;~Xiao_Liu24;~Mengmi_Zhang1", "gender": "M;M;F", "homepage": ";;https://a0091624.wixsite.com/deepneurocognition-1", "dblp": ";;160/7116", "google_scholar": "7divVH4AAAAJ;;https://scholar.google.com.sg/citations?user=G2sVOhcAAAAJ", "orcid": ";;0000-0002-2694-7097", "linkedin": ";xiao-liu-34971b205/;", "or_profile": "~Naitik_Khandelwal1;~Xiao_Liu24;~Mengmi_Zhang1", "aff": "A*STAR;I2R, A*STAR;A*STAR", "aff_domain": "i2r.a-star.edu.sg;i2r.a-star.edu.sg;astar.edu.sg", "position": "Researcher;Researcher;Principal Researcher", "bibtex": "@misc{\nkhandelwal2024adaptive,\ntitle={Adaptive Visual Scene Understanding: Incremental Scene Graph Generation},\nauthor={Naitik Khandelwal and Xiao Liu and Mengmi Zhang},\nyear={2024},\nurl={https://openreview.net/forum?id=3edHHvu5GX}\n}", "github": "", "project": "", "reviewers": "FdjE;TSA3;SDSd;pXA5;LwTF", "site": "https://openreview.net/forum?id=3edHHvu5GX", "pdf_size": 38916967, "rating": "3;5;6;6;6", "confidence": "4;3;4;4;4", "soundness": "2;2;3;3;3", "contribution": "2;2;3;3;2", "presentation": "3;1;2;2;2", "wc_summary": "25;76;50;44;55", "wc_strengths": "18;45;71;39;91", "wc_weaknesses": "206;118;34;66;194", "wc_questions": "59;6;114;2;105", "wc_review": "308;245;269;151;445", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "1614;230;1014;582;2984", "reply_reviewers": "0;0;0;0;0", "reply_authors": "3;1;2;1;5", "rating_avg": [ 5.2, 1.16619037896906 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "contribution_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.0, 0.6324555320336759 ], "wc_summary_avg": [ 50.0, 16.504544828622205 ], "wc_strengths_avg": [ 52.8, 25.506077707087776 ], "wc_weaknesses_avg": [ 123.6, 68.0047057195309 ], "wc_questions_avg": [ 57.2, 47.29228266852849 ], "wc_review_avg": [ 283.6, 95.84487466734984 ], "wc_reply_reviewers_avg": 
[ 0, 0 ], "wc_reply_authors_avg": [ 1284.8, 967.0074249973471 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.4, 1.4966629547095764 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.08574929257125442, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:UcBCy0UyJ44J:scholar.google.com/&scioq=Adaptive+Visual+Scene+Understanding:+Incremental+Scene+Graph+Generation&hl=en&as_sdt=0,38", "gs_version_total": 5, "aff_unique_index": "0;1;0", "aff_unique_norm": "Agency for Science, Technology and Research;A*STAR", "aff_unique_dep": ";Institute for Infocomm Research", "aff_unique_url": "https://www.a-star.edu.sg;https://www.a-star.edu.sg", "aff_unique_abbr": "A*STAR;A*STAR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Singapore" }, { "title": "LEGO-Prover: Neural Theorem Proving with Growing Libraries", "status": "Oral", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19499", "id": "3f5PALef5B", "author_site": "Haiming Wang, Huajian Xin, Chuanyang Zheng, Zhengying Liu, Qingxing Cao, Yinya Huang, Jing Xiong, Han Shi, Enze Xie, Jian Yin, Zhenguo Li, Xiaodan Liang", "tldr": "", "abstract": "Despite the success of large language models (LLMs), the task of theorem proving still remains one of the hardest reasoning tasks that is far from being fully solved. Prior methods using language models have demonstrated promising results, but they still struggle to prove even middle school level theorems. One common limitation of these methods is that they assume a fixed theorem library during the whole theorem proving process. However, as we all know, creating new useful theorems or even new theories is not only helpful but crucial and necessary for advancing mathematics and proving harder and deeper results. In this work, we present LEGO-Prover, which employs a growing skill library containing verified lemmas as skills to augment the capability of LLMs used in theorem proving. By constructing the proof modularly, LEGO-Prover enables LLMs to utilize existing skills retrieved from the library and to create new skills during the proving process. These skills are further evolved (by prompting an LLM) to enrich the library on another scale. Modular and reusable skills are constantly added to the library to enable tackling increasingly intricate mathematical problems. Moreover, the learned library further bridges the gap between human proofs and formal proofs by making it easier to impute missing steps. LEGO-Prover advances the state-of-the-art pass rate on miniF2F-valid (48.0\\% to 57.0\\%) and miniF2F-test (45.5\\% to 50.0\\%). During the proving process, LEGO-Prover also generates over 20,000 skills (theorems/lemmas) and adds them to the growing library. 
Our ablation study indicates that these newly added skills are indeed helpful for proving theorems, resulting in a 4.9\\% improvement in success rate", "keywords": "Theorem proving;Large language model;Autoformalization", "primary_area": "neurosymbolic & hybrid AI systems (physics-informed, logic & formal reasoning, etc.)", "supplementary_material": "", "author": "Haiming Wang;Huajian Xin;Chuanyang Zheng;Zhengying Liu;Qingxing Cao;Yinya Huang;Jing Xiong;Han Shi;Enze Xie;Jian Yin;Zhenguo Li;Xiaodan Liang", "authorids": "~Haiming_Wang1;~Huajian_Xin1;~Chuanyang_Zheng3;~Zhengying_Liu2;~Qingxing_Cao1;~Yinya_Huang1;~Jing_Xiong4;~Han_Shi1;~Enze_Xie1;~Jian_Yin3;~Zhenguo_Li1;~Xiaodan_Liang2", "gender": "M;M;M;M;M;;M;M;M;M;F;M", "homepage": ";https://xinhuajian.wordpress.com/;https://chuanyang-zheng.github.io/;;;https://eleanor-h.github.io/;https://han-shi.github.io/;https://xieenze.github.io/;http://sai.sysu.edu.cn/teacher/teacher01/1385356.htm;http://www.ee.columbia.edu/~zgli/;https://www.sysu-hcp.net/;https://menik1126.github.io/", "dblp": "97/604;356/3551;;241/1782;149/7615;282/1562;;218/5441;95/578-1;23/6479;;", "google_scholar": "zDPqP6AAAAAJ;E5M9x8wAAAAJ;LWwh7K4AAAAJ;http:// DFme0joAAAAJ;flOBrd8AAAAJ;dWStaRIAAAAJ;https://scholar.google.com.hk/citations?user=Johp_14AAAAJ;42MVVPgAAAAJ;;XboZC1AAAAAJ;voxznZAAAAAJ;https://scholar.google.com.hk/citations?user=dFX1hXkAAAAJ", "orcid": ";;;;;0000-0002-0686-0832;;;;;;0000-0003-2986-6978", "linkedin": ";;;;;;;;;;;", "or_profile": "~Haiming_Wang1;~Huajian_Xin1;~Chuanyang_Zheng3;~Zhengying_Liu2;~Qingxing_Cao1;~Yinya_Huang1;~Han_Shi1;~Enze_Xie1;~Jian_Yin3;~Zhenguo_Li1;~Xiaodan_Liang2;~jing_xiong3", "aff": "SUN YAT-SEN UNIVERSITY;University of Edinburgh, University of Edinburgh;The Chinese University of Hong Kong;Huawei Technologies Ltd.;SUN YAT-SEN UNIVERSITY, Tsinghua University;City University of Hong Kong;Huawei Technologies Ltd.;Huawei Noah's Ark Lab;SUN YAT-SEN UNIVERSITY;Huawei Noah's Ark Lab;SUN YAT-SEN UNIVERSITY;Sun Yat-Sen University", "aff_domain": "sysu.edu.cn;ed.ac.uk;cse.cuhk.edu.hk;huawei.com;sysu.edu.cn;cityu.edu.hk;huawei.com;huawei.com;sysu.edu.cn;huawei.com;sysu.edu.cn;sysu.edu.cn", "position": "PhD student;PhD student;PhD student;Researcher;Postdoc;Postdoc;Principal Researcher;Researcher;Full Professor;Principal Researcher;Associate Professor;MS student", "bibtex": "@inproceedings{\nwang2024legoprover,\ntitle={{LEGO}-Prover: Neural Theorem Proving with Growing Libraries},\nauthor={Haiming Wang and Huajian Xin and Chuanyang Zheng and Zhengying Liu and Qingxing Cao and Yinya Huang and Jing Xiong and Han Shi and Enze Xie and Jian Yin and Zhenguo Li and Xiaodan Liang},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=3f5PALef5B}\n}", "github": "", "project": "", "reviewers": "SGTy;Nn2P;UPjC;oKSy", "pdf_size": 2951810, "rating": "6;8;8;8", "confidence": "4;3;4;3", "soundness": "3;3;3;3", "contribution": "3;4;4;3", "presentation": "3;2;3;3", "wc_summary": "92;97;73;519", "wc_strengths": "68;92;65;222", "wc_weaknesses": "106;236;31;1048", "wc_questions": "40;123;164;461", "wc_review": "306;548;333;2250", "wc_reply_reviewers": "21;106;24;582", "wc_reply_authors": "1532;1145;562;3177", "reply_reviewers": "1;2;1;1", "reply_authors": "5;5;2;7", "rating_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 3.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 195.25, 
187.13147116399207 ], "wc_strengths_avg": [ 111.75, 64.50726703248247 ], "wc_weaknesses_avg": [ 355.25, 406.62843911856436 ], "wc_questions_avg": [ 197.0, 158.83167190456695 ], "wc_review_avg": [ 859.25, 808.4068823927713 ], "wc_reply_reviewers_avg": [ 183.25, 232.73093369812273 ], "wc_reply_authors_avg": [ 1604.0, 971.5912206272759 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 4.75, 1.7853571071357126 ], "replies_avg": [ 33, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 61, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18067121762305972500&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "openreview": "https://openreview.net/forum?id=3f5PALef5B", "pdf": "https://openreview.net/pdf?id=3f5PALef5B", "email": "sysu.edu.cn;ed.ac.uk;cse.cuhk.edu.hk;huawei.com;sysu.edu.cn;cityu.edu.hk;huawei.com;huawei.com;sysu.edu.cn;huawei.com;sysu.edu.cn;sysu.edu.cn", "author_num": 12, "aff_unique_index": "0;1;2;3;0;4;3;3;0;3;0;0", "aff_unique_norm": "Sun Yat-sen University;University of Edinburgh;Chinese University of Hong Kong;Huawei;City University of Hong Kong", "aff_unique_dep": ";;;Huawei Technologies;", "aff_unique_url": "http://www.sysu.edu.cn;https://www.ed.ac.uk;https://www.cuhk.edu.hk;https://www.huawei.com;https://www.cityu.edu.hk", "aff_unique_abbr": "SYSU;Edinburgh;CUHK;Huawei;CityU", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China;United Kingdom" }, { "title": "Detecting Machine-Generated Texts by Multi-Population Aware Optimization for Maximum Mean Discrepancy", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19498", "id": "3fEKavFsnv", "author_site": "Shuhai Zhang, Yiliao Song, Jiahao Yang, Yuanqing Li, Bo Han, Mingkui Tan", "tldr": "", "abstract": "Large language models (LLMs) such as ChatGPT have exhibited remarkable performance in generating human-like texts. However, machine-generated texts (MGTs) may carry critical risks, such as plagiarism issues and hallucination information. Therefore, it is very urgent and important to detect MGTs in many situations. Unfortunately, it is challenging to distinguish MGTs and human-written texts because the distributional discrepancy between them is often very subtle due to the remarkable performance of LLMS. In this paper, we seek to exploit \\textit{maximum mean discrepancy} (MMD) to address this issue in the sense that MMD can well identify distributional discrepancies. However, directly training a detector with MMD using diverse MGTs will incur a significantly increased variance of MMD since MGTs may contain \\textit{multiple text populations} due to various LLMs. This will severely impair MMD's ability to measure the difference between two samples. To tackle this, we propose a novel \\textit{multi-population} aware optimization method for MMD called MMD-MP, which can \\textit{avoid variance increases} and thus improve the stability to measure the distributional discrepancy. Relying on MMD-MP, we develop two methods for paragraph-based and sentence-based detection, respectively. 
Extensive experiments on various LLMs, \\eg, GPT2 and ChatGPT, show superior detection performance of our MMD-MP.", "keywords": "Large language models;Machine-generated text detection;Maximum mean discrepancy", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/502b3f4d4ad703826e0fac8873b805065226dfdb.pdf", "author": "Shuhai Zhang;Yiliao Song;Jiahao Yang;Yuanqing Li;Bo Han;Mingkui Tan", "authorids": "~Shuhai_Zhang1;~Yiliao_Song2;~Jiahao_Yang3;~Yuanqing_Li2;~Bo_Han1;~Mingkui_Tan2", "gender": "M;F;M;M;M;M", "homepage": "https://github.com/ZSHsh98;https://songyiliao.github.io/;https://github.com/TRISKEL10N;http://www2.scut.edu.cn/autonlaben/2015/0825/c5794a92900/page.htm;https://bhanml.github.io/;https://tanmingkui.github.io/", "dblp": "67/5655;186/7620;;51/2525.html;241/0472-3;49/2007", "google_scholar": "oNhLYoEAAAAJ;lKzKBHUAAAAJ;;https://scholar.google.com.sg/citations?user=wN3v1coAAAAJ;nTNjqHwAAAAJ;https://scholar.google.com.sg/citations?user=EVsoTGkAAAAJ", "orcid": "0000-0001-6877-3825;0000-0002-6633-2695;;;;0000-0001-8856-756X", "linkedin": ";;;;;", "or_profile": "~Shuhai_Zhang1;~Yiliao_Song2;~Jiahao_Yang3;~Yuanqing_Li2;~bo_han2;~Mingkui_Tan1", "aff": "South China University of Technology;Royal Melbourne Institute of Technology;South China University of Technology;South China University of Technology;MBZUAI;South China University of Technology", "aff_domain": "scut.edu.cn;rmit.edu.au;scut.edu.cn;scut.edu.cn;mbzuai.ac.ae;scut.edu.cn", "position": "PhD student;Postdoc;MS student;Full Professor;Researcher;Full Professor", "bibtex": "@inproceedings{\nzhang2024detecting,\ntitle={Detecting Machine-Generated Texts by Multi-Population Aware Optimization for Maximum Mean Discrepancy},\nauthor={Shuhai Zhang and Yiliao Song and Jiahao Yang and Yuanqing Li and Bo Han and Mingkui Tan},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=3fEKavFsnv}\n}", "github": "", "project": "", "reviewers": "4WLs;DBzA;bnWc;1AtS", "pdf_size": 1680464, "rating": "6;6;6;8", "confidence": "4;3;3;3", "soundness": "3;3;2;4", "contribution": "3;4;2;3", "presentation": "4;3;3;3", "wc_summary": "259;101;182;69", "wc_strengths": "176;104;35;50", "wc_weaknesses": "228;42;258;154", "wc_questions": "132;70;282;127", "wc_review": "795;317;757;400", "wc_reply_reviewers": "428;0;249;0", "wc_reply_authors": "1536;1062;3370;1142", "reply_reviewers": "1;0;1;0", "reply_authors": "5;3;7;3", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 152.75, 73.88631470035571 ], "wc_strengths_avg": [ 91.25, 55.251131210139036 ], "wc_weaknesses_avg": [ 170.5, 83.28715387141044 ], "wc_questions_avg": [ 152.75, 78.49641711568752 ], "wc_review_avg": [ 567.25, 211.2301765846916 ], "wc_reply_reviewers_avg": [ 169.25, 180.69501238274398 ], "wc_reply_authors_avg": [ 1777.5, 936.7735852381834 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 4.5, 1.6583123951777 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13517469906614096814&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=3fEKavFsnv", "pdf": 
"https://openreview.net/pdf?id=3fEKavFsnv", "email": "scut.edu.cn;rmit.edu.au;scut.edu.cn;scut.edu.cn;mbzuai.ac.ae;scut.edu.cn", "author_num": 6, "aff_unique_index": "0;1;0;0;2;0", "aff_unique_norm": "South China University of Technology;Royal Melbourne Institute of Technology;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";;", "aff_unique_url": "https://www.scut.edu.cn;https://www.rmit.edu.au;https://www.mbzuai.ac.ae", "aff_unique_abbr": "SCUT;RMIT;MBZUAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;2;0", "aff_country_unique": "China;Australia;United Arab Emirates" }, { "id": "3fRbP8g2LT", "title": "Efficient Redundancy-Free Graph Networks: Higher Expressiveness and Less Over-Squashing", "track": "main", "status": "Reject", "tldr": "", "abstract": "Message Passing Neural Networks (MPNNs) effectively learn graph structures. However, their message passing mechanism introduces redundancy, limiting expressiveness, and causing over-squashing. Prior research has addressed the problem of redundancy but often at the cost of increased complexity. Improving expressiveness and addressing over-squashing remain major concerns in MPNN research with significant room for improvement. This study explores the nature of message passing redundancy and presents efficient solutions through two surrogate structures: Directed Line Graph (DLG) and Directed Acyclic Line Graph (DALG). The surogate structures introduce two corresponding models, Directed Line Graph Network (DLGN) and Efficient Redundancy-Free Graph Network (ERFGN). DLGN, utilizing DLGs, achieves redundancy-free message passing for graphs with a minimum cycle size of \\(L\\) when composed of $L$ layers. ERFGN, on the other hand, leverages DALGs to achieve fully redundancy-free message passing and possesses the expressiveness to distinguish arbitrary graphs under certain conditions. Furthermore, we enhance the expressiveness of ERFGN by incorporating cycle modeling and global attention, thereby achieving higher-order expressiveness. The efficiency and efficacy of these models in improving expressiveness and mitigating over-squashing are analysed theoretically. 
Empirical results on realistic datasets validate the proposed methods.", "keywords": "graph neural network;redundancy-free message passing;expressiveness;over-squashing", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Rongqin Chen;Yan Li;Leong Hou U;Ye Li", "authorids": "~Rongqin_Chen1;~Yan_Li23;~Leong_Hou_U2;~Ye_Li4", "gender": "M;;M;", "homepage": ";;https://www.fst.um.edu.mo/personal/ryanlhu/;", "dblp": ";;38/4996;", "google_scholar": ";;https://scholar.google.com/citations?hl=en;", "orcid": "0000-0002-8498-0346;;0000-0002-5135-5165;", "linkedin": ";;;", "or_profile": "~Rongqin_Chen1;~Yan_Li23;~Leong_Hou_U2;~Ye_Li4", "aff": "University of Macau;;University of macau;", "aff_domain": "um.edu.mo;;um.edu.mo;", "position": "PhD student;;Associate Professor;", "bibtex": "@misc{\nchen2024efficient,\ntitle={Efficient Redundancy-Free Graph Networks: Higher Expressiveness and Less Over-Squashing},\nauthor={Rongqin Chen and Yan Li and Leong Hou U and Ye Li},\nyear={2024},\nurl={https://openreview.net/forum?id=3fRbP8g2LT}\n}", "github": "", "project": "", "reviewers": "Gehp;vob1;WgZE;Dnf7", "site": "https://openreview.net/forum?id=3fRbP8g2LT", "pdf_size": 662905, "rating": "3;5;6;6", "confidence": "4;3;3;3", "soundness": "2;3;3;3", "contribution": "2;2;3;3", "presentation": "2;1;3;3", "wc_summary": "39;47;53;49", "wc_strengths": "33;48;112;15", "wc_weaknesses": "619;175;187;38", "wc_questions": "15;134;102;116", "wc_review": "706;404;454;218", "wc_reply_reviewers": "0;651;90;0", "wc_reply_authors": "1362;592;968;454", "reply_reviewers": "0;1;1;0", "reply_authors": "2;2;3;1", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 47.0, 5.0990195135927845 ], "wc_strengths_avg": [ 52.0, 36.55817282086182 ], "wc_weaknesses_avg": [ 254.75, 218.29381003592383 ], "wc_questions_avg": [ 91.75, 45.740436158829965 ], "wc_review_avg": [ 445.5, 174.2204063822605 ], "wc_reply_reviewers_avg": [ 185.25, 271.3994979730066 ], "wc_reply_authors_avg": [ 844.0, 353.3072317403084 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9428090415820632, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:m_Qs0_errloJ:scholar.google.com/&scioq=Efficient+Redundancy-Free+Graph+Networks:+Higher+Expressiveness+and+Less+Over-Squashing&hl=en&as_sdt=0,23", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "University of Macau", "aff_unique_dep": "", "aff_unique_url": "https://www.um.edu.mo", "aff_unique_abbr": "UM", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Macau SAR", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "3husFxdHI1", "title": "Duality of Information Flow: Insights in Graphical Models and Neural Networks", "track": "main", "status": "Reject", "tldr": "", "abstract": "This research highlights the convergence of probabilistic graphical models and neural networks, shedding light on their inherent similarities and interactions. By interpreting Bayesian neural networks within the framework of Markov random fields, we uncovered deep connections between message passing and neural network propagation. 
Our exploration unveiled a striking equivalence between gradients in neural networks and posterior-prior differences in graphical models. Empirical evaluations across diverse scenarios and datasets showcased the efficacy and generalizability of our approach. This work introduces a novel perspective on Bayesian Neural Networks and probabilistic graphical models, offering insights that could pave the way for enhanced models and a deeper understanding of their relationship.", "keywords": "Bayesian neural network;Probabilistic graphical models;Message-passing algorithm;Langevin dynamics;Fokker-Planck dynamics", "primary_area": "probabilistic methods (Bayesian methods, variational inference, sampling, UQ, etc.)", "supplementary_material": "/attachment/c0f190b5d64a1116f7ebb66ca95e10746c21d5c3.zip", "author": "Wen Dong", "authorids": "~Wen_Dong1", "gender": "M", "homepage": "https://www.cse.buffalo.edu/~wendong/", "dblp": "84/3520-1", "google_scholar": "https://scholar.google.com.tw/citations?user=UBrg28IAAAAJ", "orcid": "", "linkedin": "", "or_profile": "~Wen_Dong1", "aff": "State University of New York, Buffalo", "aff_domain": "buffalo.edu", "position": "Assistant Professor", "bibtex": "@misc{\ndong2024duality,\ntitle={Duality of Information Flow: Insights in Graphical Models and Neural Networks},\nauthor={Wen Dong},\nyear={2024},\nurl={https://openreview.net/forum?id=3husFxdHI1}\n}", "github": "", "project": "", "reviewers": "2p2Z;kosy;Ac7q", "site": "https://openreview.net/forum?id=3husFxdHI1", "pdf_size": 664656, "rating": "3;5;6", "confidence": "3;3;2", "soundness": "2;3;3", "contribution": "2;3;3", "presentation": "2;2;2", "wc_summary": "48;65;70", "wc_strengths": "34;63;49", "wc_weaknesses": "68;190;115", "wc_questions": "255;167;8", "wc_review": "405;485;242", "wc_reply_reviewers": "0;0;36", "wc_reply_authors": "644;632;525", "reply_reviewers": "0;0;1", "reply_authors": "1;1;1", "rating_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 61.0, 9.41629792788369 ], "wc_strengths_avg": [ 48.666666666666664, 11.841546445554407 ], "wc_weaknesses_avg": [ 124.33333333333333, 50.24163833139025 ], "wc_questions_avg": [ 143.33333333333334, 102.21654573610977 ], "wc_review_avg": [ 377.3333333333333, 101.114896144051 ], "wc_reply_reviewers_avg": [ 12.0, 16.97056274847714 ], "wc_reply_authors_avg": [ 600.3333333333334, 53.493509471295255 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.7559289460184545, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:-2VlTl7RZ0IJ:scholar.google.com/&scioq=Duality+of+Information+Flow:+Insights+in+Graphical+Models+and+Neural+Networks&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "State University of New York at Buffalo", "aff_unique_dep": "", "aff_unique_url": "https://www.buffalo.edu", "aff_unique_abbr": "SUNY Buffalo", "aff_campus_unique_index": "0", "aff_campus_unique": "Buffalo", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "3i7iNGxw6r", "title": "Where Does In-context Machine Translation Happen in Large Language Models?", "track": "main", "status": "Reject", "tldr": "", "abstract": 
"Self-supervised large language models have demonstrated the ability to perform Machine Translation (MT) via in-context learning, but little is known about where the model performs MT with respect to prompt instructions and demonstration examples.\nIn this work, we attempt to characterize the region in layer-wise attention heads where GPT models transition from in-context learners to translation models. Through a series of layer-wise context-masking experiments on GPTNeo2.7B and Bloom3B, we demonstrate evidence of a \"task recognition\" point where the translation task is encoded into the input representations and attention to context is no longer necessary. Our layer-wise fine-tuning experiments indicate that the most effective layers for MT fine-tuning are the layers critical to task recognition. Next, we examine redundancy in layers following task recognition, observing that masking these later layers does not hurt performance significantly. Finally, we train discrete attention head gates with $L_0$ regularisation and find evidence that the most pruneable heads occur after task recognition.", "keywords": "In-context Machine Translation;Interpretability", "primary_area": "visualization or interpretation of learned representations", "supplementary_material": "", "author": "Suzanna Sia;David Mueller;Kevin Duh", "authorids": "~Suzanna_Sia1;~David_Mueller1;~Kevin_Duh1", "gender": ";M;M", "homepage": ";https://www.damueller.com;https://cs.jhu.edu/~kevinduh/", "dblp": ";224/2296;58/3217", "google_scholar": ";TMv0Lw8AAAAJ;M3BSiiQAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Suzanna_Sia1;~David_Mueller1;~Kevin_Duh1", "aff": ";Johns Hopkins University;Johns Hopkins University", "aff_domain": ";jhu.edu;jhu.edu", "position": ";PhD student;Assistant Research Professor", "bibtex": "@misc{\nsia2024where,\ntitle={Where Does In-context Machine Translation Happen in Large Language Models?},\nauthor={Suzanna Sia and David Mueller and Kevin Duh},\nyear={2024},\nurl={https://openreview.net/forum?id=3i7iNGxw6r}\n}", "github": "", "project": "", "reviewers": "DzYR;tYJG;JM59;HfyK", "site": "https://openreview.net/forum?id=3i7iNGxw6r", "pdf_size": 1746934, "rating": "3;5;6;6", "confidence": "3;4;4;4", "soundness": "2;3;4;2", "contribution": "2;2;3;2", "presentation": "2;2;4;3", "wc_summary": "82;106;79;149", "wc_strengths": "34;42;48;68", "wc_weaknesses": "540;486;20;103", "wc_questions": "84;125;217;22", "wc_review": "740;759;364;342", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "480;677;173;415", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 104.0, 28.008927148321835 ], "wc_strengths_avg": [ 48.0, 12.569805089976535 ], "wc_weaknesses_avg": [ 287.25, 228.4484351007903 ], "wc_questions_avg": [ 112.0, 70.84842976382751 ], "wc_review_avg": [ 551.25, 198.51621470298088 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 436.25, 180.01857543042607 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.9428090415820632, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:mRnNXcCmdAMJ:scholar.google.com/&scioq=Where+Does+In-context+Machine+Translation+Happen+in+Large+Language+Models%3F&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Johns Hopkins University", "aff_unique_dep": "", "aff_unique_url": "https://www.jhu.edu", "aff_unique_abbr": "JHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "3ijmMNaSJk", "title": "Towards Understanding Masked Distillation", "track": "main", "status": "Reject", "tldr": "", "abstract": "In the realm of self-supervised learning, Masked Image Modeling (MIM) serves as a \n viable approach for mitigating the dependency on large-scale annotated data, \n while demonstrating efficacy across a broad spectrum of downstream tasks. \n A recent variant of MIM known as Masked Distillation (MD) has \n emerged, which utilizes semantic features \n instead of low-level features as the supervision. Although prior work \n has demonstrated its effectiveness in various downstream tasks, the underlying mechanisms \n for its performance improvements remain unclear. Our investigation reveals that \n Masked Distillation mitigates multiple forms of overfitting present in the\n original models, including but not limited to attention homogenization \n and the representation folding of high layers. Further, we uncover that \n Masked Distillation introduces beneficial inductive biases stemming \n from MIM, which are believed to \n contribute positively to model performance. We also \n analyze the nuances of the model architecture design and decision-making tendencies \n in Masked Distillation, revealing inconsistencies with previous research findings.", "keywords": "representation learning;computer vision", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/6f998d60595f439efd7867f3369e32f1d081f12b.zip", "author": "Tuo Chen;Jie Gui", "authorids": "~Tuo_Chen1;~Jie_Gui1", "gender": ";M", "homepage": ";https://guijiejie.github.io/index.html", "dblp": "125/7619;45/794", "google_scholar": "https://scholar.google.com/citations?hl=en;f8oE8NgAAAAJ", "orcid": ";0000-0002-9450-1759", "linkedin": ";", "or_profile": "~Tuo_Chen1;~Jie_Gui1", "aff": "Southeast University;Southeast University", "aff_domain": "seu.edu.cn;seu.edu.cn", "position": "PhD student;Full Professor", "bibtex": "@misc{\nchen2024towards,\ntitle={Towards Understanding Masked Distillation},\nauthor={Tuo Chen and Jie Gui},\nyear={2024},\nurl={https://openreview.net/forum?id=3ijmMNaSJk}\n}", "github": "", "project": "", "reviewers": "65e1;2wDN;hmEG;xu9z", "site": "https://openreview.net/forum?id=3ijmMNaSJk", "pdf_size": 1248944, "rating": "1;3;3;5", "confidence": "3;4;5;4", "soundness": "1;3;1;2", "contribution": "1;2;1;2", "presentation": "1;3;3;3", "wc_summary": "75;52;56;97", "wc_strengths": "23;44;9;39", "wc_weaknesses": "183;59;123;127", "wc_questions": "14;28;392;109", "wc_review": "295;183;580;372", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 3.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 1.75, 0.82915619758885 ], "contribution_avg": [ 1.5, 0.5 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 70.0, 17.84656829757475 ], "wc_strengths_avg": [ 28.75, 13.790848414800301 ], 
"wc_weaknesses_avg": [ 123.0, 43.9089968002003 ], "wc_questions_avg": [ 135.75, 152.32592523927107 ], "wc_review_avg": [ 357.5, 144.97672226947333 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:dPtV1gG3T9gJ:scholar.google.com/&scioq=Towards+Understanding+Masked+Distillation&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Southeast University", "aff_unique_dep": "", "aff_unique_url": "https://www.seu.edu.cn/", "aff_unique_abbr": "SEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "3j5bsiwRv6", "title": "Sparse Refinement for Efficient High-Resolution Semantic Segmentation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Semantic segmentation empowers numerous real-world applications, such as autonomous driving and augmented/mixed reality. These applications often operate on high-resolution images (e.g., 8 megapixels) to capture the fine details. However, this comes at the cost of considerable computational complexity, hindering the deployment in latency-sensitive scenarios. In this paper, we introduce SparseRefine, a novel approach that enhances dense low-resolution predictions with sparse high-resolution refinements. Based on coarse low-resolution outputs, SparseRefine first uses an entropy selector to identify a sparse set of pixels with the least confidence. It then employs a sparse feature extractor to efficiently generate the refinements for those pixels of interest. Finally, it leverages a gated ensembler to apply these sparse refinements to the initial coarse predictions. SparseRefine can be seamlessly integrated into any existing semantic segmentation model, regardless of CNN- or ViT-based. SparseRefine achieves significant speedup: 1.5 to 3.9 times when applied to HRNet-W48, SegFormer-B5, Mask2Former-T/L and SegNeXt-L on Cityscapes, with negligible to no loss of accuracy. We will release the code to reproduce our results. 
We hope that our \"dense+sparse\" paradigm could inspire future research on efficient high-resolution visual computing.", "keywords": "Efficient machine learning;Semantic segmentation;Sparsity;Efficient model design;Model compression and acceleration", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/13b5d296154331dfc9827e106ba8e5fc10072d2f.pdf", "author": "Zhijian Liu;Zhuoyang Zhang;Shang Yang;Haotian Tang;Chenfeng Xu;Kurt Keutzer;Song Han", "authorids": "~Zhijian_Liu1;~Zhuoyang_Zhang1;~Shang_Yang1;~Haotian_Tang1;~Chenfeng_Xu1;~Kurt_Keutzer1;~Song_Han5", "gender": "M;M;M;M;M;M;", "homepage": "https://zhijianliu.com;https://hanlab.mit.edu/team/zhuoyang-zhang;;http://kentang.net;;https://people.eecs.berkeley.edu/~keutzer/;", "dblp": ";;79/9960;245/0058;65/1881;k/KurtKeutzer.html;", "google_scholar": "mwzYYPgAAAAJ;Q1csT-8AAAAJ;https://scholar.google.com.hk/citations?user=p71ikL4AAAAJ;WxL13BAAAAAJ;RpqvaTUAAAAJ;ID9QePIAAAAJ;", "orcid": ";;;;0000-0002-4941-6985;0000-0003-3868-8501;", "linkedin": "zhijianliu/;zhuoyang-zhang/;;;;kurtkeutzer/;", "or_profile": "~Zhijian_Liu1;~Zhuoyang_Zhang1;~Shang_Yang1;~Haotian_Tang1;~Chenfeng_Xu1;~Kurt_Keutzer1;~Song_Han5", "aff": "Massachusetts Institute of Technology;IIIS, Tsinghua University;Massachusetts Institute of Technology;NVIDIA;University of California, Berkeley;University of California, Berkeley;", "aff_domain": "mit.edu;mails.tsinghua.edu.cn;mit.edu;nvidia.com;berkeley.edu;berkeley.edu;", "position": "PhD student;Undergrad student;PhD student;Intern;PhD student;Full Professor;", "bibtex": "@misc{\nliu2024sparse,\ntitle={Sparse Refinement for Efficient High-Resolution Semantic Segmentation},\nauthor={Zhijian Liu and Zhuoyang Zhang and Shang Yang and Haotian Tang and Chenfeng Xu and Kurt Keutzer and Song Han},\nyear={2024},\nurl={https://openreview.net/forum?id=3j5bsiwRv6}\n}", "github": "", "project": "", "reviewers": "Jr1p;7yKn;c8GE;pDrt", "site": "https://openreview.net/forum?id=3j5bsiwRv6", "pdf_size": 26083187, "rating": "5;5;6;6", "confidence": "5;4;4;4", "soundness": "3;3;3;4", "contribution": "3;2;3;3", "presentation": "4;3;3;4", "wc_summary": "65;55;93;68", "wc_strengths": "72;46;87;124", "wc_weaknesses": "120;97;52;58", "wc_questions": "64;45;114;31", "wc_review": "321;243;346;281", "wc_reply_reviewers": "101;0;30;133", "wc_reply_authors": "613;661;708;766", "reply_reviewers": "1;0;1;1", "reply_authors": "3;3;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 70.25, 13.988834833537782 ], "wc_strengths_avg": [ 82.25, 28.216794644324857 ], "wc_weaknesses_avg": [ 81.75, 28.039035290109393 ], "wc_questions_avg": [ 63.5, 31.42053468672995 ], "wc_review_avg": [ 297.75, 39.20060586266493 ], "wc_reply_reviewers_avg": [ 66.0, 53.30572201931046 ], "wc_reply_authors_avg": [ 687.0, 56.64362276549762 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3823173143449573058&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;0;2;3;3", "aff_unique_norm": "Massachusetts Institute of Technology;Tsinghua University;NVIDIA;University of California, 
Berkeley", "aff_unique_dep": ";Institute for Interdisciplinary Information Sciences;NVIDIA Corporation;", "aff_unique_url": "https://web.mit.edu;https://www.tsinghua.edu.cn;https://www.nvidia.com;https://www.berkeley.edu", "aff_unique_abbr": "MIT;THU;NVIDIA;UC Berkeley", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;1;0;0;0;0", "aff_country_unique": "United States;China" }, { "id": "3jXCF5dNpC", "title": "Re-Reading Improves Reasoning in Language Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "Reasoning presents a significant and challenging issue for Large Language Models (LLMs). The predominant focus of research has revolved around developing diverse prompting strategies to guide and structure the reasoning processes of LLMs. However, these approaches based on decoder-only causal language models often operate the input question in a single forward pass, potentially missing the rich, back-and-forth interactions inherent in human reasoning. Scant attention has been paid to a critical dimension, i.e., the input question itself embedded within the prompts. In response, we introduce a seemingly straightforward yet remarkably effective prompting strategy\u2014Re2, which involves re-reading the question. Drawing inspiration from human learning and problem-solving, re-reading entails revisiting the question information embedded within input prompts. This approach aligns seamlessly with the cognitive principle of reinforcement, enabling LLMs to understand the input in a ''bidirectional'' manner, extract deeper insights, and ultimately enhance their reasoning capabilities across various tasks. Experiments conducted on a series of reasoning benchmarks serve to underscore the effectiveness {and generality} of our method. 
Moreover, our findings demonstrate that our approach seamlessly integrates with various language models, {thought-eliciting} prompting methods, and ensemble techniques, further underscoring its versatility and compatibility in the realm of LLMs.", "keywords": "Large Language Model;Reasoning", "primary_area": "generative models", "supplementary_material": "", "author": "Xiaohan Xu;Chongyang Tao;Tao Shen;Can Xu;Hongbo Xu;Guodong Long;Jian-Guang Lou", "authorids": "~Xiaohan_Xu1;~Chongyang_Tao1;~Tao_Shen1;~Can_Xu2;~Hongbo_Xu3;~Guodong_Long2;~Jian-Guang_Lou1", "gender": "M;M;M;M;M;M;M", "homepage": ";;;https://people.ucas.ac.cn/~xuhongbo;https://www.uts.edu.au/staff/guodong.long;https://www.microsoft.com/en-us/research/people/jlou/;https://tebmer.github.io", "dblp": ";95/4097-1;;https://dblp.org/search?q=Hongbo+Xu+Tingwen+Liu;34/10089;37/1917;268/5155", "google_scholar": "x_cOKuwAAAAJ;https://scholar.google.com.au/citations?user=SegyX9AAAAAJ;5aiE_NcAAAAJ;;https://scholar.google.com.au/citations?user=Pl8m7hMAAAAJ;alDxINIAAAAJ;iKf_N9oAAAAJ", "orcid": ";;0000-0002-1949-5715;0000-0002-0258-7840;0000-0003-3740-9515;;", "linkedin": ";;;;;;", "or_profile": "~Chongyang_Tao1;~Tao_Shen1;~Can_Xu2;~Hongbo_Xu3;~Guodong_Long2;~Jian-Guang_Lou1;~Shawn_Xu1", "aff": "Microsoft;University of Technology Sydney;Microsoft;Institute of Information Engineering;University of Technology Sydney;Microsoft Research Asia;University of Chinese Academy of Sciences", "aff_domain": "microsoft.com;uts.edu.au;microsoft.com;iie.ac.cn;uts.edu.au;microsoft.com;ucas.ac.cn", "position": "Researcher;Postdoc;Researcher;Full Professor;Associate Professor;Principal Researcher;MS student", "bibtex": "@misc{\nxu2024rereading,\ntitle={Re-Reading Improves Reasoning in Language Models},\nauthor={Xiaohan Xu and Chongyang Tao and Tao Shen and Can Xu and Hongbo Xu and Guodong Long and Jian-Guang Lou},\nyear={2024},\nurl={https://openreview.net/forum?id=3jXCF5dNpC}\n}", "github": "", "project": "", "reviewers": "hwHe;KNMq;ZszC", "site": "https://openreview.net/forum?id=3jXCF5dNpC", "pdf_size": 750659, "rating": "5;5;8", "confidence": "3;3;5", "soundness": "2;3;3", "contribution": "2;2;3", "presentation": "3;4;4", "wc_summary": "87;33;62", "wc_strengths": "65;32;59", "wc_weaknesses": "131;81;95", "wc_questions": "63;51;139", "wc_review": "346;197;355", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1230;995;1405", "reply_reviewers": "0;0;0", "reply_authors": "2;2;2", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 60.666666666666664, 22.065558884580486 ], "wc_strengths_avg": [ 52.0, 14.352700094407323 ], "wc_weaknesses_avg": [ 102.33333333333333, 21.06075866524175 ], "wc_questions_avg": [ 84.33333333333333, 38.96437118987322 ], "wc_review_avg": [ 299.3333333333333, 72.45381670799377 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1210.0, 167.97817318528817 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1557878389521115921&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;0;2;1;0;3", "aff_unique_norm": "Microsoft;University of Technology Sydney;Institute 
of Information Engineering;University of Chinese Academy of Sciences", "aff_unique_dep": "Microsoft Corporation;;;", "aff_unique_url": "https://www.microsoft.com;https://www.uts.edu.au;;http://www.ucas.ac.cn", "aff_unique_abbr": "Microsoft;UTS;;UCAS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;1;0;1;3;3", "aff_country_unique": "United States;Australia;;China" }, { "id": "3k6raldhEd", "title": "A Best-of-Both-Worlds Algorithm for MDPs with Long-Term Constraints", "track": "main", "status": "Reject", "tldr": "", "abstract": "We study online learning in episodic constrained Markov decision processes (CMDPs), where the goal of the learner is to collect as much reward as possible over the episodes, while guaranteeing that some long-term constraints are satisfied during the learning process. Rewards and constraints can be selected either stochastically or adversarially, and the transition function is not known to the learner.\nWhile online learning in classical (unconstrained) MDPs has received considerable attention over the last years, the setting of CMDPs is still largely unexplored. This is surprising, since in real-world applications, such as, e.g., autonomous driving, automated bidding, and recommender systems, there are usually additional constraints and specifications that an agent has to obey during the learning process.\nIn this paper, we provide the first best-of-both-worlds algorithm for CMDPs with long-term constraints. Our algorithm is capable of handling settings in which rewards and constraints are selected either stochastically or adversarially, without requiring any knowledge of the underling process. Moreover, our algorithm matches state-of-the-art regret and constraint violation bounds for settings in which constraints are selected stochastically, while it is the first to provide guarantees in the case in which they are chosen adversarially.", "keywords": "constrained;mdp;cmdp;markov decision process;online learning", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/6515e92a28fc64fd0d7b648872e9108da27be243.zip", "author": "Francesco Emanuele Stradi;Jacopo Germano;Gianmarco Genalti;Matteo Castiglioni;Alberto Marchesi;Nicola Gatti", "authorids": "~Francesco_Emanuele_Stradi1;~Jacopo_Germano1;~Gianmarco_Genalti1;~Matteo_Castiglioni1;~Alberto_Marchesi1;~Nicola_Gatti1", "gender": "M;M;Not Specified;;M;M", "homepage": "https://francescoemanuelestradi.github.io;https://www.deib.polimi.it/eng/people/details/1132094;;https://castiglionimatteo.github.io;https://albymarke.github.io;https://www4.ceda.polimi.it/manifesti/manifesti/controller/ricerche/RicercaPerDocentiPublic.do?k_doc=75785&lang=EN&EVN_PRODOTTI=evento&__pj0=0&__pj1=d918ee8916afbd0005f5c0bc3c0ff350", "dblp": "345/9650;;;225/7720;204/1718;g/NicolaGatti", "google_scholar": "JYdi_FMAAAAJ;;b4UMI8kAAAAJ;https://scholar.google.it/citations?user=NPE3HAYAAAAJ;vXDtCzoAAAAJ;https://scholar.google.com.tw/citations?user=j-HrYREAAAAJ", "orcid": ";;;0000-0002-1070-6766;;0000-0001-7349-3932", "linkedin": "francesco-emanuele-stradi-bb35b0222/;;gianmarco-genalti-26328a1a4/;;;nicola-gatti-1284b21", "or_profile": "~Francesco_Emanuele_Stradi1;~Jacopo_Germano1;~Gianmarco_Genalti1;~Matteo_Castiglioni1;~Alberto_Marchesi1;~Nicola_Gatti1", "aff": "Polytechnic Institute of Milan;Polytechnic Institute of Milan;Polytechnic Institute of Milan;Politecnico di Milano;Politecnico di Milano;Polytechnic Institute of Milan", "aff_domain": 
"polimi.it;polimi.it;polimi.it;polimi.it;polimi.it;polimi.it", "position": "PhD student;PhD student;PhD student;Assistant Professor;Assistant Professor;Full Professor", "bibtex": "@misc{\nstradi2024a,\ntitle={A Best-of-Both-Worlds Algorithm for {MDP}s with Long-Term Constraints},\nauthor={Francesco Emanuele Stradi and Jacopo Germano and Gianmarco Genalti and Matteo Castiglioni and Alberto Marchesi and Nicola Gatti},\nyear={2024},\nurl={https://openreview.net/forum?id=3k6raldhEd}\n}", "github": "", "project": "", "reviewers": "KHRX;Txce;r196;bJQa", "site": "https://openreview.net/forum?id=3k6raldhEd", "pdf_size": 401706, "rating": "3;5;6;6", "confidence": "5;3;3;3", "soundness": "3;3;3;3", "contribution": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "71;76;79;79", "wc_strengths": "52;53;60;55", "wc_weaknesses": "184;50;8;114", "wc_questions": "71;33;34;25", "wc_review": "378;212;181;273", "wc_reply_reviewers": "206;75;0;0", "wc_reply_authors": "2575;913;302;639", "reply_reviewers": "1;1;0;0", "reply_authors": "5;2;1;1", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 76.25, 3.2691742076555053 ], "wc_strengths_avg": [ 55.0, 3.082207001484488 ], "wc_weaknesses_avg": [ 89.0, 66.58077800686922 ], "wc_questions_avg": [ 40.75, 17.80975856096876 ], "wc_review_avg": [ 261.0, 75.22300180131074 ], "wc_reply_reviewers_avg": [ 70.25, 84.14385004265017 ], "wc_reply_authors_avg": [ 1107.25, 874.6011591005354 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.25, 1.6393596310755 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.9428090415820632, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Kpb1njGELlkJ:scholar.google.com/&scioq=A+Best-of-Both-Worlds+Algorithm+for+MDPs+with+Long-Term+Constraints&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;1;1;0", "aff_unique_norm": "Polytechnic Institute of Milan;Politecnico di Milano", "aff_unique_dep": ";", "aff_unique_url": "https://www.polimi.it/;https://www.polimi.it", "aff_unique_abbr": "Politecnico di Milano;Polimi", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Italy" }, { "id": "3klVRLhK7w", "title": "Budgeted Online Continual Learning by Adaptive Layer Freezing and Frequency-based Sampling", "track": "main", "status": "Reject", "tldr": "", "abstract": "Majority of online continual learning (CL) places restrictions on the size of replay memory and a single-epoch training to ensure a prompt update of the model. However, the single-epoch training may imply a different amount of computations per CL algorithm, and additional storage for storing logit or model in addition to replay memory is largely ignored as a storage budget. Here, we used floating point operations (FLOPs) and total memory size in Byte as a metric for computational and memory budgets, respectively, to compare CL algorithms with the same total budget. Interestingly, we found that the new and advanced algorithms often perform worse than simple baselines under the same budget, implying that their value is less beneficial in real-world deployment. 
To improve the accuracy of online continual learners in the same budget, we propose an adaptive layer freezing and frequency-based memory retrieval for episodic memory usage for a storage- and computationally-efficient online CL algorithm. The proposed adaptive layer freezing does not update the layers for less informative batches to reduce computational cost with a negligible loss of accuracy. The proposed memory retrieval balances the training usage count of samples in episodic memory with a negligible computational and memory cost. In extensive empirical validations using CIFAR-10/100, CLEAR-10, and ImageNet-1K datasets, we demonstrate that the proposed method outperforms the state-of-the-art in the same total budget.", "keywords": "continual learning;constraint;layer freezing;efficient learning", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "", "author": "Minhyuk Seo;Hyunseo Koh;Jonghyun Choi", "authorids": "~Minhyuk_Seo1;~Hyunseo_Koh1;~Jonghyun_Choi1", "gender": "M;M;M", "homepage": "https://dbd05088.github.io/;;https://ppolon.github.io/", "dblp": "350/4104;304/4369;21/11103", "google_scholar": "ayDPR-gAAAAJ;Mi4cMxgAAAAJ;uiGWnm4AAAAJ", "orcid": ";0000-0002-2576-1581;0000-0002-7934-8434", "linkedin": "minhyuk-seo-59ba11247/;%ED%98%84%EC%84%9C-%EA%B3%A0-66298a221/;jonghyun-choi-459bb615/", "or_profile": "~Minhyuk_Seo1;~Hyunseo_Koh1;~Jonghyun_Choi1", "aff": "Yonsei University;Gwangju Institute of Science and Technology;Yonsei University", "aff_domain": "yonsei.ac.kr;gist.ac.kr;yonsei.ac.kr", "position": "MS student;PhD student;Associate Professor", "bibtex": "@misc{\nseo2024budgeted,\ntitle={Budgeted Online Continual Learning by Adaptive Layer Freezing and Frequency-based Sampling},\nauthor={Minhyuk Seo and Hyunseo Koh and Jonghyun Choi},\nyear={2024},\nurl={https://openreview.net/forum?id=3klVRLhK7w}\n}", "github": "", "project": "", "reviewers": "7DQF;htG8;sCXL", "site": "https://openreview.net/forum?id=3klVRLhK7w", "pdf_size": 722369, "rating": "5;5;5", "confidence": "2;2;4", "soundness": "2;2;2", "contribution": "2;2;2", "presentation": "3;2;2", "wc_summary": "78;116;54", "wc_strengths": "39;30;34", "wc_weaknesses": "3;48;309", "wc_questions": "112;159;269", "wc_review": "232;353;666", "wc_reply_reviewers": "16;27;315", "wc_reply_authors": "663;839;2557", "reply_reviewers": "1;1;2", "reply_authors": "2;2;5", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 2.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 2.0, 0.0 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 82.66666666666667, 25.525586292102197 ], "wc_strengths_avg": [ 34.333333333333336, 3.6817870057290873 ], "wc_weaknesses_avg": [ 120.0, 134.89996293550269 ], "wc_questions_avg": [ 180.0, 65.79260343432738 ], "wc_review_avg": [ 417.0, 182.86789402917796 ], "wc_reply_reviewers_avg": [ 119.33333333333333, 138.4300866462522 ], "wc_reply_authors_avg": [ 1353.0, 854.3832083243833 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.0, 1.4142135623730951 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7960869026184940243&as_sdt=4005&sciodt=0,6&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Yonsei University;Gwangju Institute of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": 
"https://www.yonsei.ac.kr;https://www.gist.ac.kr", "aff_unique_abbr": "Yonsei;GIST", "aff_campus_unique_index": "1", "aff_campus_unique": ";Gwangju", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "id": "3mDe5o24BM", "title": "HFDream: Improving 3D Generation via Human-Assisted Multi-view Text-to-Image Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "Large-scale text-to-image models have demonstrated the potential for performing text-to-3D synthesis. However, existing approaches, e.g., DreamFusion, suffer from unstable 3D optimization due to the limitations of current text-to-image models that they struggle to synthesize images from certain viewpoints even when specified in the text prompt. Obtaining a view-aligned image-text pair dataset is challenging due to the limited availability of such data, and the inherent subjectivity and ambiguity of view-alignment. In this paper, we propose to enhance text-to- 3D generation by learning from human feedback for generating desired views. We generate multi-view images with the text-to-image model and engage human labelers to select a valid viewpoint. Using the human-labeled dataset, we train a reward model designed to verify whether the generated image aligns with the viewpoint specified in the text prompt. Finally, we fine-tune the text-to-image model to maximize the reward score. We find that our text-to-image diffusion models fine-tuned with human feedback, coined HFDream, consistently generate diverse viewpoints without the need for multi-view datasets created from 3D assets. This leads to high-quality text-to-3D generations with consistent geometry, when combined with view-dependent prompting in DreamFusion.", "keywords": "Learning from Human Feedback;Text-to-3D generation;Diffusion Model", "primary_area": "generative models", "supplementary_material": "/attachment/f74c02c92af065395df80d08a5d14c862d8658aa.zip", "author": "June Suk Choi;Kyungmin Lee;DongJun Lee;Jinwoo Shin;Kimin Lee", "authorids": "~June_Suk_Choi1;~Kyungmin_Lee1;~DongJun_Lee2;~Jinwoo_Shin1;~Kimin_Lee1", "gender": "M;M;M;M;M", "homepage": "https://choi403.github.io/;https://kyungmnlee.github.io/;https://dgjun32.github.io/;https://sites.google.com/site/mijirim/;https://sites.google.com/view/kiminlee", "dblp": ";57/5118;;31/7062;183/6849", "google_scholar": ";6dpime0AAAAJ;;https://scholar.google.com.tw/citations?user=m3eDp7kAAAAJ;92M8xv4AAAAJ", "orcid": ";;;;", "linkedin": "william-june-suk-choi-b03158350/;;;;", "or_profile": "~June_Suk_Choi1;~Kyungmin_Lee1;~DongJun_Lee2;~Jinwoo_Shin1;~Kimin_Lee1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea University;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.edu;kaist.ac.kr;korea.ac.kr;kaist.ac.kr;kaist.edu", "position": "MS student;PhD student;Undergrad student;Full Professor;Assistant Professor", "bibtex": "@misc{\nchoi2024hfdream,\ntitle={{HFD}ream: Improving 3D Generation via Human-Assisted Multi-view Text-to-Image Models},\nauthor={June Suk Choi and Kyungmin Lee and DongJun Lee and Jinwoo Shin and Kimin Lee},\nyear={2024},\nurl={https://openreview.net/forum?id=3mDe5o24BM}\n}", "github": "", "project": "", "reviewers": "L7ZF;PzSD;T5e1", "site": "https://openreview.net/forum?id=3mDe5o24BM", "pdf_size": 29630145, "rating": "5;5;6", "confidence": "5;5;4", "soundness": "2;3;3", "contribution": "2;2;2", "presentation": "2;3;3", "wc_summary": 
"28;52;99", "wc_strengths": "19;29;68", "wc_weaknesses": "126;205;160", "wc_questions": "16;3;38", "wc_review": "189;289;365", "wc_reply_reviewers": "0;0;18", "wc_reply_authors": "764;535;546", "reply_reviewers": "0;0;1", "reply_authors": "2;2;2", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 59.666666666666664, 29.48822740612863 ], "wc_strengths_avg": [ 38.666666666666664, 21.139746660943903 ], "wc_weaknesses_avg": [ 163.66666666666666, 32.355662392986005 ], "wc_questions_avg": [ 19.0, 14.445299120013633 ], "wc_review_avg": [ 281.0, 72.07403600927776 ], "wc_reply_reviewers_avg": [ 6.0, 8.48528137423857 ], "wc_reply_authors_avg": [ 615.0, 105.4545715778442 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.9999999999999998, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18052567484890569273&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Korea University", "aff_unique_dep": ";", "aff_unique_url": "https://www.kaist.ac.kr;https://www.korea.ac.kr", "aff_unique_abbr": "KAIST;KU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "3mXJ9o2DNx", "title": "Connecting Domains and Contrasting Samples: A Ladder for Domain Generalization", "track": "main", "status": "Reject", "tldr": "", "abstract": "Distribution shifts between training and testing datasets, contrary to classical machine learning assumptions, frequently occur in practice and impede model generalization performance. Studies on domain generalization (DG) thereby arise, aiming to predict the label on unseen target domain data by only using data from source domains. In the meanwhile, the contrastive learning (CL) technique, which prevails in self-supervised pre-training, can align different augmentation of samples to obtain invariant representation. It is intuitive to consider the class-separated representations learned in CL are able to improve domain generalization, while the reality is quite the opposite: people observe directly applying CL deteriorates the performance. We analyze the phenomenon with the CL theory and discover the lack of domain connectivity in the DG setting causes the deficiency. Thus we propose domain-connecting contrastive learning (\\model) to enhance the conceptual connectivity across domains and obtain generalizable representations for DG. Specifically, more aggressive data augmentation and cross-domain positive samples are introduced into self-contrastive learning to improve domain connectivity. Furthermore, to better embed the unseen test domains, we propose model anchoring to exploit the domain connectivity in pre-trained representations and complement it with generative transformation loss. Extensive experiments on five standard DG benchmarks are provided. 
The results verify that \\model~outperforms state-of-the-art baselines even without domain supervision.", "keywords": "Distribution shift;contrastive learning;self-supervised learning", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/37b224801f82a0f80626b3cda68bf0cbd37ee59a.zip", "author": "Tianxin Wei;Yifan Chen;Xinrui He;Jingrui He", "authorids": "~Tianxin_Wei1;~Yifan_Chen3;~Xinrui_He1;~Jingrui_He1", "gender": ";;;F", "homepage": "https://weitianxin.github.io/;;https://github.com/Xinrui17;https://www.hejingrui.org", "dblp": "277/5800;;;34/2685", "google_scholar": "_LU2-kMAAAAJ;;;hXpZynkAAAAJ", "orcid": "0000-0003-4450-2005;;0009-0003-4475-8059;0000-0002-6429-6272", "linkedin": "tianxin-wei-7063a2180/;;;", "or_profile": "~Tianxin_Wei1;~Yifan_Chen3;~Xinrui_He1;~Jingrui_He1", "aff": "University of Illinois, Urbana-Champaign;;University of Illinois, Urbana-Champaign;University of Illinois, Urbana Champaign", "aff_domain": "uiuc.edu;;cs.illinois.edu;illinois.edu", "position": "PhD student;;PhD student;Full Professor", "bibtex": "@misc{\nwei2024connecting,\ntitle={Connecting Domains and Contrasting Samples: A Ladder for Domain Generalization},\nauthor={Tianxin Wei and Yifan Chen and Xinrui He and Jingrui He},\nyear={2024},\nurl={https://openreview.net/forum?id=3mXJ9o2DNx}\n}", "github": "", "project": "", "reviewers": "BueW;2QYf;fbsW;ttfL;NxUh", "site": "https://openreview.net/forum?id=3mXJ9o2DNx", "pdf_size": 11910679, "rating": "3;5;5;5;5", "confidence": "5;4;4;4;3", "soundness": "2;2;3;2;3", "contribution": "2;2;2;2;2", "presentation": "2;3;3;2;3", "wc_summary": "75;90;70;27;46", "wc_strengths": "9;98;28;22;29", "wc_weaknesses": "160;207;74;91;199", "wc_questions": "3;5;6;157;4", "wc_review": "247;400;178;297;278", "wc_reply_reviewers": "0;78;0;71;0", "wc_reply_authors": "784;640;366;628;602", "reply_reviewers": "0;1;0;1;0", "reply_authors": "2;2;2;2;2", "rating_avg": [ 4.6, 0.7999999999999999 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 61.6, 22.348154286204487 ], "wc_strengths_avg": [ 37.2, 31.224349472807276 ], "wc_weaknesses_avg": [ 146.2, 54.653087744426664 ], "wc_questions_avg": [ 35.0, 61.00819617067857 ], "wc_review_avg": [ 280.0, 72.36850143536205 ], "wc_reply_reviewers_avg": [ 29.8, 36.56446362248461 ], "wc_reply_authors_avg": [ 604.0, 134.84806264830058 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.790569415042095, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11969925089597990194&as_sdt=5,39&sciodt=0,39&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Illinois;University of Illinois Urbana-Champaign", "aff_unique_dep": ";", "aff_unique_url": "https://illinois.edu;https://illinois.edu", "aff_unique_abbr": "UIUC;UIUC", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "3mY9aGiMn0", "title": "Sparser, Better, Deeper, Stronger: Improving Sparse Training with Exact Orthogonal Initialization", "track": "main", "status": "Reject", "tldr": "", "abstract": "Sparse training aims to train sparse models from scratch, achieving 
remarkable results in recent years. A key design choice in sparse training is the sparse initialization, which determines the trainable sub-network through a binary mask. Existing methods mainly revolve around selecting the mask based on predefined dense weight initialization. However, such an approach may not efficiently leverage the mask's potential impact on training parameters and optimization. An alternative direction, inspired by research into dynamical isometry, is to introduce orthogonality in the sparse subnetwork. This helps prevent the gradient signal from vanishing or exploding, ultimately enhancing the reliability of the backpropagation process. In this work, we propose Exact Orthogonal Initialization (EOI), a novel sparse orthogonal initialization scheme based on composing random Givens rotations. Contrary to other existing approaches, our method provides exact (not approximated) orthogonality and enables the creation of layers with arbitrary densities. Through experiments on contemporary network architectures, we present the effectiveness of EOI and demonstrate that it consistently outperforms other commonly used sparse initialization techniques. Furthermore, to showcase the full potential of our method, we show that it enables the training of highly sparse 1000-layer MLP and CNN networks without any residual connections or normalization techniques. Our research highlights the importance of weight initialization in sparse training, underscoring the vital part it plays alongside the sparse mask selection.", "keywords": "Sparse Training;Pruning;Orthogonal Initialization", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/778bfcdfe08282d091b659a3c5f279b938c44c68.zip", "author": "Aleksandra Nowak;\u0141ukasz Gniecki;Filip Szatkowski;Jacek Tabor", "authorids": "~Aleksandra_Nowak1;~\u0141ukasz_Gniecki1;~Filip_Szatkowski1;~Jacek_Tabor1", "gender": "F;M;M;M", "homepage": ";;;", "dblp": "34/10106;;323/8425;31/5172", "google_scholar": "2A-eZhQAAAAJ;;xjnAIOEAAAAJ;https://scholar.google.pl/citations?user=zSKYziUAAAAJ", "orcid": "0000-0002-2830-6613;;0000-0001-8592-2001;0000-0001-6652-7727", "linkedin": ";lukaszgniecki/;fszatkowski/;", "or_profile": "~Aleksandra_Nowak1;~\u0141ukasz_Gniecki1;~Filip_Szatkowski1;~Jacek_Tabor1", "aff": "Google;Jagiellonian University Cracow;Amazon;Jagiellonian University", "aff_domain": "google.com;uj.edu.pl;amazon.de;uj.edu.pl", "position": "Intern;MS student;Intern;Full Professor", "bibtex": "@misc{\nnowak2024sparser,\ntitle={Sparser, Better, Deeper, Stronger: Improving Sparse Training with Exact Orthogonal Initialization},\nauthor={Aleksandra Nowak and {\\L}ukasz Gniecki and Filip Szatkowski and Jacek Tabor},\nyear={2024},\nurl={https://openreview.net/forum?id=3mY9aGiMn0}\n}", "github": "", "project": "", "reviewers": "7ET6;s3UF;4iab", "site": "https://openreview.net/forum?id=3mY9aGiMn0", "pdf_size": 1003240, "rating": "5;5;6", "confidence": "5;3;4", "soundness": "3;2;4", "contribution": "3;2;2", "presentation": "3;3;3", "wc_summary": "41;151;203", "wc_strengths": "26;15;40", "wc_weaknesses": "223;157;37", "wc_questions": "342;39;70", "wc_review": "632;362;350", "wc_reply_reviewers": "0;33;84", "wc_reply_authors": "2244;1403;1027", "reply_reviewers": "0;1;2", "reply_authors": "3;3;3", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], 
"presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 131.66666666666666, 67.53435339802174 ], "wc_strengths_avg": [ 27.0, 10.23067283548187 ], "wc_weaknesses_avg": [ 139.0, 76.99350621968063 ], "wc_questions_avg": [ 150.33333333333334, 136.11841250257888 ], "wc_review_avg": [ 448.0, 130.19984639007836 ], "wc_reply_reviewers_avg": [ 39.0, 34.55430508634199 ], "wc_reply_authors_avg": [ 1558.0, 508.7835165044821 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 3.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3081273972719296832&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Google;Jagiellonian University;Amazon", "aff_unique_dep": "Google;;Amazon.com, Inc.", "aff_unique_url": "https://www.google.com;https://www.uj.edu.pl;https://www.amazon.com", "aff_unique_abbr": "Google;UJ;Amazon", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Mountain View;Cracow;", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "United States;Poland" }, { "id": "3mZEMBM0jN", "title": "Learning Symbolic Interactions for Interpretable State-Space Modeling", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "A general model to learn complex system dynamics will be helpful for us to understand how natural and computational networks of simple computation units solve complex problems. We formulate discrete event dynamics as a Bayesian neural network with skip connections: we use linearity to select hidden features to interact and combine the effects of these interactions, and we use nonlinearity (exponential and logarithm) to compound these interactions. To make learning scalable, we derive a Bayesian backpropagation algorithm that computes the expected loss gradient through propagating filtering and smoothing probabilities of hidden features. 
Experiments demonstrate that our algorithm can data-efficiently capture complex system dynamics in several fields with meaningful interactions.", "keywords": "Bayesian back-propagation;complex systems", "primary_area": "neurosymbolic & hybrid AI systems (physics-informed, logic & formal reasoning, etc.)", "supplementary_material": "", "author": "Wen Dong", "authorids": "~Wen_Dong1", "gender": "M", "homepage": "https://www.cse.buffalo.edu/~wendong/", "dblp": "84/3520-1", "google_scholar": "https://scholar.google.com.tw/citations?user=UBrg28IAAAAJ", "orcid": "", "linkedin": "", "or_profile": "~Wen_Dong1", "aff": "State University of New York, Buffalo", "aff_domain": "buffalo.edu", "position": "Assistant Professor", "bibtex": "@misc{\ndong2024learning,\ntitle={Learning Symbolic Interactions for Interpretable State-Space Modeling},\nauthor={Wen Dong},\nyear={2024},\nurl={https://openreview.net/forum?id=3mZEMBM0jN}\n}", "github": "", "project": "", "reviewers": "", "site": "https://openreview.net/forum?id=3mZEMBM0jN", "pdf_size": 0, "rating": "", "confidence": "", "soundness": "", "contribution": "", "presentation": "", "wc_summary": "", "wc_strengths": "", "wc_weaknesses": "", "wc_questions": "", "wc_review": "", "wc_reply_reviewers": "", "wc_reply_authors": "", "reply_reviewers": "", "reply_authors": "", "rating_avg": [ 0, 0 ], "confidence_avg": [ 0, 0 ], "soundness_avg": [ 0, 0 ], "contribution_avg": [ 0, 0 ], "presentation_avg": [ 0, 0 ], "wc_summary_avg": [ 0, 0 ], "wc_strengths_avg": [ 0, 0 ], "wc_weaknesses_avg": [ 0, 0 ], "wc_questions_avg": [ 0, 0 ], "wc_review_avg": [ 0, 0 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 0, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Mn1TLIr6P9EJ:scholar.google.com/&scioq=Learning+Symbolic+Interactions+for+Interpretable+State-Space+Modeling&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "State University of New York at Buffalo", "aff_unique_dep": "", "aff_unique_url": "https://www.buffalo.edu", "aff_unique_abbr": "SUNY Buffalo", "aff_campus_unique_index": "0", "aff_campus_unique": "Buffalo", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "3mdCet7vVv", "title": "Maestro: Uncovering Low-Rank Structures via Trainable Decomposition", "track": "main", "status": "Reject", "tldr": "", "abstract": "Deep Neural Networks (DNNs) have been large drivers and enablers for AI breakthroughs in recent years. These models have been getting larger in their attempt to become more accurate and tackle new upcoming use-cases, including AR/VR and intelligent assistants. However, the training process of such large models is a costly and time-consuming process, which typically yields a single model to fit all targets. To mitigate this, various techniques have been proposed in the literature, including pruning, sparsification, or quantization of the model weights and updates. While able to achieve high compression rates, they often incur computational overheads or accuracy penalties. Alternatively, factorization methods have been leveraged to incorporate low-rank compression in the training process. Such techniques (e.g., SVD) also frequently rely on the computationally expensive decomposition of layers and are potentially sub-optimal for non-linear models, such as DNNs. 
In this work, we take a further step in designing efficient low-rank models and propose MAESTRO, a framework for trainable low-rank layers. Instead of regularly applying a priori decompositions such as SVD, the low-rank structure is built into the training process through a generalized variant of Ordered Dropout. This method imposes an importance ordering via sampling on the decomposed DNN structure. Our theoretical analysis demonstrates that our method recovers the SVD decomposition of linear mapping on uniformly distributed data and PCA for linear autoencoders. We further apply our technique on DNNs and empirically illustrate that MAESTRO enables the extraction of lower footprint models that preserve model performance while allowing for graceful accuracy-latency tradeoffs for the deployment to devices of different capabilities.", "keywords": "low-rank approximation;efficient model training;trainable decomposition", "primary_area": "optimization", "supplementary_material": "/attachment/3a54a9fff5e8356d044b0bdfd8a70585281ffe39.pdf", "author": "Samuel Horv\u00e1th;Stefanos Laskaridis;Shashank Rajput;Hongyi Wang", "authorids": "~Samuel_Horv\u00e1th1;~Stefanos_Laskaridis1;~Shashank_Rajput1;~Hongyi_Wang1", "gender": "M;;M;M", "homepage": "https://sites.google.com/view/samuelhorvath;https://stefanos.cc;https://pages.cs.wisc.edu/~srajput/;https://hwang595.github.io/", "dblp": "234/8604;241/6273;241/5361;15/832-1.html", "google_scholar": "k252J7kAAAAJ;https://scholar.google.co.uk/citations?user=TcVC--IAAAAJ;qEXxyDQAAAAJ;zYdZORsAAAAJ", "orcid": "0000-0003-0619-9260;;;", "linkedin": "samuel-horvath/;stevelaskaridis/;;hongyi-wang-b89651102/", "or_profile": "~Samuel_Horv\u00e1th1;~Stefanos_Laskaridis1;~Shashank_Rajput1;~Hongyi_Wang1", "aff": "MBZUAI;Brave Software;University of Wisconsin, Madison;Carnegie Mellon University", "aff_domain": "mbzuai.ac.ae;brave.com;wisc.edu;andrew.cmu.edu", "position": "Assistant Professor;Researcher;PhD student;Researcher", "bibtex": "@misc{\nhorv{\\'a}th2024maestro,\ntitle={Maestro: Uncovering Low-Rank Structures via Trainable Decomposition},\nauthor={Samuel Horv{\\'a}th and Stefanos Laskaridis and Shashank Rajput and Hongyi Wang},\nyear={2024},\nurl={https://openreview.net/forum?id=3mdCet7vVv}\n}", "github": "", "project": "", "reviewers": "LrqS;Mad7;ZdCx;CAHp", "site": "https://openreview.net/forum?id=3mdCet7vVv", "pdf_size": 2326242, "rating": "5;5;5;8", "confidence": "4;4;3;3", "soundness": "3;3;2;3", "contribution": "2;1;2;3", "presentation": "3;4;2;2", "wc_summary": "70;76;68;76", "wc_strengths": "22;58;53;93", "wc_weaknesses": "80;63;191;159", "wc_questions": "16;64;5;7", "wc_review": "188;261;317;335", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "340;889;488;160", "reply_reviewers": "0;0;0;0", "reply_authors": "1;2;1;1", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 72.5, 3.570714214271425 ], "wc_strengths_avg": [ 56.5, 25.184320518926057 ], "wc_weaknesses_avg": [ 123.25, 53.312170280340304 ], "wc_questions_avg": [ 23.0, 24.031229681395832 ], "wc_review_avg": [ 275.25, 57.290378773403134 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 469.25, 268.7390695451631 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, 
"gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8374317016626564409&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Mohamed bin Zayed University of Artificial Intelligence;Brave Software;University of Wisconsin;Carnegie Mellon University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.mbzuai.ac.ae;https://www.brave.com;https://www.wisc.edu;https://www.cmu.edu", "aff_unique_abbr": "MBZUAI;Brave;UW;CMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Madison", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "United Arab Emirates;United States" }, { "title": "Towards Principled Representation Learning from Videos for Reinforcement Learning", "status": "Spotlight", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19497", "id": "3mnWvUZIXt", "author_site": "Dipendra Kumar Misra, Akanksha Saran, Tengyang Xie, Alex Lamb, John Langford", "tldr": "", "abstract": "We study pre-training representations for decision-making using video data, which is abundantly available for tasks such as game agents and software testing. Even though significant empirical advances have been made on this problem, a theoretical understanding remains absent. We initiate the theoretical investigation into principled approaches for representation learning and focus on learning the latent state representations of the underlying MDP using video data. We study two types of settings: one where there is iid noise in the observation, and a more challenging setting where there is also the presence of exogenous noise, which is non-iid noise that is temporally correlated, such as the motion of people or cars in the background. We study three commonly used approaches: autoencoding, temporal contrastive learning, and forward modeling. We prove upper bounds for temporal contrastive learning and forward modeling in the presence of only iid noise. We show that these approaches can learn the latent state and use it to do efficient downstream RL with polynomial sample complexity. When exogenous noise is also present, we establish a lower bound result showing that the sample complexity of learning from video data can be exponentially worse than learning from action-labeled trajectory data. This partially explains why reinforcement learning with video pre-training is hard. 
We evaluate these representational learning methods in two visual domains, yielding results that are consistent with our theoretical findings.", "keywords": "Reinforcement Learning;Representation Learning", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Dipendra Misra;Akanksha Saran;Tengyang Xie;Alex Lamb;John Langford", "authorids": "~Dipendra_Misra1;~Akanksha_Saran1;~Tengyang_Xie1;~Alex_Lamb1;~John_Langford1", "gender": "M;F;;M;M", "homepage": "https://dipendramisra.com/;;https://tengyangxie.github.io/;http://hunch.net/~jl;", "dblp": "218/6569;173/6209;227/3335;77/4488;", "google_scholar": "rIoPIFsAAAAJ;zZhWSQ0AAAAJ;rlmROVsAAAAJ;LFiqVpwAAAAJ;https://scholar.google.ca/citations?user=BFzFy1YAAAAJ", "orcid": ";;;;", "linkedin": ";akanksha-saran-8b506620/;;;", "or_profile": "~Dipendra_Misra1;~Akanksha_Saran1;~Tengyang_Xie1;~John_Langford1;~Alex_Matthew_Lamb1", "aff": "Microsoft Research;Sony AI;Microsoft Research, New England & NYC;Microsoft;", "aff_domain": "microsoft.com;sony.com;microsoft.com;microsoft.com;", "position": "Researcher;Researcher;Postdoc;Researcher;", "bibtex": "@inproceedings{\nmisra2024towards,\ntitle={Towards Principled Representation Learning from Videos for Reinforcement Learning},\nauthor={Dipendra Misra and Akanksha Saran and Tengyang Xie and Alex Lamb and John Langford},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=3mnWvUZIXt}\n}", "github": "", "project": "", "reviewers": "TsKD;ZrZp;tXAJ;sJQE", "pdf_size": 10089431, "rating": "5;8;8;8", "confidence": "3;4;3;3", "soundness": "2;4;3;3", "contribution": "2;4;3;3", "presentation": "3;3;3;3", "wc_summary": "78;144;102;175", "wc_strengths": "19;113;41;54", "wc_weaknesses": "55;48;182;126", "wc_questions": "219;9;73;108", "wc_review": "371;314;398;463", "wc_reply_reviewers": "95;0;64;68", "wc_reply_authors": "1135;279;676;776", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;1", "rating_avg": [ 7.25, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 124.75, 37.41239767777521 ], "wc_strengths_avg": [ 56.75, 34.802119188348286 ], "wc_weaknesses_avg": [ 102.75, 54.99715901753471 ], "wc_questions_avg": [ 102.25, 76.18193683544676 ], "wc_review_avg": [ 386.5, 53.57471418495856 ], "wc_reply_reviewers_avg": [ 56.75, 34.86671048435743 ], "wc_reply_authors_avg": [ 716.5, 304.8479129008431 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6575773369814907620&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=3mnWvUZIXt", "pdf": "https://openreview.net/pdf?id=3mnWvUZIXt", "email": "microsoft.com;sony.com;microsoft.com;microsoft.com;", "author_num": 5, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Microsoft;Sony", "aff_unique_dep": "Microsoft Research;Sony AI", "aff_unique_url": "https://www.microsoft.com/en-us/research;https://www.sony.com", "aff_unique_abbr": "MSR;Sony AI", "aff_campus_unique_index": "1", "aff_campus_unique": ";New England", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;Japan" }, { "id": "3nPFco1EKt", "title": "Evolving Neural 
Network's Weights at Imagenet Scale", "track": "main", "status": "Reject", "tldr": "", "abstract": "Building upon evolutionary theory, this work proposes a deep neural network optimization framework based on evolutionary algorithms to enhance existing pre-trained models, usually trained by backpropagation (BP). Specifically, we consider a pre-trained model to generate an initial population of deep neural networks (DNNs) using BP with distinct hyper-parameters, and subsequently simulate the evolutionary process of DNNs. Moreover, we enhance the evolutionary process, by developing an adaptive differential evolution (DE) algorithm, SA-SHADE-tri-ensin, which integrates the strengths of two DE algorithms, SADE and SHADE, with trigonometric mutation and sinusoidal change of mutation rate. Compared to existing work (e.g., ensembling, weight averaging and evolution inspired techniques), the proposed method better enhanced existing pre-trained deep neural network models (e.g., ResNet variants) on large-scale ImageNet. Our analysis reveals that DE with an adaptive trigonometric mutation strategy yields improved offspring with higher success rates and the importance of diversity in the parent population. Hence, the underlying mechanism is worth further investigation and has implications for developing advanced neuro-evolutionary optimizers.", "keywords": "optimization;evolution", "primary_area": "optimization", "supplementary_material": "/attachment/d2dbb1f657d104f10afbdc5da3ec2db9241e510f.zip", "author": "Guodong DU;Senqiao Yang;Runhua Jiang;Shuyang Yu;Haoyang Li;Wei Chen;Keren Li;Ho-Kin Tang;Sim Kuan Goh", "authorids": "~Guodong_DU2;~Senqiao_Yang1;~Runhua_Jiang2;~Shuyang_Yu2;~Haoyang_Li7;~Wei_Chen49;~Keren_Li1;~Ho-Kin_Tang1;~Sim_Kuan_Goh2", "gender": "M;;M;F;M;;M;M;M", "homepage": "https://duguodong7.github.io;;;;https://github.com/LiHaoyang0517;;https://cpoe.szu.edu.cn/info/1060/1756.htm;http://faculty.hitsz.edu.cn/denghaojian;https://simkuangoh.github.io/", "dblp": "213/8915-4;;;;;;;;152/7784", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;;sfFLGycAAAAJ;;;https://scholar.google.ca/citations?user=n_03hjkAAAAJ;https://scholar.google.com.hk/citations?user=a4yjxI4AAAAJ;", "orcid": ";;;;;;;0000-0002-8378-815X;", "linkedin": "\u56fd\u4e1c-\u675c-30b496169/;;runhua-jiang-295004261/?originalSubdomain=my;shuyang-yu;;;;;", "or_profile": "~Guodong_DU2;~Senqiao_Yang1;~Runhua_Jiang2;~Shuyang_Yu2;~Haoyang_Li7;~Wei_Chen49;~Keren_Li1;~Ho-Kin_Tang1;~Sim_Kuan_Goh2", "aff": "Harbin Institute of Technology;;Xiamen University;Xiamen University Malaysia;Xiamen University;;Shenzhen University;Harbin Institute of Technology;Xiamen University Malaysia", "aff_domain": "hit.edu.cn;;xmu.edu.cn;xmu.edu.my;xmu.edu.cn;;szu.edu.cn;hit.edu.cn;xmu.edu.cn", "position": "Researcher;;Undergrad student;Undergrad student;Undergrad student;;Assistant Professor;Associate Professor;Assistant Professor", "bibtex": "@misc{\ndu2024evolving,\ntitle={Evolving Neural Network's Weights at Imagenet Scale},\nauthor={Guodong DU and Senqiao Yang and Runhua Jiang and Shuyang Yu and Haoyang Li and Wei Chen and Keren Li and Ho-Kin Tang and Sim Kuan Goh},\nyear={2024},\nurl={https://openreview.net/forum?id=3nPFco1EKt}\n}", "github": "", "project": "", "reviewers": "SWqn;4ELS;BX5K", "site": "https://openreview.net/forum?id=3nPFco1EKt", "pdf_size": 13585968, "rating": "3;3;3", "confidence": "4;4;5", "soundness": "3;2;2", "contribution": "1;1;1", "presentation": "2;2;1", "wc_summary": "100;47;67", "wc_strengths": "112;8;92", 
"wc_weaknesses": "165;26;687", "wc_questions": "364;136;67", "wc_review": "741;217;913", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 3.0, 0.0 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 1.0, 0.0 ], "presentation_avg": [ 1.6666666666666667, 0.4714045207910317 ], "wc_summary_avg": [ 71.33333333333333, 21.85304453744502 ], "wc_strengths_avg": [ 70.66666666666667, 45.057987329908805 ], "wc_weaknesses_avg": [ 292.6666666666667, 284.55149895151766 ], "wc_questions_avg": [ 189.0, 126.90941651429968 ], "wc_review_avg": [ 623.6666666666666, 296.00600594507466 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:gtAHD9oKsAQJ:scholar.google.com/&scioq=Evolving+Neural+Network%27s+Weights+at+Imagenet+Scale&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;1;1;2;0;1", "aff_unique_norm": "Harbin Institute of Technology;Xiamen University;Shenzhen University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.hit.edu.cn/;https://www.xmu.edu.cn;https://www.szu.edu.cn", "aff_unique_abbr": "HIT;XMU;SZU", "aff_campus_unique_index": "0;2;0;2", "aff_campus_unique": "Harbin;;Malaysia", "aff_country_unique_index": "0;0;1;0;0;0;1", "aff_country_unique": "China;Malaysia" }, { "id": "3nyovHUr5A", "title": "Learning with Instance-Dependent Noisy Labels by Hard Sample Selection with Anchor Hallucination", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Learning from noisily-labeled data is common in real-world visual learning tasks. Mainstream Noisy-Label Learning (NLL) methods mainly focus on sample-selection approaches, which typically divide the training dataset into clean and noisy subsets according to the loss distribution of samples. However, they overlook the fact that clean samples with complex visual patterns may also yield large losses, especially for datasets with Instance-Dependent Noise (IDN), in which the probability of an image being mislabeled depends on its visual appearance. This paper extends this idea and distinguishes complex samples from noisy ones. Specifically, we first select training samples with small initial losses to form an *easy* subset, where these easy samples are assumed to contain simple patterns with correct labels. The remaining samples either have complex patterns or incorrect labels, forming a *hard* subset. Subsequently, we utilize the easy subset to hallucinate multiple anchors, which are used to select hard samples to form a *clean hard* subset. We further exploit samples from these subsets following a semi-supervised training scheme to better characterize the decision boundary. 
Extensive experiments on synthetic and real-world instance-dependent noisy datasets show that our method outperforms the State-of-The-Art NLL methods.", "keywords": "Noisy label learning;semi-supervised learning", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Po-Hsuan Huang;Chia-Ching Lin;Chih-Fan Hsu;Ming-Ching Chang;Wei-Chao Chen", "authorids": "~Po-Hsuan_Huang2;~Chia-Ching_Lin1;~Chih-Fan_Hsu1;~Ming-Ching_Chang1;~Wei-Chao_Chen1", "gender": ";M;M;M;M", "homepage": ";;https://sites.google.com/site/chihfanhsuwebsite/;https://www.albany.edu/faculty/mchang2/;", "dblp": ";36/9449;120/8997;21/4361;37/1413", "google_scholar": ";https://scholar.google.com.tw/citations?user=iJUWS9YAAAAJ;https://scholar.google.com.tw/citations?hl=en;a3-Gl8YAAAAJ;bndb0gYAAAAJ", "orcid": ";;0000-0002-4180-8255;0000-0001-9325-5341;", "linkedin": ";;;mingchingchang/edit/forms/intro/new/?profileFormEntryPoint=PROFILE_SECTION;wei-chao-chen-b4b0bb1/", "or_profile": "~Po-Hsuan_Huang2;~Chia-Ching_Lin1;~Chih-Fan_Hsu1;~Ming-Ching_Chang1;~Wei-Chao_Chen1", "aff": ";Inventec Inc.;Inventec Inc.;State University of New York at Albany;Inventec Inc.", "aff_domain": ";inventec.com;inventec.com;albany.edu;inventec.com", "position": ";Researcher;Researcher;Assistant Professor;Senior Vice President", "bibtex": "@misc{\nhuang2024learning,\ntitle={Learning with Instance-Dependent Noisy Labels by Hard Sample Selection with Anchor Hallucination},\nauthor={Po-Hsuan Huang and Chia-Ching Lin and Chih-Fan Hsu and Ming-Ching Chang and Wei-Chao Chen},\nyear={2024},\nurl={https://openreview.net/forum?id=3nyovHUr5A}\n}", "github": "", "project": "", "reviewers": "Yvwi;T8rR;VL2q", "site": "https://openreview.net/forum?id=3nyovHUr5A", "pdf_size": 2037681, "rating": "3;3;5", "confidence": "5;4;4", "soundness": "2;2;3", "contribution": "1;2;2", "presentation": "1;3;3", "wc_summary": "83;16;85", "wc_strengths": "12;9;17", "wc_weaknesses": "297;94;88", "wc_questions": "2;7;91", "wc_review": "394;126;281", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 1.6666666666666667, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 61.333333333333336, 32.0659043568433 ], "wc_strengths_avg": [ 12.666666666666666, 3.2998316455372216 ], "wc_weaknesses_avg": [ 159.66666666666666, 97.14021938528974 ], "wc_questions_avg": [ 33.333333333333336, 40.827550610940264 ], "wc_review_avg": [ 267.0, 109.85748343497892 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 3, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:-aZk3EVbOrMJ:scholar.google.com/&scioq=Learning+with+Instance-Dependent+Noisy+Labels+by+Hard+Sample+Selection+with+Anchor+Hallucination&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Inventec Corporation;State University of New York", "aff_unique_dep": ";", "aff_unique_url": "https://www.inventec.com;https://www.albany.edu", "aff_unique_abbr": "Inventec;SUNY Albany", 
"aff_campus_unique_index": "0;0;1;0", "aff_campus_unique": "Taiwan;Albany", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "China;United States" }, { "title": "SEGNO: Generalizing Equivariant Graph Neural Networks with Physical Inductive Biases", "status": "Spotlight", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19496", "id": "3oTPsORaDH", "author_site": "Yang Liu, Jiashun Cheng, Haihong Zhao, Tingyang Xu, Peilin Zhao, Fugee Tsung, Jia Li, Yu Rong", "tldr": "", "abstract": "Graph Neural Networks (GNNs) with equivariant properties have emerged as powerful tools for modeling complex dynamics of multi-object physical systems. However, their generalization ability is limited by the inadequate consideration of physical inductive biases: (1) Existing studies overlook the continuity of transitions among system states, opting to employ several discrete transformation layers to learn the direct mapping between two adjacent states; (2) Most models only account for first-order velocity information, despite the fact that many physical systems are governed by second-order motion laws. To incorporate these inductive biases, we propose the Second-order Equivariant Graph Neural Ordinary Differential Equation (SEGNO). Specifically, we show how the second-order continuity can be incorporated into GNNs while maintaining the equivariant property. Furthermore, we offer theoretical insights into SEGNO, highlighting that it can learn a unique trajectory between adjacent states, which is crucial for model generalization. Additionally, we prove that the discrepancy between this learned trajectory of SEGNO and the true trajectory is bounded. Extensive experiments on complex dynamical systems including molecular dynamics and motion capture demonstrate that our model yields a significant improvement over the state-of-the-art baselines.", "keywords": "Equivariant Graph Neural Network;Graph Neural Network", "primary_area": "learning on graphs and other geometries & topologies", "supplementary_material": "/attachment/39e5fdfd3c0682b49cf79245d7776f22592626f6.zip", "author": "Yang Liu;Jiashun Cheng;Haihong Zhao;Tingyang Xu;Peilin Zhao;Fugee Tsung;Jia Li;Yu Rong", "authorids": "~Yang_Liu21;~Jiashun_Cheng1;~Haihong_Zhao2;~Tingyang_Xu1;~Peilin_Zhao2;~Fugee_Tsung1;~Jia_Li4;~Yu_Rong1", "gender": "M;M;M;;M;M;M;M", "homepage": "https://scholar.google.com/citations?hl=zh-CN&user=IWyM6l0AAAAJ;https://www.linkedin.com/in/jiashun-cheng-b2b31a149/;;;https://ieda.ust.hk/dfaculty/tsung/;https://sites.google.com/view/lijia;https://royrong.me/;https://haihongzhao.com", "dblp": "51/3710-245;323/4178;157/0940;84/8411;95/2794;23/6950-9;24/10036-1;116/7210", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;HVKMb10AAAAJ;6gIs5YMAAAAJ;https://scholar.google.com.hk/citations?user=HPeX_YcAAAAJ;yQVoXS0AAAAJ;1gSbcYoAAAAJ;https://scholar.google.com.hk/citations?user=itezhEMAAAAJ;", "orcid": "0000-0002-2633-512X;0000-0002-5485-7224;0009-0002-0106-8376;0000-0001-8543-3953;0000-0002-0575-8254;0000-0002-6362-4385;0000-0001-7387-302X;0000-0003-4188-6517", "linkedin": ";;;;ftsung/;;;", "or_profile": "~Yang_Liu21;~Jiashun_Cheng1;~Tingyang_Xu1;~Peilin_Zhao2;~Fugee_Tsung1;~Jia_Li4;~Yu_Rong1;~Haihong_ZHAO1", "aff": "Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Tencent AI Lab;Tencent;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology (Guangzhou);Tencent AI Lab;Hong Kong University of Science and Technology", "aff_domain": 
"ust.hk;ust.hk;tencent.com;tencent.com;ust.hk;ust.hk;tencent.com;hkust.edu", "position": "PhD student;PhD student;Researcher;Researcher;Full Professor;Assistant Professor;Principal Researcher;PhD student", "bibtex": "@inproceedings{\nliu2024segno,\ntitle={{SEGNO}: Generalizing Equivariant Graph Neural Networks with Physical Inductive Biases},\nauthor={Yang Liu and Jiashun Cheng and Haihong Zhao and Tingyang Xu and Peilin Zhao and Fugee Tsung and Jia Li and Yu Rong},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=3oTPsORaDH}\n}", "github": "", "project": "", "reviewers": "mXJL;GrTs;rsLd", "pdf_size": 2762438, "rating": "6;6;8", "confidence": "4;4;3", "soundness": "3;3;4", "contribution": "2;2;4", "presentation": "3;3;3", "wc_summary": "389;56;81", "wc_strengths": "2;61;150", "wc_weaknesses": "2;327;56", "wc_questions": "2;271;80", "wc_review": "395;715;367", "wc_reply_reviewers": "0;17;23", "wc_reply_authors": "284;1874;492", "reply_reviewers": "0;1;1", "reply_authors": "2;4;2", "rating_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.9428090415820634 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 175.33333333333334, 151.42948487295627 ], "wc_strengths_avg": [ 71.0, 60.83310502240262 ], "wc_weaknesses_avg": [ 128.33333333333334, 142.1978277690001 ], "wc_questions_avg": [ 117.66666666666667, 113.00245818368535 ], "wc_review_avg": [ 492.3333333333333, 157.8635134820231 ], "wc_reply_reviewers_avg": [ 13.333333333333334, 9.741092797468305 ], "wc_reply_authors_avg": [ 883.3333333333334, 705.6350961289332 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.9999999999999998, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12252397141382208062&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=3oTPsORaDH", "pdf": "https://openreview.net/pdf?id=3oTPsORaDH", "email": "ust.hk;ust.hk;tencent.com;tencent.com;ust.hk;ust.hk;tencent.com;hkust.edu", "author_num": 8, "aff_unique_index": "0;0;1;1;0;0;1;0", "aff_unique_norm": "Hong Kong University of Science and Technology;Tencent", "aff_unique_dep": ";Tencent AI Lab", "aff_unique_url": "https://www.ust.hk;https://ai.tencent.com", "aff_unique_abbr": "HKUST;Tencent AI Lab", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Where We Have Arrived in Proving the Emergence of Sparse Interaction Primitives in DNNs", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19495", "id": "3pWSL8My6B", "author_site": "Qihan Ren, Jiayang Gao, Wen Shen, Quanshi Zhang", "tldr": "", "abstract": "This study aims to prove the emergence of symbolic concepts (or more precisely, sparse primitive inference patterns) in well-trained deep neural networks (DNNs). Specifically, we prove the following three conditions for the emergence. (i) The high-order derivatives of the network output with respect to the input variables are all zero. 
(ii) The DNN can be used on occluded samples, and when the input sample is less occluded, the DNN will yield higher confidence. (iii) The confidence of the DNN does not significantly degrade on occluded samples. These conditions are quite common, and we prove that under these conditions, the DNN will only encode a relatively small number of sparse interactions between input variables. Moreover, we can consider such interactions as symbolic primitive inference patterns encoded by a DNN, because we show that inference scores of the DNN on an exponentially large number of randomly masked samples can always be well mimicked by numerical effects of just a few interactions.", "keywords": "Explainable AI;Neural networks;Symbolism", "primary_area": "visualization or interpretation of learned representations", "supplementary_material": "", "author": "Qihan Ren;Jiayang Gao;Wen Shen;Quanshi Zhang", "authorids": "~Qihan_Ren1;~Jiayang_Gao1;~Wen_Shen3;~Quanshi_Zhang1", "gender": "M;F;M;M", "homepage": "https://nebularaid2000.github.io/;https://ada-shen.github.io/;http://qszhang.com;https://github.com/gjy0515", "dblp": "268/5838;55/8186-2;http://dblp.uni-trier.de/pers/hd/z/Zhang:Quanshi;", "google_scholar": "ybTy_DwAAAAJ;;iFFhHK0AAAAJ;", "orcid": ";0000-0002-4210-5447;;", "linkedin": ";;;", "or_profile": "~Qihan_Ren1;~Wen_Shen3;~Quanshi_Zhang1;~\u4f73\u9633_\u9ad81", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;Postdoc;Associate Professor;Undergrad student", "bibtex": "@inproceedings{\nren2024where,\ntitle={Where We Have Arrived in Proving the Emergence of Sparse Interaction Primitives in {DNN}s},\nauthor={Qihan Ren and Jiayang Gao and Wen Shen and Quanshi Zhang},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=3pWSL8My6B}\n}", "github": "", "project": "", "reviewers": "iVY1;8DUx;JPCH;WKdR", "pdf_size": 3099888, "rating": "6;6;8;8", "confidence": "2;3;3;2", "soundness": "2;3;4;3", "contribution": "2;2;4;3", "presentation": "3;3;4;3", "wc_summary": "36;67;97;113", "wc_strengths": "39;33;113;46", "wc_weaknesses": "336;260;107;409", "wc_questions": "107;164;50;58", "wc_review": "518;524;367;626", "wc_reply_reviewers": "53;373;13;32", "wc_reply_authors": "981;3311;1169;1830", "reply_reviewers": "1;4;1;1", "reply_authors": "2;5;2;3", "rating_avg": [ 7.0, 1.0 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 78.25, 29.45653577731095 ], "wc_strengths_avg": [ 57.75, 32.22867512014727 ], "wc_weaknesses_avg": [ 278.0, 111.90397669430698 ], "wc_questions_avg": [ 94.75, 45.54873763344051 ], "wc_review_avg": [ 508.75, 92.41042960618677 ], "wc_reply_reviewers_avg": [ 117.75, 148.04623433238686 ], "wc_reply_authors_avg": [ 1822.75, 915.2694616887422 ], "reply_reviewers_avg": [ 1.75, 1.299038105676658 ], "reply_authors_avg": [ 3.0, 1.224744871391589 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4914810873657200932&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "openreview": "https://openreview.net/forum?id=3pWSL8My6B", "pdf": "https://openreview.net/pdf?id=3pWSL8My6B", "email": 
"sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Rethinking the Uniformity Metric in Self-Supervised Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19494", "id": "3pf2hEdu8B", "author_site": "Xianghong Fang, Jian Li, Qiang Sun, Wang Benyou", "tldr": "", "abstract": "Uniformity plays an important role in evaluating learned representations, providing insights into self-supervised learning. In our quest for effective uniformity metrics, we pinpoint four principled properties that such metrics should possess. Namely, an effective uniformity metric should remain invariant to instance permutations and sample replications while accurately capturing feature redundancy and dimensional collapse. Surprisingly, we find that the uniformity metric proposed by \\citet{Wang2020UnderstandingCR} fails to satisfy the majority of these properties. Specifically, their metric is sensitive to sample replications, and can not account for feature redundancy and dimensional collapse correctly. To overcome these limitations, we introduce a new uniformity metric based on the Wasserstein distance, which satisfies all the aforementioned properties. Integrating this new metric in existing self-supervised learning methods effectively mitigates dimensional collapse and consistently improves their performance on downstream tasks involving CIFAR-10 and CIFAR-100 datasets. Code is available at \\url{https://github.com/statsle/WassersteinSSL}.", "keywords": "Effective uniformity metrics;dimensional collapse;Wasserstein distance;self-supervised learning", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/c684951f3e22d50fe6f7a9a520403451de0fd62b.pdf", "author": "Xianghong Fang;Jian Li;Qiang Sun;Benyou Wang", "authorids": "~Xianghong_Fang1;~Jian_Li17;~Qiang_Sun2;~Benyou_Wang2", "gender": "M;M;M;M", "homepage": "https://jack57lee.github.io/;https://sites.google.com/view/qsun;https://wabyking.github.io/old.html;", "dblp": "33/5448-54.html;73/2066-7;169/1793;202/6322.html", "google_scholar": ";f0V2fAYAAAAJ;Jk4vJU8AAAAJ;https://scholar.google.com.hk/citations?user=hQfxe5QAAAAJ", "orcid": ";;0000-0002-1501-9914;0000-0003-2250-6961", "linkedin": ";;;", "or_profile": "~Jian_Li17;~Qiang_Sun2;~Benyou_Wang2;~Fang_XiangHong1", "aff": "Tencent;University of Toronto;The Chinese University of Hong Kong, Shenzhen;University of Toronto", "aff_domain": "tencent.com;utoronto.ca;cuhk.edu.cn;utoronto.ca", "position": "Researcher;Full Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nfang2024rethinking,\ntitle={Rethinking the Uniformity Metric in Self-Supervised Learning},\nauthor={Xianghong Fang and Jian Li and Qiang Sun and Benyou Wang},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=3pf2hEdu8B}\n}", "github": "", "project": "", "reviewers": "u9aL;a1G3;KkL8;RABe", "pdf_size": 1523269, "rating": "5;5;6;8", "confidence": "4;5;2;3", "soundness": "3;2;3;3", "contribution": "2;2;3;3", "presentation": "3;2;2;4", "wc_summary": "119;96;29;60", "wc_strengths": "131;26;17;75", 
"wc_weaknesses": "126;155;25;116", "wc_questions": "19;4;12;52", "wc_review": "395;281;83;303", "wc_reply_reviewers": "171;47;15;0", "wc_reply_authors": "1304;1608;398;335", "reply_reviewers": "1;1;1;0", "reply_authors": "3;5;1;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 76.0, 34.32928778754374 ], "wc_strengths_avg": [ 62.25, 45.41681957160805 ], "wc_weaknesses_avg": [ 105.5, 48.63383595810637 ], "wc_questions_avg": [ 21.75, 18.25342433627181 ], "wc_review_avg": [ 265.5, 113.71345566818378 ], "wc_reply_reviewers_avg": [ 58.25, 67.27323018853785 ], "wc_reply_authors_avg": [ 911.25, 555.6983781693086 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 1.6583123951777 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5477225575051661, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:P8Ys2UUAgwIJ:scholar.google.com/&scioq=Rethinking+the+Uniformity+Metric+in+Self-Supervised+Learning&hl=en&as_sdt=0,33", "gs_version_total": 6, "openreview": "https://openreview.net/forum?id=3pf2hEdu8B", "pdf": "https://openreview.net/pdf?id=3pf2hEdu8B", "email": "tencent.com;utoronto.ca;cuhk.edu.cn;utoronto.ca", "author_num": 4, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Tencent;University of Toronto;Chinese University of Hong Kong", "aff_unique_dep": "Tencent Holdings Limited;;", "aff_unique_url": "https://www.tencent.com;https://www.utoronto.ca;https://www.cuhk.edu.cn", "aff_unique_abbr": "Tencent;U of T;CUHK", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "China;Canada" }, { "id": "3pgJNIx3gc", "title": "AlphaFold Distillation for Protein Design", "track": "main", "status": "Reject", "tldr": "", "abstract": "Inverse protein folding, the process of designing sequences that fold into a specific 3D structure, is crucial in bio-engineering and drug discovery. Traditional methods rely on experimentally resolved structures, but these cover only a small fraction of protein sequences. Forward folding models like AlphaFold offer a potential solution by accurately predicting structures from sequences. However, these models are too slow for integration into the optimization loop of inverse folding models during training.\nTo address this, we propose using knowledge distillation on folding model confidence metrics, such as pTM or pLDDT scores, to create faster and end-to-end differentiable distilled model. This model can then be used as a structure consistency regularizer in training the inverse folding model. Our technique is versatile and can be applied to other design tasks, such as sequence-based protein infilling.\nExperimental results show that our method outperforms non-regularized baselines, yielding up to 3\\% improvement in sequence recovery and up to 45\\% improvement in protein diversity while maintaining structural consistency in generated sequences. 
Anonymized code for this work is available at https://anonymous.4open.science/r/AFDistill-28C3", "keywords": "Inverse Protein Folding Design;Protein Design;Model Distillation;AlphaFold;Protein Folding", "primary_area": "applications to physical sciences (physics, chemistry, biology, etc.)", "supplementary_material": "", "author": "Igor Melnyk;Aurelie Lozano;Payel Das;Vijil Chenthamarakshan", "authorids": "~Igor_Melnyk1;~Aurelie_Lozano1;~Payel_Das1;~Vijil_Chenthamarakshan1", "gender": "M;F;F;M", "homepage": "https://imelnyk.github.io/;https://research.ibm.com/people/aurelie-lozano;;https://researcher.watson.ibm.com/researcher/view.php?person=us-ecvijil", "dblp": ";06/274;56/7926;", "google_scholar": "4vDRTWwAAAAJ;4wTGaDsAAAAJ;;g9hboJ0AAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Igor_Melnyk1;~Aurelie_Lozano1;~Payel_Das1;~Vijil_Chenthamarakshan1", "aff": "International Business Machines;IBM Research;IBM, International Business Machines;International Business Machines", "aff_domain": "ibm.com;us.ibm.com;us.ibm.com;ibm.com", "position": "Researcher;Principal Researcher;Principal Researcher;Senior Technical Staff member", "bibtex": "@misc{\nmelnyk2024alphafold,\ntitle={AlphaFold Distillation for Protein Design},\nauthor={Igor Melnyk and Aurelie Lozano and Payel Das and Vijil Chenthamarakshan},\nyear={2024},\nurl={https://openreview.net/forum?id=3pgJNIx3gc}\n}", "github": "", "project": "", "reviewers": "6HvL;XUpx;s5YM", "site": "https://openreview.net/forum?id=3pgJNIx3gc", "pdf_size": 2714676, "rating": "3;3;5", "confidence": "5;4;4", "soundness": "1;3;2", "contribution": "2;2;2", "presentation": "2;3;3", "wc_summary": "75;97;82", "wc_strengths": "26;29;32", "wc_weaknesses": "161;125;47", "wc_questions": "45;11;124", "wc_review": "307;262;285", "wc_reply_reviewers": "68;112;0", "wc_reply_authors": "422;1094;487", "reply_reviewers": "1;1;0", "reply_authors": "1;2;1", "rating_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.0, 0.816496580927726 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 84.66666666666667, 9.177266598624136 ], "wc_strengths_avg": [ 29.0, 2.449489742783178 ], "wc_weaknesses_avg": [ 111.0, 47.58150901348127 ], "wc_questions_avg": [ 60.0, 47.33568069296845 ], "wc_review_avg": [ 284.6666666666667, 18.372685039360892 ], "wc_reply_reviewers_avg": [ 60.0, 46.07240678178932 ], "wc_reply_authors_avg": [ 667.6666666666666, 302.628852263333 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15258041265459336797&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "International Business Machines Corporation;IBM;International Business Machines", "aff_unique_dep": ";IBM Research;", "aff_unique_url": "https://www.ibm.com;https://www.ibm.com/research;https://www.ibm.com", "aff_unique_abbr": "IBM;IBM;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "LRR: Language-Driven Resamplable Continuous Representation against Adversarial Tracking Attacks", "status": "Poster", "track": "main", "site": 
"https://iclr.cc/virtual/2024/poster/19493", "id": "3qo1pJHabg", "author_site": "Jianlang Chen, Xuhong Ren, Qing Guo, Felix Juefei-Xu, Di Lin, Wei Feng, Lei Ma, Jianjun Zhao", "tldr": "", "abstract": "Visual object tracking plays a critical role in visual-based autonomous systems, as it aims to estimate the position and size of the object of interest within a live video. Despite significant progress made in this field, state-of-the-art (SOTA) trackers often fail when faced with adversarial perturbations in the incoming frames. This can lead to significant robustness and security issues when these trackers are deployed in the real world. To achieve high accuracy on both clean and adversarial data, we propose building a spatial-temporal continuous representation using the semantic text guidance of the object of interest. This novel continuous representation enables us to reconstruct incoming frames to maintain semantic and appearance consistency with the object of interest and its clean counterparts. As a result, our proposed method successfully defends against different SOTA adversarial tracking attacks while maintaining high accuracy on clean data. In particular, our method significantly increases tracking accuracy under adversarial attacks with around 90% relative improvement on UAV123, which is even higher than the accuracy on clean data.", "keywords": "Tracking defence;spatial-temporal implicit representation;languange-image model", "primary_area": "applications to robotics, autonomy, planning", "supplementary_material": "/attachment/68d998940505f042f10064d97ab0ab3404c58553.pdf", "author": "Jianlang Chen;Xuhong Ren;Qing Guo;Felix Juefei-Xu;Di Lin;Wei Feng;Lei Ma;Jianjun Zhao", "authorids": "~Jianlang_Chen1;~Xuhong_Ren1;~Qing_Guo3;~Felix_Juefei-Xu1;~Di_Lin3;~Wei_Feng1;~Lei_Ma1;~Jianjun_Zhao1", "gender": ";F;M;;M;M;M;M", "homepage": ";https://www.facebook.com/profile.php?id=100074098905175;https://tsingqguo.github.io;;https://dilincv.github.io/;;https://www.malei.org;http://stap.ait.kyushu-u.ac.jp/~zhao/", "dblp": ";;25/3038-5;;20/3191-2.html;17/1152-5;20/6534-3;71/6948", "google_scholar": ";;Rj2x4QUAAAAJ;;rW0r-hMAAAAJ;https://scholar.google.co.jp/citations?user=7ory1i8AAAAJ;xsfGc58AAAAJ;https://scholar.google.com/scholar?hl=en", "orcid": ";;0000-0003-0974-9299;;;;;", "linkedin": ";;;;;;lei-ma-345a0484;jianjunzhao/", "or_profile": "~Jianlang_Chen1;~Xuhong_Ren1;~Qing_Guo3;~Felix_Juefei-Xu1;~Di_Lin3;~Wei_Feng1;~Lei_Ma1;~Jianjun_Zhao1", "aff": ";Tianjin University of Technology; Agency for Science, Technology and Research (A*STAR));;Tianjin University;Tianjin University;University of Alberta;Kyushu University", "aff_domain": ";tjut.edu.cn;cfar.a-star.edu.sg;;tju.edu.cn;tju.edu.cn;ualberta.ca;kyushu-u.ac.jp", "position": ";PhD student;Researcher;;Associate Professor;Full Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nchen2024lrr,\ntitle={{LRR}: Language-Driven Resamplable Continuous Representation against Adversarial Tracking Attacks},\nauthor={Jianlang Chen and Xuhong Ren and Qing Guo and Felix Juefei-Xu and Di Lin and Wei Feng and Lei Ma and Jianjun Zhao},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=3qo1pJHabg}\n}", "github": "", "project": "", "reviewers": "DMWE;n1Fj;bxnS", "pdf_size": 7849670, "rating": "5;6;6", "confidence": "5;4;4", "soundness": "2;3;3", "contribution": "2;3;3", "presentation": "2;2;2", "wc_summary": "71;76;71", "wc_strengths": "34;52;44", "wc_weaknesses": 
"126;135;118", "wc_questions": "15;48;48", "wc_review": "246;311;281", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1226;1126;603", "reply_reviewers": "0;0;0", "reply_authors": "4;4;4", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 72.66666666666667, 2.357022603955158 ], "wc_strengths_avg": [ 43.333333333333336, 7.363574011458174 ], "wc_weaknesses_avg": [ 126.33333333333333, 6.944222218666553 ], "wc_questions_avg": [ 37.0, 15.556349186104045 ], "wc_review_avg": [ 279.3333333333333, 26.562295750848712 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 985.0, 273.18247869632245 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 4.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.9999999999999998, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6269508987053163622&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=3qo1pJHabg", "pdf": "https://openreview.net/pdf?id=3qo1pJHabg", "email": ";tjut.edu.cn;cfar.a-star.edu.sg;;tju.edu.cn;tju.edu.cn;ualberta.ca;kyushu-u.ac.jp", "author_num": 8, "aff_unique_index": "0;1;2;2;3;4", "aff_unique_norm": "Tianjin University of Technology;Agency for Science, Technology and Research;Tianjin University;University of Alberta;Kyushu University", "aff_unique_dep": ";;;;", "aff_unique_url": "http://www.tjut.edu.cn;https://www.a-star.edu.sg;http://www.tju.edu.cn;https://www.ualberta.ca;https://www.kyushu-u.ac.jp", "aff_unique_abbr": "TUT;A*STAR;TJU;UAlberta;Kyushu U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;2;3", "aff_country_unique": "China;Singapore;Canada;Japan" }, { "id": "3rBu7dR7rm", "title": "Unified Long-Term Time-Series Forecasting Benchmark", "track": "main", "status": "Reject", "tldr": "", "abstract": "In order to support the advancement of machine learning methods for predicting time-series data, we present a comprehensive dataset designed explicitly for long-term time-series forecasting. We incorporate a collection of datasets obtained from diverse, dynamic systems and real-life records. Each dataset is standardized by dividing it into training and test trajectories with predetermined lookback lengths. We include trajectories of length up to $2000$ to ensure a reliable evaluation of long-term forecasting capabilities. To determine the most effective model in diverse scenarios, we conduct an extensive benchmarking analysis using classical and state-of-the-art models, namely LSTM, DeepAR, NLinear, N-Hits, PatchTST, and LatentODE. Our findings reveal intriguing performance comparisons among these models, highlighting the dataset-dependent nature of model effectiveness. Notably, we introduce a custom latent NLinear model and enhance DeepAR with a curriculum learning phase. 
Both consistently outperform their vanilla counterparts.", "keywords": "time-series;forecasting;long-term;benchmark;neural network", "primary_area": "datasets and benchmarks", "supplementary_material": "/attachment/d531b4956510f30cbbe4177c5acf89bf1d1d531c.zip", "author": "Jacek Cyranka;Szymon Jan Haponiuk", "authorids": "~Jacek_Cyranka1;~Szymon_Jan_Haponiuk1", "gender": ";M", "homepage": ";", "dblp": "142/0441;", "google_scholar": "-60x4zkAAAAJ;2PRTAF4AAAAJ", "orcid": "0000-0001-5719-0616;", "linkedin": "cyranka/;szymon-haponiuk-a3901115b/", "or_profile": "~Jacek_Cyranka1;~Szymon_Jan_Haponiuk1", "aff": "University of Warsaw;University of Warsaw", "aff_domain": "mimuw.edu.pl;mimuw.edu.pl", "position": "Assistant Professor;MS student", "bibtex": "@misc{\ncyranka2024unified,\ntitle={Unified Long-Term Time-Series Forecasting Benchmark},\nauthor={Jacek Cyranka and Szymon Jan Haponiuk},\nyear={2024},\nurl={https://openreview.net/forum?id=3rBu7dR7rm}\n}", "github": "", "project": "", "reviewers": "av6e;ptT1;WuTL", "site": "https://openreview.net/forum?id=3rBu7dR7rm", "pdf_size": 5058133, "rating": "3;5;5", "confidence": "4;4;2", "soundness": "2;2;1", "contribution": "2;2;1", "presentation": "2;3;2", "wc_summary": "38;132;216", "wc_strengths": "18;221;8", "wc_weaknesses": "150;404;8", "wc_questions": "44;97;8", "wc_review": "250;854;240", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "614;1514;518", "reply_reviewers": "0;0;0", "reply_authors": "1;3;1", "rating_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 1.6666666666666667, 0.4714045207910317 ], "contribution_avg": [ 1.6666666666666667, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 128.66666666666666, 72.70641114937679 ], "wc_strengths_avg": [ 82.33333333333333, 98.1370923193106 ], "wc_weaknesses_avg": [ 187.33333333333334, 163.80747507024486 ], "wc_questions_avg": [ 49.666666666666664, 36.55437350334734 ], "wc_review_avg": [ 448.0, 287.11437906636905 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 882.0, 448.60673200477055 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11549142946293258542&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0", "aff_unique_norm": "University of Warsaw", "aff_unique_dep": "", "aff_unique_url": "https://www.uw.edu.pl", "aff_unique_abbr": "UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Poland" }, { "title": "Boosting Vanilla Lightweight Vision Transformers via Re-parameterization", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19492", "id": "3rmpixOjPS", "author_site": "Zhentao Tan, Xiaodan Li, Yue Wu, Qi Chu, Le Lu, Nenghai Yu, Jieping Ye", "tldr": "", "abstract": "Large-scale Vision Transformers have achieved promising performance on downstream tasks through feature pre-training. However, the performance of vanilla lightweight Vision Transformers (ViTs) is still far from satisfactory compared to that of recent lightweight CNNs or hybrid networks. 
In this paper, we aim to unlock the potential of vanilla lightweight ViTs by exploring the adaptation of the widely-used re-parameterization technology to ViTs for improving learning ability during training without increasing the inference cost. The main challenge comes from the fact that CNNs perfectly complement with re-parameterization over convolution and batch normalization, while vanilla Transformer architectures are mainly comprised of linear and layer normalization layers. We propose to incorporate the nonlinear ensemble into linear layers by expanding the depth of the linear layers with batch normalization and fusing multiple linear features with hierarchical representation ability through a pyramid structure. We also discover and solve a new transformer-specific distribution rectification problem caused by multi-branch re-parameterization. Finally, we propose our Two-Dimensional Re-parameterized Linear module (TDRL) for ViTs. Under the popular self-supervised pre-training and supervised fine-tuning strategy, our TDRL can be used in these two stages to enhance both generic and task-specific representation. Experiments demonstrate that our proposed method not only boosts the performance of vanilla Vit-Tiny on various vision tasks to new state-of-the-art (SOTA) but also shows promising generality ability on other networks. Code will be available.", "keywords": "Vision Transformers;Re-parameterization;Lightweight Models", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/75c9d86ba84232b31ffd8b9cdd0286c7a6152539.zip", "author": "Zhentao Tan;Xiaodan Li;Yue Wu;Qi Chu;Le Lu;Nenghai Yu;Jieping Ye", "authorids": "~Zhentao_Tan1;~Xiaodan_Li1;~Yue_Wu18;~Qi_Chu1;~Le_Lu3;~Nenghai_Yu1;~Jieping_Ye4", "gender": "M;F;;M;M;M;M", "homepage": ";;;http://www.cs.jhu.edu/~lelu/;;http://yelabs.net/;", "dblp": "211/5776;126/7789;52/9077-1;78/6574-1.html;96/5144;03/5454;", "google_scholar": "VCX7itEAAAAJ;YximuHAAAAAJ;JZjOMdsAAAAJ;kZn0f6gAAAAJ;https://scholar.google.com.hk/citations?user=7620QAMAAAAJ;T9AzhwcAAAAJ;srajsjoAAAAJ", "orcid": "0000-0001-9095-4462;;0000-0003-3028-0755;0000-0002-6799-9416;;0000-0001-8662-5818;", "linkedin": ";;;tigerlelu/;;;", "or_profile": "~Zhentao_Tan1;~Xiaodan_Li1;~Qi_Chu1;~Le_Lu3;~Nenghai_Yu1;~Jieping_Ye4;~Yue_Wu3", "aff": "University of Science and Technology of China;Alibaba Group;University of Science and Technology of China;Alibaba Group;University of Science and Technology of China;Alibaba Group;Alibaba Group", "aff_domain": "ustc.edu.cn;alibaba-inc.com;ustc.edu.cn;alibaba-inc.com;ustc.edu.cn;alibaba-inc.com;alibaba-inc.com", "position": "Postdoc;Researcher;Associate Professor;Full Professor;Full Professor;Principal Researcher;Researcher", "bibtex": "@inproceedings{\ntan2024boosting,\ntitle={Boosting Vanilla Lightweight Vision Transformers via Re-parameterization},\nauthor={Zhentao Tan and Xiaodan Li and Yue Wu and Qi Chu and Le Lu and Nenghai Yu and Jieping Ye},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=3rmpixOjPS}\n}", "github": "", "project": "", "reviewers": "Yszy;WZef;GsFu;wWZH;HAf5", "pdf_size": 816146, "rating": "6;6;8;8;8", "confidence": "5;3;4;4;4", "soundness": "4;3;3;3;2", "contribution": "3;3;4;3;2", "presentation": "3;3;4;2;2", "wc_summary": "47;67;168;70;77", "wc_strengths": "39;37;55;131;22", "wc_weaknesses": "93;96;58;124;100", "wc_questions": "3;2;12;3;53", "wc_review": 
"182;202;293;328;252", "wc_reply_reviewers": "13;0;14;85;327", "wc_reply_authors": "589;429;543;630;1608", "reply_reviewers": "1;0;1;1;2", "reply_authors": "3;2;3;3;7", "rating_avg": [ 7.2, 0.9797958971132712 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "contribution_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 85.8, 42.291370278107564 ], "wc_strengths_avg": [ 56.8, 38.54555746126913 ], "wc_weaknesses_avg": [ 94.2, 21.15088650624366 ], "wc_questions_avg": [ 14.6, 19.540726700918775 ], "wc_review_avg": [ 251.4, 54.507247224566385 ], "wc_reply_reviewers_avg": [ 87.8, 123.26783846567602 ], "wc_reply_authors_avg": [ 759.8, 429.3937121104593 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 3.6, 1.7435595774162693 ], "replies_avg": [ 33, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3696924111028153520&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "openreview": "https://openreview.net/forum?id=3rmpixOjPS", "pdf": "https://openreview.net/pdf?id=3rmpixOjPS", "email": "ustc.edu.cn;alibaba-inc.com;ustc.edu.cn;alibaba-inc.com;ustc.edu.cn;alibaba-inc.com;alibaba-inc.com", "author_num": 7, "aff_unique_index": "0;1;0;1;0;1;1", "aff_unique_norm": "University of Science and Technology of China;Alibaba Group", "aff_unique_dep": ";", "aff_unique_url": "http://www.ustc.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "USTC;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "3s4fZTr1ce", "title": "RLAdapter: Bridging Large Language Models to Reinforcement Learning in Open Worlds", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "While reinforcement learning (RL) shows remarkable success in decision-making problems, it often requires a lot of interactions with the environment, and in sparse-reward environments, it is challenging to learn meaningful policies. Large Language Models (LLMs) can potentially provide valuable guidance to agents in learning policies, thereby enhancing the performance of RL algorithms in such environments. However, LLMs often encounter difficulties in understanding downstream tasks, which hinders their ability to optimally assist agents in these tasks. A common approach to mitigating this issue is to fine-tune the LLMs with task-related data, enabling them to offer useful guidance for RL agents. However, this approach encounters several difficulties, such as inaccessible model weights or the need for significant computational resources, making it impractical. In this work, we introduce RLAdapter, a framework that builds a better connection between RL algorithms and LLMs by incorporating an adapter model. Within the RLAdapter framework, fine-tuning a lightweight language model with information generated during the training process of RL agents significantly aids LLMs in adapting to downstream tasks, thereby providing better guidance for RL agents. We conducted experiments to evaluate RLAdapter in the Crafter environment, and the results show that RLAdapter surpasses the SOTA baselines. 
Furthermore, agents under our framework exhibit common-sense behaviors that are absent in baseline models.", "keywords": "Reinforcement Learning;Large Language Models", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Wanpeng Zhang;Zongqing Lu", "authorids": "~Wanpeng_Zhang1;~Zongqing_Lu2", "gender": "M;", "homepage": "https://zhangwp.com;", "dblp": "73/10693-2;", "google_scholar": "_IKNf9EAAAAJ;", "orcid": "0000-0001-5351-3449;", "linkedin": ";", "or_profile": "~Wanpeng_Zhang1;~Zongqing_Lu2", "aff": "Peking University;", "aff_domain": "pku.edu.cn;", "position": "PhD student;", "bibtex": "@misc{\nzhang2024rladapter,\ntitle={{RLA}dapter: Bridging Large Language Models to Reinforcement Learning in Open Worlds},\nauthor={Wanpeng Zhang and Zongqing Lu},\nyear={2024},\nurl={https://openreview.net/forum?id=3s4fZTr1ce}\n}", "github": "", "project": "", "reviewers": "aWpB;1niL;6tyY;mt27", "site": "https://openreview.net/forum?id=3s4fZTr1ce", "pdf_size": 2536279, "rating": "3;3;3;5", "confidence": "4;2;4;3", "soundness": "2;2;2;3", "contribution": "2;3;2;3", "presentation": "3;4;3;2", "wc_summary": "131;188;100;98", "wc_strengths": "60;135;103;80", "wc_weaknesses": "217;95;364;159", "wc_questions": "46;847;231;185", "wc_review": "454;1265;798;522", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 129.25, 36.355020286062285 ], "wc_strengths_avg": [ 94.5, 27.897132469126642 ], "wc_weaknesses_avg": [ 208.75, 99.47958333246073 ], "wc_questions_avg": [ 327.25, 307.7095830486922 ], "wc_review_avg": [ 759.75, 318.8842854390915 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12669604310559425831&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "3sOE3MFepx", "title": "PDE-Diffusion: Physic guided diffusion model for solving partial derivative equations", "track": "main", "status": "Reject", "tldr": "", "abstract": "Solving partial differential equations (PDEs) is crucial in various disciplines, and their resolution often necessitates the use of computationally intensive numerical methods as well as specialized domain expertise. While data-driven approaches have emerged as promising alternatives, they encounter limitations in terms of generalizability, interpretability, and long-horizon predictive performance, as well as issues related to temporal incoherence. 
To address these challenges, we introduce the PDE-Diffusion, a two-stage model with three distinctive features: (i) the incorporation of physics-based priors to enhance model interpretability and generalization, (ii) a two-stage diffusion model that efficiently handles physical field forecasting without requiring multi-frame inputs, and (iii) the assimilation of PDE-informed constraints to ensure temporal coherence while producing high-quality predictive results. We conduct extensive experiments to evaluate PDE-Diffusion's capabilities using the PDEBench dataset and two of our newly proposed datasets. The results indicate that PDE-Diffusion delivers state-of-the-art performance in all cases.", "keywords": "AI for science;PDE;diffusion model;generative model", "primary_area": "applications to physical sciences (physics, chemistry, biology, etc.)", "supplementary_material": "/attachment/7fecff3c4ea127c6363e0c6652a5afb4cd969e6c.zip", "author": "Chonghan Gao;Haoyi Zhou;wen xin gong;QING PO WU WU;Tianyu Chen;Qian Yu;Shanghang Zhang;Jianxin Li", "authorids": "~Chonghan_Gao1;~Haoyi_Zhou1;~wen_xin_gong1;~QING_PO_WU_WU1;~Tianyu_Chen1;~Qian_Yu4;~Shanghang_Zhang4;~Jianxin_Li3", "gender": "M;M;M;;M;F;;M", "homepage": "https://homepage-gao.vercel.app/;https://www.zhouhaoyi.com/;https://github.com/ruoyunbai;;https://github.com/Tarpelite;https://yuqian1023.github.io/;;http://myjianxin.github.io", "dblp": ";162/1287;;;;;;l/JianxinLi-2.html", "google_scholar": "9O6yafkAAAAJ;mbrFlN0AAAAJ;;;;mmm90qgAAAAJ;;EY2lqD0AAAAJ", "orcid": ";0000-0002-2393-3634;;;;0000-0002-0538-7940;;0000-0001-5152-0055", "linkedin": ";haoyi-zhou-54a7a69a/;;;;;;", "or_profile": "~Chonghan_Gao1;~Haoyi_Zhou1;~wen_xin_gong1;~QING_PO_WU_WU1;~Tianyu_Chen1;~Qian_Yu4;~Shanghang_Zhang4;~Jianxin_Li3", "aff": "Beihang University;Beihang University;Beihang University;;Beihang University;Beihang University;;Beihang University ", "aff_domain": "buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;;buaa.edu.cn;buaa.edu.cn;;buaa.edu.cn", "position": "PhD student;Assistant Professor;Undergrad student;;PhD student;Associate Professor;;Full Professor", "bibtex": "@misc{\ngao2024pdediffusion,\ntitle={{PDE}-Diffusion: Physic guided diffusion model for solving partial derivative equations},\nauthor={Chonghan Gao and Haoyi Zhou and wen xin gong and QING PO WU WU and Tianyu Chen and Qian Yu and Shanghang Zhang and Jianxin Li},\nyear={2024},\nurl={https://openreview.net/forum?id=3sOE3MFepx}\n}", "github": "", "project": "", "reviewers": "Mvr7;uZRL;Hhh3;t73C;p93J", "site": "https://openreview.net/forum?id=3sOE3MFepx", "pdf_size": 443502, "rating": "1;1;3;3;3", "confidence": "4;5;4;3;5", "soundness": "1;1;1;2;3", "contribution": "2;1;1;2;2", "presentation": "2;1;2;2;1", "wc_summary": "38;28;116;52;38", "wc_strengths": "49;25;14;58;67", "wc_weaknesses": "365;164;658;150;69", "wc_questions": "1;9;3;57;205", "wc_review": "453;226;791;317;379", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "0;0;0;0;0", "rating_avg": [ 2.2, 0.9797958971132712 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 1.6, 0.8 ], "contribution_avg": [ 1.6, 0.4898979485566356 ], "presentation_avg": [ 1.6, 0.4898979485566356 ], "wc_summary_avg": [ 54.4, 31.733893552477927 ], "wc_strengths_avg": [ 42.6, 20.0059991002699 ], "wc_weaknesses_avg": [ 281.2, 212.12204034470346 ], "wc_questions_avg": [ 55.0, 77.76888838089432 ], "wc_review_avg": [ 433.2, 193.79618159293025 ], "wc_reply_reviewers_avg": [ 0, 0 ], 
"wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.3273268353539886, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:AHhXA_niAW4J:scholar.google.com/&scioq=PDE-Diffusion:+Physic+guided+diffusion+model+for+solving+partial+derivative+equations&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Beihang University", "aff_unique_dep": "", "aff_unique_url": "http://www.buaa.edu.cn/", "aff_unique_abbr": "BUAA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "3t57X1Fvaf", "title": "Learning Graph Representation for Model Ensemble", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "We introduce, LGR-ME (Learning Graph Representation for Model Ensemble), a groundbreaking approach within the domain of general-purpose learning systems. Its primary focal point is to establish a foundational framework that facilitates self-adaptation and versatility in the ever-evolving landscape of emerging machine learning tasks. Despite the strides made in machine learning, it has yet to reach the adaptive and all-encompassing cognitive prowess demonstrated by biological learning systems. This discrepancy is particularly pronounced in the sphere of replicating learning representations and mastering a diverse spectrum of general-purpose learning algorithms. Our proposition entails a graph-centered representation of machine learning models. This representation operates on a graph composed of models, where the interconnections among akin models are established based on model specifications and their corresponding performances. In pursuit of this representation, we employ a graph neural network to undergo training. In this aspect, we present a novel method through the utilization of the top $k$ maximum spanning trees. This encoding is then subjected to training by a meta-model that minimizes a newly devised loss function. This combined loss function effectively accounts for both Diversity and Accuracy. Furthermore, we provide a theoretical examination of both the graph encoding algorithm and the newly introduced loss function. This advanced training process engenders an understanding of the intricate interdependencies and correlations existing among the model ensemble. The acquired features are subsequently harnessed to generate the ultimate output for the initial task at hand. By means of extensive empirical comparisons, we showcase the efficacy of LGR-ME in contrast to solutions predicated on ensemble pruning techniques (additional details can be found in the Appendix).", "keywords": "Model Ensemble;Graph Representation;Graph Convolution Neural Network", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/22a77cee1bc7119f52f245ba6c1bdc83c9e9bdf7.zip", "author": "Nassim Belmecheri;Youcef Djenouri;Philip S. 
Yu", "authorids": "~Nassim_Belmecheri1;~Youcef_Djenouri1;~Philip_S._Yu1", "gender": "M;M;M", "homepage": "https://www.simula.no/people/nassim;https://sites.google.com/site/youcefdjenouri/;https://cs.uic.edu/profiles/philip-yu/", "dblp": "315/2623.html;;y/PhilipSYu", "google_scholar": "VccEbq4AAAAJ;;D0lL1r0AAAAJ", "orcid": ";;0000-0002-3491-5968", "linkedin": ";;", "or_profile": "~Nassim_Belmecheri1;~Youcef_Djenouri1;~Philip_S._Yu1", "aff": "Simula Research Laboratory;Norwegian Research Center;University of Illinois Chicago", "aff_domain": "simula.no;norceresearch.no;uic.edu", "position": "Postdoc;Principal Researcher;Full Professor", "bibtex": "@misc{\nbelmecheri2024learning,\ntitle={Learning Graph Representation for Model Ensemble},\nauthor={Nassim Belmecheri and Youcef Djenouri and Philip S. Yu},\nyear={2024},\nurl={https://openreview.net/forum?id=3t57X1Fvaf}\n}", "github": "", "project": "", "reviewers": "3xiQ;oYx9;eLAm;WLfx", "site": "https://openreview.net/forum?id=3t57X1Fvaf", "pdf_size": 997391, "rating": "1;1;1;3", "confidence": "5;3;3;4", "soundness": "1;2;1;2", "contribution": "1;1;1;2", "presentation": "1;1;2;1", "wc_summary": "95;81;52;75", "wc_strengths": "63;9;20;31", "wc_weaknesses": "431;110;150;352", "wc_questions": "73;32;10;143", "wc_review": "662;232;232;601", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 1.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 1.5, 0.5 ], "contribution_avg": [ 1.25, 0.4330127018922193 ], "presentation_avg": [ 1.25, 0.4330127018922193 ], "wc_summary_avg": [ 75.75, 15.514106484100203 ], "wc_strengths_avg": [ 30.75, 20.17888748172208 ], "wc_weaknesses_avg": [ 260.75, 134.44585341318637 ], "wc_questions_avg": [ 64.5, 50.64829710858994 ], "wc_review_avg": [ 431.75, 200.91089442835099 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:jHQx2Od6haQJ:scholar.google.com/&scioq=Learning+Graph+Representation+for+Model+Ensemble&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "Simula Research Laboratory;Norwegian Research Center;University of Illinois at Chicago", "aff_unique_dep": ";;", "aff_unique_url": "https://www.simula.no;;https://www.uic.edu", "aff_unique_abbr": "Simula;;UIC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chicago", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Norway;United States" }, { "title": "Generative Learning for Solving Non-Convex Problem with Multi-Valued Input-Solution Mapping", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19491", "id": "3tM1l5tSbv", "author_site": "Enming Liang, Minghua Chen", "tldr": "", "abstract": "By employing neural networks (NN) to learn input-solution mappings and passing a new input through the learned mapping to obtain a solution instantly, recent studies have shown remarkable speed improvements over iterative algorithms for solving optimization problems. Meanwhile, they also highlight methodological challenges to be addressed. In particular, general non-convex problems often present multiple optimal solutions for identical inputs, signifying a complex, multi-valued input-solution mapping. 
Conventional learning techniques, primarily tailored to learn single-valued mappings, struggle to train NNs to accurately decipher multi-valued ones, leading to inferior solutions. We address this fundamental issue by developing a generative learning approach using a rectified flow (RectFlow) model built upon ordinary differential equations. In contrast to learning input-solution mapping, we learn the mapping from input to solution distribution, exploiting the universal approximation capability of the RectFlow model. Upon receiving a new input, we employ the trained RectFlow model to sample high-quality solutions from the input-dependent distribution it has learned. Our approach outperforms conceivable GAN and Diffusion models in terms of training stability and run-time complexity. We provide a detailed characterization of the optimality loss and runtime complexity associated with our generative approach. Simulation results for solving non-convex problems show that our method achieves significantly better solution optimality than recent NN schemes, with comparable feasibility and speedup performance.", "keywords": "Non-convex optimization;Multi-valued solution mapping;Generative model;Ordinary differential equation;Supervised learning", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Enming Liang;Minghua Chen", "authorids": "~Enming_Liang1;~Minghua_Chen1", "gender": "M;M", "homepage": "https://emliang.github.io/;https://www.mhchen.com", "dblp": ";12/4395-1.html", "google_scholar": "https://scholar.google.com.sg/citations?user=Todfu6AAAAAJ;https://scholar.google.com.hk/citations?user=WzEQ9QwAAAAJ", "orcid": ";0000-0003-4763-0037", "linkedin": "enming-liang-95b5b216a/;", "or_profile": "~Enming_Liang1;~Minghua_Chen1", "aff": "City University of Hong Kong;City University of Hong Kong", "aff_domain": "cityu.edu.hk;cityu.edu.hk", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nliang2024generative,\ntitle={Generative Learning for Solving Non-Convex Problem with Multi-Valued Input-Solution Mapping},\nauthor={Enming Liang and Minghua Chen},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=3tM1l5tSbv}\n}", "github": "", "project": "", "reviewers": "rFky;GsRT;85JM;ibgc", "pdf_size": 6099014, "rating": "5;6;8;8", "confidence": "3;3;4;4", "soundness": "2;3;4;4", "contribution": "2;3;4;4", "presentation": "3;2;4;3", "wc_summary": "54;95;218;21", "wc_strengths": "46;71;94;53", "wc_weaknesses": "193;127;91;94", "wc_questions": "118;49;64;66", "wc_review": "411;342;467;234", "wc_reply_reviewers": "43;41;82;0", "wc_reply_authors": "2414;1488;936;675", "reply_reviewers": "1;1;1;0", "reply_authors": "7;5;3;2", "rating_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "contribution_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 97.0, 74.61568199782134 ], "wc_strengths_avg": [ 66.0, 18.560711193270585 ], "wc_weaknesses_avg": [ 126.25, 41.04494487753638 ], "wc_questions_avg": [ 74.25, 26.099568961957974 ], "wc_review_avg": [ 363.5, 86.89217456134931 ], "wc_reply_reviewers_avg": [ 41.5, 29.004310024546353 ], "wc_reply_authors_avg": [ 1378.25, 666.1397657398934 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 4.25, 1.920286436967152 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 2, 0 ], 
"corr_rating_confidence": 0.9622504486493761, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16103035139839675510&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=3tM1l5tSbv", "pdf": "https://openreview.net/pdf?id=3tM1l5tSbv", "email": "cityu.edu.hk;cityu.edu.hk", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "City University of Hong Kong", "aff_unique_dep": "", "aff_unique_url": "https://www.cityu.edu.hk", "aff_unique_abbr": "CityU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "3tjTJeXyA7", "title": "Revitalizing Channel-dimension Fourier Transform for Image Enhancement", "track": "main", "status": "Reject", "tldr": "", "abstract": "Exploring the global representations of Fourier transform for image enhancement has become an alternative and made significant advancements. However, previous works only operate in the spatial dimensional, overlooking the potential of the channel dimension that inherently possesses discriminative features. In this work, we propose a fresh perspective, channel-dimension Fourier transform, for image enhancement. Our designs are simple yet effective and comprise three straightforward steps: applying the Fourier transform to the channel dimension to obtain channel-wise Fourier domain features, performing a channel-wise transformation on both its amplitude and phase components, and then reverting back to the spatial domain. Following the above rules, we offer three alternative implementation formats of the channel transform in different operational spaces, performing operations in 1) the global vector with higher orders; 2) the global vector with channel groups; and 3) the Fourier features derived from spatial-based Fourier transform. The above core designs, as general operators, can be seamlessly integrated with enhancement networks, achieving remarkable gains and building efficient models. Through extensive experiments on multiple image enhancement tasks, like low-light image enhancement, exposure correction, SDR2HDR translation, and underwater image enhancement, our designs exhibit consistent performance gains. 
The code will be publicly available.", "keywords": "Image Enhancement;Fourier transform;Image Restoration", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Jie Huang;Man Zhou;Dong Li;Bing Li;Chun-Le Guo;Chongyi Li", "authorids": "~Jie_Huang4;~Man_Zhou4;~Dong_Li15;~Bing_Li16;~Chun-Le_Guo1;~Chongyi_Li1", "gender": "M;;M;M;;", "homepage": ";;https://github.com/universe-six;https://www.ustc.edu.cn/;;", "dblp": ";;;;;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;LdfwzzQAAAAJ;;;", "orcid": "0000-0002-3518-3404;;0000-0002-7038-7402;;;", "linkedin": ";;;;;", "or_profile": "~Jie_Huang4;~Man_Zhou4;~Dong_Li15;~Bing_Li16;~Chun-Le_Guo1;~Chongyi_Li1", "aff": "University of Science and Technology of China;;University of Science and Technology of China;University of Science and Technology of China;;", "aff_domain": "ustc.edu.cn;;ustc.edu.cn;ustc.edu.cn;;", "position": "PhD student;;PhD student;MS student;;", "bibtex": "@misc{\nhuang2024revitalizing,\ntitle={Revitalizing Channel-dimension Fourier Transform for Image Enhancement},\nauthor={Jie Huang and Man Zhou and Dong Li and Bing Li and Chun-Le Guo and Chongyi Li},\nyear={2024},\nurl={https://openreview.net/forum?id=3tjTJeXyA7}\n}", "github": "", "project": "", "reviewers": "WFwC;6DxW;MiY2;Tiry", "site": "https://openreview.net/forum?id=3tjTJeXyA7", "pdf_size": 6754444, "rating": "6;6;8;10", "confidence": "5;5;4;5", "soundness": "4;4;4;3", "contribution": "4;3;3;3", "presentation": "3;3;3;3", "wc_summary": "47;57;53;70", "wc_strengths": "88;69;90;50", "wc_weaknesses": "278;318;91;33", "wc_questions": "50;5;31;20", "wc_review": "463;449;265;173", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "827;865;704;301", "reply_reviewers": "0;0;0;0", "reply_authors": "2;2;1;1", "rating_avg": [ 7.5, 1.6583123951777 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "contribution_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 56.75, 8.437268515343103 ], "wc_strengths_avg": [ 74.25, 16.223054582907622 ], "wc_weaknesses_avg": [ 180.0, 120.60058042978069 ], "wc_questions_avg": [ 26.5, 16.408839081421938 ], "wc_review_avg": [ 337.5, 122.9827223637532 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 674.25, 223.56137300526672 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16518628138777599967&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ustc.edu.cn", "aff_unique_abbr": "USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "3uITarEQ7p", "title": "Differentially Private Model Compression via Selective Pretraining", "track": "main", "status": "Reject", "tldr": "", "abstract": "Suppose we want to train text prediction models in email clients or word processors. \nThese models, which serve billions of predictions per hour, must preserve the privacy of user data and adhere to specific model size constraints to meet memory, inference time requirements, and to reduce inference cost. 
\nBuilding small, fast, and private domain-specific language models is a thriving\narea of research.\nIn this work, we show that a careful pre-training on a {\\em subset} of the public dataset that is guided by the private dataset is crucial to train small DP language models.\nOn standard benchmarks, models trained with our new framework achieve state-of-the-art performance, improving upon all the baselines from the literature.\n\nBesides performance improvements, our framework also shows that with careful pre-training and private fine-tuning, smaller models can match the performance of much larger models that do not have access to private data, highlighting the promise of private learning as a tool for model compression and efficiency.", "keywords": "differentially private deep learning;model compression", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/613fcfb60b224cba892cc66ecd8b5e96d87bcaf7.zip", "author": "Janardhan Kulkarni;Da Yu;Sivakanth Gopi;Zinan Lin;Saurabh Naik;Tomasz Lukasz Religa;Jian Yin;Huishuai Zhang", "authorids": "~Janardhan_Kulkarni2;~Da_Yu1;~Sivakanth_Gopi1;~Zinan_Lin1;~Saurabh_Naik1;~Tomasz_Lukasz_Religa1;~Jian_Yin3;~Huishuai_Zhang3", "gender": "M;M;M;M;M;;M;M", "homepage": ";;https://aka.ms/sigopi;https://zinanlin.me/;;http://www.cam.ac.uk;http://sai.sysu.edu.cn/teacher/teacher01/1385356.htm;https://huishuai-git.github.io", "dblp": "54/1978;48/8545;123/7803.html;64/237-1;;;95/578-1;144/7537", "google_scholar": "_fxnybwAAAAJ;FcRGdiwAAAAJ;bYhGFrwAAAAJ;67nE-wQ_g_cC;;;;w1srHyIAAAAJ", "orcid": ";;;;;;;", "linkedin": ";;sivakanthgopi/;;saurabh-naik-9750b0b/;;;", "or_profile": "~Janardhan_Kulkarni2;~Da_Yu1;~Sivakanth_Gopi1;~Zinan_Lin1;~Saurabh_Naik1;~Tomasz_Lukasz_Religa1;~Jian_Yin3;~Huishuai_Zhang2", "aff": "Microsoft Research, Redmond;SUN YAT-SEN UNIVERSITY;Microsoft Research;Microsoft;;;SUN YAT-SEN UNIVERSITY;Peking University", "aff_domain": "microsoft.com;sysu.edu.cn;microsoft.com;microsoft.com;;;sysu.edu.cn;pku.edu.cn", "position": "Researcher;PhD student;Senior Researcher;Senior Researcher;;;Full Professor;Assistant Professor", "bibtex": "@misc{\nkulkarni2024differentially,\ntitle={Differentially Private Model Compression via Selective Pretraining},\nauthor={Janardhan Kulkarni and Da Yu and Sivakanth Gopi and Zinan Lin and Saurabh Naik and Tomasz Lukasz Religa and Jian Yin and Huishuai Zhang},\nyear={2024},\nurl={https://openreview.net/forum?id=3uITarEQ7p}\n}", "github": "", "project": "", "reviewers": "3s2h;kdmU;CqKu;mFxj", "site": "https://openreview.net/forum?id=3uITarEQ7p", "pdf_size": 7505163, "rating": "3;5;6;8", "confidence": "4;4;4;4", "soundness": "2;3;3;4", "contribution": "1;2;3;4", "presentation": "2;2;4;4", "wc_summary": "126;96;189;228", "wc_strengths": "58;47;322;122", "wc_weaknesses": "222;134;206;331", "wc_questions": "53;1;256;186", "wc_review": "459;278;973;867", "wc_reply_reviewers": "0;0;34;20", "wc_reply_authors": "1709;770;1111;696", "reply_reviewers": "0;0;1;1", "reply_authors": "3;1;2;1", "rating_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 3.0, 1.0 ], "wc_summary_avg": [ 159.75, 51.760868423935854 ], "wc_strengths_avg": [ 137.25, 110.44314148012995 ], "wc_weaknesses_avg": [ 223.25, 70.48891756865046 ], "wc_questions_avg": [ 124.0, 101.78162899069753 ], "wc_review_avg": [ 644.25, 285.54804762071126 ], "wc_reply_reviewers_avg": [ 13.5, 
14.378803844548406 ], "wc_reply_authors_avg": [ 1071.5, 399.95906040493696 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ALMmwCkMmQEJ:scholar.google.com/&scioq=Differentially+Private+Model+Compression+via+Selective+Pretraining&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;0;0;1;2", "aff_unique_norm": "Microsoft;Sun Yat-sen University;Peking University", "aff_unique_dep": "Microsoft Research;;", "aff_unique_url": "https://www.microsoft.com/en-us/research;http://www.sysu.edu.cn;http://www.pku.edu.cn", "aff_unique_abbr": "MSR;SYSU;Peking U", "aff_campus_unique_index": "0", "aff_campus_unique": "Redmond;", "aff_country_unique_index": "0;1;0;0;1;1", "aff_country_unique": "United States;China" }, { "id": "3ucOvX8WVu", "title": "LoFT: Local Proxy Fine-tuning Improves Transferability to Large Language Model Attacks", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "It has been shown that Large Language Model (LLM) alignments can be circumvented by appending specially crafted attack suffixes with harmful queries to elicit harmful responses. To conduct attacks against private target models whose characterization is unknown, public models can be used as proxies to fashion the attack, with successful attacks being transferred from public proxies to private target models. The success rate of attack depends on how closely the proxy model approximates the private model. We hypothesize that for attacks to be transferrable, it is sufficient if the proxy can approximate the target model in the neighborhood of the harmful query. Therefore, in this paper, we propose \\emph{Local Fine-Tuning (LoFT)}, i.e., fine-tuning proxy models on similar queries that lie in the lexico-semantic neighborhood of harmful queries to decrease the divergence between the proxy and target models. First, we demonstrate three approaches to prompt private target models to obtain similar queries given harmful queries. Next, we obtain data for local fine-tuning by eliciting responses from target models for the generated similar queries. Then, we optimize attack suffixes to generate attack prompts and evaluate the impact of our local fine-tuning on the attack's success rate. 
Experiments show that local fine-tuning of proxy models improves attack transferability and increases attack success rate by $39\\%$, $7\\%$, and $0.5\\%$ absolute on target models ChatGPT, GPT-4, and Claude respectively.", "keywords": "adversarial attacks;large languages model;alignment;fine-tuning", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/968a737950b697cd65c37bdff5fc83e6f1f6e7ca.pdf", "author": "Muhammad A Shah;Roshan Sharma;Hira Dhamyal;Ankit Shah;Dareen Safar Alharthi;Massa Baali;Hazim Bukhari;Joseph Konan;Soham Deshmukh;Bhiksha Raj;Rita Singh", "authorids": "~Muhammad_A_Shah1;~Roshan_Sharma1;~Hira_Dhamyal1;~Ankit_Shah1;~Dareen_Safar_Alharthi1;~Massa_Baali1;~Hazim_Bukhari1;~Joseph_Konan1;~Soham_Deshmukh1;~Bhiksha_Raj1;~Rita_Singh1", "gender": ";M;;M;F;F;M;;M;M;F", "homepage": ";https://roshansh-cmu.github.io/;https://www.linkedin.com/in/hiradhamyal/;https://ankitshah009.github.io/;https://dareenharthi.github.io/;https://www.linkedin.com/in/massa-baali-37ba9386;;;https://soham97.github.io;https://www.cs.cmu.edu/directory/bhikshar/;http://mlsp.cs.cmu.edu/people/rsingh/index.html", "dblp": "142/5481;263/9903;;04/1935-1.html;;;;306/1680.html;241/9651;60/3996;", "google_scholar": "74MwzTcAAAAJ;yZ4QLqsAAAAJ;;https://scholar.google.co.in/citations?user=TqG1H4cAAAAJ;jBtEAgIAAAAJ;;;;MasiEogAAAAJ;;", "orcid": ";;;0000-0002-8838-5421;;;;;;;", "linkedin": ";;;ankpsh01/;;;hazim-bukhari;;sdeshmuk;;", "or_profile": "~Muhammad_A_Shah1;~Roshan_Sharma1;~Hira_Dhamyal1;~Ankit_Shah1;~Dareen_Safar_Alharthi1;~Massa_Baali1;~Hazim_Bukhari1;~Joseph_Konan1;~Soham_Deshmukh1;~Bhiksha_Raj1;~Rita_Singh1", "aff": "Carnegie Mellon University;Google;Carnegie Mellon University;Accenture;School of Computer Science, Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Microsoft;Mohamed bin Zayed University of Artificial Intelligence;School of Computer Science, Carnegie Mellon University", "aff_domain": "cmu.edu;google.com;cmu.edu;accenture.com;cs.cmu.edu;cmu.edu;cmu.edu;andrew.cmu.edu;microsoft.com;mbzuai.ac.ae;cs.cmu.edu", "position": "PhD student;Researcher;PhD student;Principal Researcher;MS student;PhD student;Researcher;PhD student;Researcher;Full Professor;Research Professor", "bibtex": "@misc{\nshah2024loft,\ntitle={Lo{FT}: Local Proxy Fine-tuning Improves Transferability to Large Language Model Attacks},\nauthor={Muhammad A Shah and Roshan Sharma and Hira Dhamyal and Ankit Shah and Dareen Safar Alharthi and Massa Baali and Hazim Bukhari and Joseph Konan and Soham Deshmukh and Bhiksha Raj and Rita Singh},\nyear={2024},\nurl={https://openreview.net/forum?id=3ucOvX8WVu}\n}", "github": "", "project": "", "reviewers": "LSRN;A2bi;a6tE;5Wu7", "site": "https://openreview.net/forum?id=3ucOvX8WVu", "pdf_size": 2524526, "rating": "3;3;5;5", "confidence": "2;4;3;4", "soundness": "2;2;3;2", "contribution": "2;2;2;2", "presentation": "2;2;2;3", "wc_summary": "32;121;117;80", "wc_strengths": "18;32;57;68", "wc_weaknesses": "12;21;164;117", "wc_questions": "696;158;40;23", "wc_review": "758;332;378;288", "wc_reply_reviewers": "0;228;18;38", "wc_reply_authors": "1378;1156;1187;757", "reply_reviewers": "0;3;1;1", "reply_authors": "3;3;3;2", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 87.5, 35.80851853958776 ], 
"wc_strengths_avg": [ 43.75, 19.778460506318485 ], "wc_weaknesses_avg": [ 78.5, 64.26702109169211 ], "wc_questions_avg": [ 229.25, 274.44796865708446 ], "wc_review_avg": [ 439.0, 186.90371852908652 ], "wc_reply_reviewers_avg": [ 71.0, 91.63514609580758 ], "wc_reply_authors_avg": [ 1119.5, 225.89654711836567 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Mitusnqm_SsJ:scholar.google.com/&scioq=LoFT:+Local+Proxy+Fine-tuning+Improves+Transferability+to+Large+Language+Model+Attacks&hl=en&as_sdt=0,14", "gs_version_total": 0, "aff_unique_index": "0;1;0;2;0;0;0;0;3;4;0", "aff_unique_norm": "Carnegie Mellon University;Google;Accenture;Microsoft;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";Google;;Microsoft Corporation;", "aff_unique_url": "https://www.cmu.edu;https://www.google.com;https://www.accenture.com;https://www.microsoft.com;https://mbzuai.ac.ae", "aff_unique_abbr": "CMU;Google;Accenture;Microsoft;MBZUAI", "aff_campus_unique_index": "1;2;2", "aff_campus_unique": ";Mountain View;Pittsburgh", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;1;0", "aff_country_unique": "United States;United Arab Emirates" }, { "id": "3ukT8oODY0", "title": "Careful at Estimation and Bold at Exploration for Deterministic Policy Gradient Algorithm", "track": "main", "status": "Reject", "tldr": "", "abstract": "Exploration strategies within continuous action spaces often adopt heuristic approaches due to the challenge of dealing with an infinite array of possible actions. Previous research has established the advantages of policy-based exploration in the context of deterministic policy reinforcement learning (DPRL) for continuous action spaces. However, policy-based exploration in DPRL presents two notable issues: unguided exploration and exclusive policy, both stemming from the soft policy learning schema, which is famous for DPRL policy learning. In response to these challenges, we introduce a novel approach called Bold Actor Conservative Critic (BACC), which leverages Q-value to guide out-of-distribution exploration. We extend the dynamic Boltzmann softmax update theorem to the double Q function framework, incorporating modified weights and Q values. This extension enables us to derive an exploration policy directly for policy exploration, which is constructed with the modified weights. Furthermore, our theorem offers substantial support for utilizing the minimum Q value as an intermediate step in policy gradient computation for policy optimization. In practice, we construct such an exploration policy with a limited set of actions and train a parameterized policy by minimizing the expected KL-divergence between the target policy and a policy constructed based on the minimum Q value. To evaluate the effectiveness of our approach, we conduct experiments on the Mujoco and Roboschool benchmarks, showcasing superior performance compared to previous state-of-the-art methods across a range of environments. 
Notably, our method excels in the highly complex Humanoid environment, demonstrating its efficacy in tackling challenging continuous action space exploration problems.", "keywords": "exploration;actor critic;out of distribution;deterministic policy", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/e127fb9554acba5b34674796193b277f12189d82.zip", "author": "Xing Chen;Yijun Liu;Shutong Zhang;Siyuan Guo;Zhaogeng Liu;Yu Jin;haiyin piao;Hechang Chen;Hengshuai Yao;Yi Chang", "authorids": "~Xing_Chen2;~Yijun_Liu5;~Shutong_Zhang3;~Siyuan_Guo2;~Zhaogeng_Liu1;~Yu_Jin4;~haiyin_piao1;~Hechang_Chen2;~Hengshuai_Yao2;~Yi_Chang4", "gender": "M;F;;M;M;M;M;M;;M", "homepage": "https://github.com/raincchio;;;;https://github.com/Peter7777777;;https://www.researchgate.net/profile/Haiyin-Piao;http://sai.jlu.edu.cn/info/1094/2387.htm;;http://www.yichang-cs.com", "dblp": ";;;244/5858;264/5265;https://dblp.dagstuhl.de/;269/4228.html;145/1142;;02/5438.html", "google_scholar": ";;;JE1Yco4AAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?view_op=list_works;;EezEcbgAAAAJ;;https://scholar.google.com.hk/citations?user=drEkR50AAAAJ", "orcid": "0000-0001-5685-8506;0000-0001-7752-1611;;0000-0002-9304-5405;0000-0002-3958-8740;;;;;0000-0003-2697-8093", "linkedin": ";;;;;;;;;", "or_profile": "~Xing_Chen2;~Yijun_Liu5;~Shutong_Zhang3;~Siyuan_Guo2;~Zhaogeng_Liu1;~Yu_Jin4;~haiyin_piao1;~Hechang_Chen2;~Hengshuai_Yao2;~Yi_Chang4", "aff": "Jilin University;Jilin University;;Jilin University;Jilin University;;;Jilin University;;Jilin University, China", "aff_domain": "jlu.edu.cn;jlu.edu.cn;;jlu.edu.cn;jlu.edu.cn;;;jlu.edu.cn;;jlu.edu.cn", "position": "PhD student;PhD student;;PhD student;PhD student;;;Associate Professor;;Full Professor", "bibtex": "@misc{\nchen2024careful,\ntitle={Careful at Estimation and Bold at Exploration for Deterministic Policy Gradient Algorithm},\nauthor={Xing Chen and Yijun Liu and Shutong Zhang and Siyuan Guo and Zhaogeng Liu and Yu Jin and haiyin piao and Hechang Chen and Hengshuai Yao and Yi Chang},\nyear={2024},\nurl={https://openreview.net/forum?id=3ukT8oODY0}\n}", "github": "", "project": "", "reviewers": "3zdT;4MmP;vPPz", "site": "https://openreview.net/forum?id=3ukT8oODY0", "pdf_size": 8156725, "rating": "5;5;6", "confidence": "3;3;4", "soundness": "2;3;3", "contribution": "2;2;3", "presentation": "2;3;3", "wc_summary": "103;76;70", "wc_strengths": "44;32;32", "wc_weaknesses": "436;60;64", "wc_questions": "6;119;155", "wc_review": "589;287;321", "wc_reply_reviewers": "141;0;16", "wc_reply_authors": "790;464;375", "reply_reviewers": "1;0;1", "reply_authors": "2;1;1", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 83.0, 14.352700094407323 ], "wc_strengths_avg": [ 36.0, 5.656854249492381 ], "wc_weaknesses_avg": [ 186.66666666666666, 176.31285325302358 ], "wc_questions_avg": [ 93.33333333333333, 63.47878035655345 ], "wc_review_avg": [ 399.0, 135.06541624955912 ], "wc_reply_reviewers_avg": [ 52.333333333333336, 63.03614483417871 ], "wc_reply_authors_avg": [ 543.0, 178.39469349357526 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 
10, 0 ], "corr_rating_confidence": 0.9999999999999997, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:G621eedBhCQJ:scholar.google.com/&scioq=Careful+at+Estimation+and+Bold+at+Exploration+for+Deterministic+Policy+Gradient+Algorithm&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Jilin University", "aff_unique_dep": "", "aff_unique_url": "http://www.jlu.edu.cn", "aff_unique_abbr": "JLU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "3vHWAGV9Wj", "title": "Estimating Performative Effects in Dynamical Systems: the advantage of sequential observations", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Regulators and academics are increasingly interested in the causal effect that algorithmic actions of a digital platform have on consumption, a quantity in the machine learning literature termed the performative effect. In this work, we first show how isolated (non-sequential) observations are not enough to identify the performative effect of interest in general, then we show how sequential observations overcome these limitations. The key novelty of our approach is to explicitly model the dynamics of consumption over time, viewing the platform as a controller acting on a dynamical system. From this dynamical systems perspective, we are able to show that exogenous variation in consumption and appropriately responsive algorithmic control actions are sufficient for identifying the performative effect of interest. Our results illustrate the fruitful interplay of control theory and causal inference, which we illustrate with examples from econometrics, macroeconomics, and machine learning.", "keywords": "performativity;dynamical systems;causal inference;control theory", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "", "author": "Gary Cheng;Moritz Hardt;Celestine Mendler-D\u00fcnner", "authorids": "~Gary_Cheng2;~Moritz_Hardt1;~Celestine_Mendler-D\u00fcnner1", "gender": "M;Not Specified;", "homepage": "http://garycheng.me;http://mrtz.org/;http://celestine.ai/", "dblp": ";26/4683;176/5511", "google_scholar": "qArWV_wAAAAJ;adnTgaAAAAAJ;UqtDdZUAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Gary_Cheng2;~Moritz_Hardt1;~Celestine_Mendler-D\u00fcnner1", "aff": "Stanford University;Max-Planck-Institute for Intelligent Systems, Max-Planck Institute;Max Planck Institute for Intelligent Systems", "aff_domain": "stanford.edu;is.mpg.de;tuebingen.mpg.de", "position": "PhD student;Principal Researcher;Group Lead", "bibtex": "@misc{\ncheng2024estimating,\ntitle={Estimating Performative Effects in Dynamical Systems: the advantage of sequential observations},\nauthor={Gary Cheng and Moritz Hardt and Celestine Mendler-D{\\\"u}nner},\nyear={2024},\nurl={https://openreview.net/forum?id=3vHWAGV9Wj}\n}", "github": "", "project": "", "reviewers": "", "site": "https://openreview.net/forum?id=3vHWAGV9Wj", "pdf_size": 0, "rating": "", "confidence": "", "soundness": "", "contribution": "", "presentation": "", "wc_summary": "", "wc_strengths": "", "wc_weaknesses": "", "wc_questions": "", "wc_review": "", "wc_reply_reviewers": "", "wc_reply_authors": "", "reply_reviewers": "", "reply_authors": "", "rating_avg": [ 0, 0 ], "confidence_avg": [ 0, 0 ], "soundness_avg": [ 0, 0 ], "contribution_avg": [ 0, 0 ], "presentation_avg": [ 0, 0 ], "wc_summary_avg": [ 0, 0 ], 
"wc_strengths_avg": [ 0, 0 ], "wc_weaknesses_avg": [ 0, 0 ], "wc_questions_avg": [ 0, 0 ], "wc_review_avg": [ 0, 0 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 0, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:DTDSoxXxFbEJ:scholar.google.com/&scioq=Estimating+Performative+Effects+in+Dynamical+Systems:+the+advantage+of+sequential+observations&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "Stanford University;Max-Planck-Institute for Intelligent Systems;Max Planck Institute for Intelligent Systems", "aff_unique_dep": ";Intelligent Systems;Intelligent Systems", "aff_unique_url": "https://www.stanford.edu;https://www.mpi-is.mpg.de;https://www.mpi-is.mpg.de", "aff_unique_abbr": "Stanford;MPI-IS;MPI-IS", "aff_campus_unique_index": "0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;Germany" }, { "title": "The Generalization Gap in Offline Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19490", "id": "3w6xuXDOdY", "author_site": "Ishita Mediratta, Qingfei You, Minqi Jiang, Roberta Raileanu", "tldr": "", "abstract": "Despite recent progress in offline learning, these methods are still trained and tested on the same environment. In this paper, we compare the generalization abilities of widely used online and offline learning methods such as online reinforcement learning (RL), offline RL, sequence modeling, and behavioral cloning. Our experiments show that offline learning algorithms perform worse on new environments than online learning ones. We also introduce the first benchmark for evaluating generalization in offline learning, collecting datasets of varying sizes and skill-levels from Procgen (2D video games) and WebShop (e-commerce websites). The datasets contain trajectories for a limited number of game levels or natural language instructions and at test time, the agent has to generalize to new levels or instructions. Our experiments reveal that existing offline learning algorithms struggle to match the performance of online RL on both train and test environments. Behavioral cloning is a strong baseline, outperforming state-of-the-art offline RL and sequence modeling approaches when trained on data from multiple environments and tested on new ones. Finally, we find that increasing the diversity of the data, rather than its size, improves performance on new environments for all offline learning algorithms. 
Our study demonstrates the limited generalization of current offline learning algorithms highlighting the need for more research in this area.", "keywords": "Offline RL;Dataset;Generalization;Procgen;Webshop", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Ishita Mediratta;Qingfei You;Minqi Jiang;Roberta Raileanu", "authorids": "~Ishita_Mediratta1;~Qingfei_You2;~Minqi_Jiang1;~Roberta_Raileanu2", "gender": "F;;M;F", "homepage": "https://ishita.io;;https://twitter.com/minqijiang;https://rraileanu.github.io/", "dblp": "289/7108;;270/7949;215/5579", "google_scholar": "hgaAO6QAAAAJ;;;9hVXpJ0AAAAJ", "orcid": ";;;", "linkedin": ";qingfeiyou/;minqi-jiang-585a6536/;roberta-raileanu-44b25660/", "or_profile": "~Ishita_Mediratta1;~Qingfei_You2;~Minqi_Jiang1;~Roberta_Raileanu1", "aff": "Meta AI;;Google;Meta Facebook", "aff_domain": "meta.com;;google.com;fb.com", "position": "Researcher;;Researcher;Researcher", "bibtex": "@inproceedings{\nmediratta2024the,\ntitle={The Generalization Gap in Offline Reinforcement Learning},\nauthor={Ishita Mediratta and Qingfei You and Minqi Jiang and Roberta Raileanu},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=3w6xuXDOdY}\n}", "github": "", "project": "", "reviewers": "qttr;4tcZ;HieC;f7uZ", "pdf_size": 2279286, "rating": "6;6;6;8", "confidence": "4;4;4;4", "soundness": "3;3;2;3", "contribution": "3;3;2;3", "presentation": "3;3;2;3", "wc_summary": "62;26;75;69", "wc_strengths": "60;47;44;149", "wc_weaknesses": "186;178;118;64", "wc_questions": "104;65;161;14", "wc_review": "412;316;398;296", "wc_reply_reviewers": "64;135;19;0", "wc_reply_authors": "1577;1690;774;764", "reply_reviewers": "1;1;1;0", "reply_authors": "4;4;3;2", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 58.0, 19.03943276465977 ], "wc_strengths_avg": [ 75.0, 43.14510400960925 ], "wc_weaknesses_avg": [ 136.5, 49.42418436352794 ], "wc_questions_avg": [ 86.0, 53.791263231123324 ], "wc_review_avg": [ 355.5, 50.2468904510518 ], "wc_reply_reviewers_avg": [ 54.5, 51.96392979750473 ], "wc_reply_authors_avg": [ 1201.25, 434.10676970072694 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 0.82915619758885 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16522965744672101615&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=3w6xuXDOdY", "pdf": "https://openreview.net/pdf?id=3w6xuXDOdY", "email": "meta.com;;google.com;fb.com", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Meta;Google", "aff_unique_dep": "Meta AI;Google", "aff_unique_url": "https://meta.com;https://www.google.com", "aff_unique_abbr": "Meta;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "3wGi5m2YHY", "title": "FlowHash: Accelerating Audio Search with Balanced Hashing via Normalizing Flow", "track": "main", "status": "Reject", "tldr": "", "abstract": "Nearest neighbor search on context representation vectors is a formidable task due to challenges posed by high dimensionality, scalability issues, and 
potential noise within query vectors. Our novel approach leverages normalizing flow within a self-supervised learning framework to effectively tackle these challenges, specifically in the context of audio fingerprinting tasks. Audio fingerprinting systems incorporate two key components: audio encoding and indexing. The existing systems consider these components independently, resulting in suboptimal performance. Our approach optimizes the interplay between these components, facilitating the adaptation of vectors to the indexing structure. Additionally, we distribute vectors in the latent $\\mathbb{R}^K$ space using normalizing flow, resulting in balanced $K$-bit hash codes. This allows indexing vectors using a balanced hash table, where vectors are uniformly distributed across all possible $2^K$ hash buckets. This significantly accelerates retrieval, achieving speedups of up to 3$\\times$ compared to the Locality-Sensitive Hashing (LSH). We empirically demonstrate that our system is scalable, highly effective, and efficient in identifying short audio queries ($\\leq$2s), particularly at high noise and reverberation levels.", "keywords": "Audio fingerprinting;Indexing;Normalizing Flows;Information Retrieval;Self-supervised learning", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Anup Singh;Kris Demuynck;Vipul Arora", "authorids": "~Anup_Singh1;~Kris_Demuynck1;~Vipul_Arora1", "gender": "M;M;M", "homepage": ";;https://home.iitk.ac.in/~vipular", "dblp": ";49/603;", "google_scholar": ";;https://scholar.google.co.in/citations?user=SC9YYPAAAAAJ", "orcid": ";0000-0001-8525-7160;0000-0002-1207-1258", "linkedin": "anup-singh-a84a7476/;;", "or_profile": "~Anup_Singh1;~Kris_Demuynck1;~Vipul_Arora1", "aff": "Universiteit Gent;Universiteit Gent;IIT Kanpur", "aff_domain": "ugent.be;ugent.be;iitk.ac.in", "position": "PhD student;Assistant Professor;Associate Professor", "bibtex": "@misc{\nsingh2024flowhash,\ntitle={FlowHash: Accelerating Audio Search with Balanced Hashing via Normalizing Flow},\nauthor={Anup Singh and Kris Demuynck and Vipul Arora},\nyear={2024},\nurl={https://openreview.net/forum?id=3wGi5m2YHY}\n}", "github": "", "project": "", "reviewers": "q2gt;AxNq;p7bp", "site": "https://openreview.net/forum?id=3wGi5m2YHY", "pdf_size": 2028111, "rating": "3;3;5", "confidence": "4;4;4", "soundness": "2;3;3", "contribution": "1;2;3", "presentation": "2;2;4", "wc_summary": "39;70;103", "wc_strengths": "11;67;67", "wc_weaknesses": "346;289;179", "wc_questions": "1;90;95", "wc_review": "397;516;444", "wc_reply_reviewers": "13;49;19", "wc_reply_authors": "392;414;578", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.0, 0.816496580927726 ], "presentation_avg": [ 2.6666666666666665, 0.9428090415820634 ], "wc_summary_avg": [ 70.66666666666667, 26.132142830026183 ], "wc_strengths_avg": [ 48.333333333333336, 26.398653164297777 ], "wc_weaknesses_avg": [ 271.3333333333333, 69.31249686905113 ], "wc_questions_avg": [ 62.0, 43.18178628387977 ], "wc_review_avg": [ 452.3333333333333, 48.93760199364993 ], "wc_reply_reviewers_avg": [ 27.0, 15.748015748023622 ], "wc_reply_authors_avg": [ 461.3333333333333, 82.98326471176114 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], 
"corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1116698979299600068&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Ghent;Indian Institute of Technology Kanpur", "aff_unique_dep": ";", "aff_unique_url": "https://www.ugent.be/en;https://www.iitk.ac.in", "aff_unique_abbr": "UGent;IITK", "aff_campus_unique_index": "1", "aff_campus_unique": ";Kanpur", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Belgium;India" }, { "id": "3wL1tj3kqE", "title": "Fair Domain Generalization with Arbitrary Sensitive Attributes", "track": "main", "status": "Reject", "tldr": "", "abstract": "We consider the problem of fairness transfer in domain generalization. Traditional domain generalization methods are designed to generalize a model to unseen domains. Recent work has extended this capability to incorporate fairness as an additional requirement. However, it is only applicable to a single, unchanging sensitive attribute across all domains. As a naive approach to extend it to a multi-attribute context, we can train a model for each subset of the potential set of sensitive attributes. However, this results in $2^n$ models for $n$ attributes. We propose a novel approach that allows any combination of sensitive attributes in the target domain. We learn two representations, a domain invariant representation to generalize the model's performance, and a selective domain invariant representation to transfer the model's fairness to unseen domains. As each domain can have a different set of sensitive attributes, we transfer the fairness by learning a selective domain invariant representation which enforces similar representations among only those domains that have similar sensitive attributes. We demonstrate that our method decreases the current requirement of $2^n$ models to $1$ to accomplish this task. 
Moreover, our method outperforms the state-of-the-art on unseen target domains across multiple experimental settings.", "keywords": "domain generalization;fairness;multiple sensitive attributes", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Ragja Palakkadavath;Hung Le;Thanh Nguyen-Tang;Svetha Venkatesh;Sunil Gupta", "authorids": "~Ragja_Palakkadavath1;~Hung_Le1;~Thanh_Nguyen-Tang1;~Svetha_Venkatesh1;~Sunil_Gupta2", "gender": "F;F;M;M;M", "homepage": ";https://www.deakin.edu.au/about-deakin/people/svetha-venkatesh;https://thaihungle.github.io/;https://personal-sites.deakin.edu.au/~sunilg/;https://thanhnguyentang.github.io/", "dblp": "283/0847;81/1984;45/466-2;47/333-1;287/5102.html", "google_scholar": "https://scholar.google.co.in/citations?user=cVTpiuoAAAAJ;AEkRUQcAAAAJ;https://scholar.google.com.au/citations?user=q2HbxngAAAAJ;https://scholar.google.com.au/citations?user=bXeL2t8AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0002-3126-184X;0000-0002-3308-1930;0000-0002-1917-2190", "linkedin": ";;;;thanhnguyentang/", "or_profile": "~Ragja_Palakkadavath1;~Svetha_Venkatesh1;~Hung_Thai_Le1;~Sunil_Kumar_Gupta1;~Thanh_Tang_Nguyen2", "aff": "Deakin University;Deakin University;Deakin University;Deakin University;Johns Hopkins University", "aff_domain": "deakin.edu.au;deakin.edu.au;deakin.edu.au;deakin.edu.au;jhu.edu", "position": "PhD student;Full Professor;Lecturer;Full Professor;Postdoc", "bibtex": "@misc{\npalakkadavath2024fair,\ntitle={Fair Domain Generalization with Arbitrary Sensitive Attributes},\nauthor={Ragja Palakkadavath and Hung Le and Thanh Nguyen-Tang and Svetha Venkatesh and Sunil Gupta},\nyear={2024},\nurl={https://openreview.net/forum?id=3wL1tj3kqE}\n}", "github": "", "project": "", "reviewers": "kdx1;cyJi;XuYi;FctA", "site": "https://openreview.net/forum?id=3wL1tj3kqE", "pdf_size": 380256, "rating": "1;3;3;6", "confidence": "5;4;4;4", "soundness": "1;2;2;3", "contribution": "2;2;2;3", "presentation": "2;3;2;2", "wc_summary": "72;104;68;121", "wc_strengths": "38;197;14;79", "wc_weaknesses": "508;258;365;81", "wc_questions": "2;191;4;24", "wc_review": "620;750;451;305", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1445;977;1087;563", "reply_reviewers": "0;0;0;0", "reply_authors": "2;2;2;1", "rating_avg": [ 3.25, 1.7853571071357126 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.0, 0.7071067811865476 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 91.25, 22.128883839904805 ], "wc_strengths_avg": [ 82.0, 70.3455755538328 ], "wc_weaknesses_avg": [ 303.0, 155.8669304246414 ], "wc_questions_avg": [ 55.25, 78.84597326433354 ], "wc_review_avg": [ 531.5, 168.34265650749367 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1018.0, 314.5615996907442 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.7276068751089989, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:jEEsnffqJ1EJ:scholar.google.com/&scioq=Fair+Domain+Generalization+with+Arbitrary+Sensitive+Attributes&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Deakin University;Johns Hopkins University", "aff_unique_dep": ";", "aff_unique_url": "https://www.deakin.edu.au;https://www.jhu.edu", "aff_unique_abbr": "Deakin;JHU", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "Australia;United States" }, { "id": "3wde105NL2", "title": "test", "track": "main", "status": "Desk Reject", "tldr": "", "abstract": "test", "keywords": "CUHK", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Jiyue Jiang", "authorids": "~Jiyue_Jiang1", "gender": "M", "homepage": "", "dblp": "271/4659", "google_scholar": "https://scholar.google.com.hk/citations?user=O4ZaJ7QAAAAJ", "orcid": "", "linkedin": "jethro-jiang-1215661a1/", "or_profile": "~Jiyue_Jiang1", "aff": "The Chinese University of Hong Kong", "aff_domain": "link.cuhk.edu.hk", "position": "PhD student", "bibtex": "@misc{\njiang2024test,\ntitle={test},\nauthor={Jiyue Jiang},\nyear={2024},\nurl={https://openreview.net/forum?id=3wde105NL2}\n}", "github": "", "project": "", "reviewers": "", "site": "https://openreview.net/forum?id=3wde105NL2", "pdf_size": 0, "rating": "", "confidence": "", "soundness": "", "contribution": "", "presentation": "", "wc_summary": "", "wc_strengths": "", "wc_weaknesses": "", "wc_questions": "", "wc_review": "", "wc_reply_reviewers": "", "wc_reply_authors": "", "reply_reviewers": "", "reply_authors": "", "rating_avg": [ 0, 0 ], "confidence_avg": [ 0, 0 ], "soundness_avg": [ 0, 0 ], "contribution_avg": [ 0, 0 ], "presentation_avg": [ 0, 0 ], "wc_summary_avg": [ 0, 0 ], "wc_strengths_avg": [ 0, 0 ], "wc_weaknesses_avg": [ 0, 0 ], "wc_questions_avg": [ 0, 0 ], "wc_review_avg": [ 0, 0 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 0, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "aff_unique_index": "0", "aff_unique_norm": "Chinese University of Hong Kong", "aff_unique_dep": "", "aff_unique_url": "https://www.cuhk.edu.hk", "aff_unique_abbr": "CUHK", "aff_campus_unique_index": "0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "3wde105NL2", "title": "test", "track": "main", "status": "Desk Reject", "tldr": "", "abstract": "test", "keywords": "CUHK", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Jiyue Jiang", "authorids": "~Jiyue_Jiang1", "gender": "M", "homepage": "", "dblp": "271/4659", "google_scholar": "https://scholar.google.com.hk/citations?user=O4ZaJ7QAAAAJ", "orcid": "", "linkedin": "jethro-jiang-1215661a1/", "or_profile": "~Jiyue_Jiang1", "aff": "The Chinese University of Hong Kong", "aff_domain": "link.cuhk.edu.hk", "position": "PhD student", "bibtex": "@misc{\njiang2024test,\ntitle={test},\nauthor={Jiyue Jiang},\nyear={2024},\nurl={https://openreview.net/forum?id=3wde105NL2}\n}", "github": "", "project": "", "reviewers": "", "site": "https://openreview.net/forum?id=3wde105NL2", "pdf_size": 0, "rating": "", "confidence": "", "soundness": "", "contribution": "", "presentation": "", "wc_summary": "", "wc_strengths": "", "wc_weaknesses": "", "wc_questions": "", "wc_review": "", "wc_reply_reviewers": "", "wc_reply_authors": "", "reply_reviewers": "", "reply_authors": "", "rating_avg": [ 0, 0 ], "confidence_avg": [ 0, 0 ], "soundness_avg": [ 0, 0 ], "contribution_avg": [ 0, 0 ], "presentation_avg": [ 0, 0 ], "wc_summary_avg": [ 0, 0 ], "wc_strengths_avg": [ 0, 0 ], "wc_weaknesses_avg": [ 0, 0 ], "wc_questions_avg": [ 0, 0 ], "wc_review_avg": [ 0, 0 ], 
"wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 0, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "aff_unique_index": "0", "aff_unique_norm": "Chinese University of Hong Kong", "aff_unique_dep": "", "aff_unique_url": "https://www.cuhk.edu.hk", "aff_unique_abbr": "CUHK", "aff_campus_unique_index": "0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "title": "Sharpness-Aware Minimization Enhances Feature Quality via Balanced Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19489", "id": "3xDaj4pRna", "author_site": "Jacob Springer, Vaishnavh Nagarajan, Aditi Raghunathan", "tldr": "", "abstract": "Sharpness-Aware Minimization (SAM) has emerged as a promising alternative optimizer to stochastic gradient descent (SGD). The originally-proposed motivation behind SAM was to bias neural networks towards flatter minima that are believed to generalize better. However, recent studies have shown conflicting evidence on the relationship between flatness and generalization, suggesting that flatness does fully explain SAM's success. Sidestepping this debate, we identify an orthogonal effect of SAM that is beneficial out-of-distribution: we argue that SAM implicitly balances the quality of diverse features. SAM achieves this effect by adaptively suppressing well-learned features which gives remaining features opportunity to be learned. We show that this mechanism is beneficial in datasets that contain redundant or spurious features where SGD falls for the simplicity bias and would not otherwise learn all available features. 
Our insights are supported by experiments on real data: we demonstrate that SAM improves the quality of features in datasets containing redundant or spurious features, including CelebA, Waterbirds, CIFAR-MNIST, and DomainBed.", "keywords": "sharpness-aware minimization;representation learning;spurious correlations", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Jacob Mitchell Springer;Vaishnavh Nagarajan;Aditi Raghunathan", "authorids": "~Jacob_Mitchell_Springer1;~Vaishnavh_Nagarajan3;~Aditi_Raghunathan1", "gender": "M;F;M", "homepage": "https://sprin.xyz;https://www.cs.cmu.edu/~aditirag/;https://vaishnavh.github.io/", "dblp": ";166/1409;161/0079", "google_scholar": "niZiN38AAAAJ;Ch9iRwQAAAAJ;https://scholar.google.nl/citations?user=LrsjJfwAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Jacob_Mitchell_Springer1;~Aditi_Raghunathan1;~Vaishnavh_Nagarajan1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Google", "aff_domain": "cmu.edu;cmu.edu;google.com", "position": "PhD student;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nspringer2024sharpnessaware,\ntitle={Sharpness-Aware Minimization Enhances Feature Quality via Balanced Learning},\nauthor={Jacob Mitchell Springer and Vaishnavh Nagarajan and Aditi Raghunathan},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=3xDaj4pRna}\n}", "github": "", "project": "", "reviewers": "Jhr2;qpzp;BgWc;NKN8;LFgC;KbQq", "pdf_size": 1269488, "rating": "6;6;6;6;6;8", "confidence": "3;4;3;4;4;3", "soundness": "3;2;2;3;2;3", "contribution": "3;2;3;2;3;3", "presentation": "3;1;2;3;3;3", "wc_summary": "84;34;84;34;113;120", "wc_strengths": "59;16;54;42;43;122", "wc_weaknesses": "87;62;170;37;32;118", "wc_questions": "34;2;3;1;238;336", "wc_review": "264;114;311;114;426;696", "wc_reply_reviewers": "11;34;17;0;73;82", "wc_reply_authors": "590;423;527;51;797;535", "reply_reviewers": "1;1;1;0;1;1", "reply_authors": "1;1;1;1;1;1", "rating_avg": [ 6.333333333333333, 0.7453559924999298 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.5, 0.7637626158259734 ], "wc_summary_avg": [ 78.16666666666667, 33.99223767600807 ], "wc_strengths_avg": [ 56.0, 32.49102440162001 ], "wc_weaknesses_avg": [ 84.33333333333333, 48.230925993829125 ], "wc_questions_avg": [ 102.33333333333333, 134.0878650570422 ], "wc_review_avg": [ 320.8333333333333, 200.29013677385336 ], "wc_reply_reviewers_avg": [ 36.166666666666664, 31.01299010988066 ], "wc_reply_authors_avg": [ 487.1666666666667, 225.3903404220233 ], "reply_reviewers_avg": [ 0.8333333333333334, 0.37267799624996495 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.4472135954999579, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17722484160754341186&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=3xDaj4pRna", "pdf": "https://openreview.net/pdf?id=3xDaj4pRna", "email": "cmu.edu;cmu.edu;google.com", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Carnegie Mellon University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.cmu.edu;https://www.google.com", "aff_unique_abbr": "CMU;Google", "aff_campus_unique_index": "1", 
"aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Sophia: A Scalable Stochastic Second-order Optimizer for Language Model Pre-training", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19488", "id": "3xHDeA8Noi", "author_site": "Hong Liu, Zhiyuan Li, David Hall, Percy Liang, Tengyu Ma", "tldr": "", "abstract": "Given the massive cost of language model pre-training, a non-trivial improvement of the optimization algorithm would lead to a material reduction on the time and cost of training. Adam and its variants have been state-of-the-art for years, and more sophisticated second-order (Hessian-based) optimizers often incur too much per-step overhead. In this paper, we propose Sophia, a simple scalable second-order optimizer that uses a light-weight estimate of the diagonal Hessian as the pre-conditioner. The update is the moving average of the gradients divided by the moving average of the estimated Hessian, followed by element-wise clipping. The clipping controls the worst-case update size and tames the negative impact of non-convexity and rapid change of Hessian along the trajectory. Sophia only estimates the diagonal Hessian every handful of iterations, which has negligible average per-step time and memory overhead. On language modeling with GPT models of sizes ranging from 125M to 1.5B, Sophia achieves a 2x speed-up compared to Adam in the number of steps, total compute, and wall-clock time, achieving the same perplexity with 50\\% fewer steps, less total compute, and reduced wall-clock time.", "keywords": "large language models;pretraining;optimization in deep learning", "primary_area": "optimization", "supplementary_material": "/attachment/95a79cae116529a8b29395efabc5dd175b828ebd.zip", "author": "Hong Liu;Zhiyuan Li;David Leo Wright Hall;Percy Liang;Tengyu Ma", "authorids": "~Hong_Liu5;~Zhiyuan_Li2;~David_Leo_Wright_Hall1;~Percy_Liang1;~Tengyu_Ma1", "gender": "M;M;M;;M", "homepage": ";https://zhiyuanli.ttic.edu;;https://cs.stanford.edu/~pliang/;http://ai.stanford.edu/~tengyuma/", "dblp": ";l/ZhiyuanLi;133/2070;04/1701;54/9061", "google_scholar": "BUc2uq0AAAAJ;https://scholar.google.com/citations?hl=en;6GpZV0YAAAAJ;pouyVyUAAAAJ;i38QlUwAAAAJ", "orcid": ";;;;", "linkedin": ";;dlwhall/;;", "or_profile": "~Hong_Liu5;~Zhiyuan_Li2;~David_Leo_Wright_Hall1;~Percy_Liang1;~Tengyu_Ma1", "aff": "Stanford University;Toyota Technological Institute at Chicago;Stanford University;Stanford University;Facebook AI Research", "aff_domain": "stanford.edu;ttic.edu;stanford.edu;stanford.edu;fb.com", "position": "PhD student;Assistant Professor;Researcher;Associate Professor;Visiting Scientist", "bibtex": "@inproceedings{\nliu2024sophia,\ntitle={Sophia: A Scalable Stochastic Second-order Optimizer for Language Model Pre-training},\nauthor={Hong Liu and Zhiyuan Li and David Leo Wright Hall and Percy Liang and Tengyu Ma},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=3xHDeA8Noi}\n}", "github": "", "project": "", "reviewers": "x5VB;Gbmx;knid;R1G3", "pdf_size": 2834253, "rating": "6;8;8;8", "confidence": "4;4;3;3", "soundness": "3;4;3;3", "contribution": "2;4;4;4", "presentation": "3;2;3;4", "wc_summary": "213;66;100;107", "wc_strengths": "96;73;111;155", "wc_weaknesses": "82;98;91;21", "wc_questions": "61;141;110;366", "wc_review": "452;378;412;649", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": 
"314;306;185;425", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 3.5, 0.8660254037844386 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 121.5, 55.056788863863105 ], "wc_strengths_avg": [ 108.75, 29.93639089803579 ], "wc_weaknesses_avg": [ 73.0, 30.553232234904378 ], "wc_questions_avg": [ 169.5, 116.97969909347519 ], "wc_review_avg": [ 472.75, 105.07467582629032 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 307.5, 84.9367411665882 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 151, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1035408644622871003&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=3xHDeA8Noi", "pdf": "https://openreview.net/pdf?id=3xHDeA8Noi", "email": "stanford.edu;ttic.edu;stanford.edu;stanford.edu;fb.com", "author_num": 5, "aff_unique_index": "0;1;0;0;2", "aff_unique_norm": "Stanford University;Toyota Technological Institute at Chicago;Meta", "aff_unique_dep": ";;Facebook AI Research", "aff_unique_url": "https://www.stanford.edu;https://www.tti-chicago.org;https://research.facebook.com", "aff_unique_abbr": "Stanford;TTI Chicago;FAIR", "aff_campus_unique_index": "0;1;0;0", "aff_campus_unique": "Stanford;Chicago;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "3xHbRLymyZ", "title": "DeeDiff: Dynamic Uncertainty-Aware Early Exiting for Accelerating Diffusion Model Generation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Diffusion models achieve great success in generating diverse and high-fidelity images. The performance improvements come with low generation speed per image, which hinders the application diffusion models in real-time scenarios. While some certain predictions benefit from the full computation of the model in each sample iteration, not every iteration requires the same amount of computation, potentially leading to computation waste. In this work, we propose DeeDiff, an early exiting framework that adaptively allocates computation resources in each sampling step to improve the generation efficiency of diffusion models. Specifically, we introduce a timestep-aware uncertainty estimation module (UEM) for diffusion models which is attached to each intermediate layer to estimate the prediction uncertainty of each layer. The uncertainty is regarded as the signal to decide if the inference terminates. Moreover, we propose uncertainty-aware layer-wise loss to fill the performance gap between full models and early-exited models. With such loss strategy, our model is able to obtain comparable results as full-layer models. Extensive experiments of class-conditional, unconditional, and text-guided generation on several datasets show that our method achieves state-of-the-art performance and efficiency trade-off compared with existing early exiting methods on diffusion models. More importantly, our method even brings extra benefits to baseline models and obtains better performance on CIFAR-10 and Celeb-A datasets. 
Full code and model are released for reproduction.", "keywords": "Diffusion;Efficiency;Diffusion acceleration;Early Exiting", "primary_area": "generative models", "supplementary_material": "/attachment/8d54309968936bfe4e8fe8263a3f58de64671d0e.zip", "author": "Shengkun Tang;Yaqing Wang;Caiwen Ding;Yi Liang;Yao Li;Dongkuan Xu", "authorids": "~Shengkun_Tang1;~Yaqing_Wang1;~Caiwen_Ding1;~Yi_Liang1;~Yao_Li1;~Dongkuan_Xu2", "gender": ";M;M;M;F;M", "homepage": ";https://yaqingwang.github.io/;https://caiwending.cse.uconn.edu/;https://research.google/people/108265/;https://liyao880.github.io/yaoli/;https://dongkuanx27.github.io/", "dblp": ";147/1393;175/2489;;;142/8139", "google_scholar": ";_Rfg2CAAAAAJ;7hR0r_EAAAAJ;9vQ7gbgAAAAJ;bQ6YhCwAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0003-0891-1231;0000-0002-6622-8919;0000-0002-7195-5774;0000-0002-1456-9658", "linkedin": ";;caiwen-ding-47144489/;;yao-li-b189574a/;dongkuan-dk-xu-%F0%9F%87%BA%F0%9F%87%A6-05038087/", "or_profile": "~Shengkun_Tang1;~Yaqing_Wang1;~Caiwen_Ding1;~Yi_Liang1;~Yao_Li1;~Dongkuan_Xu2", "aff": ";Google DeepMind;University of Connecticut;Research, Google;University of North Carolina, Chapel Hill;North Carolina State University", "aff_domain": ";google.com;uconn.edu;research.google.com;unc.edu;ncsu.edu", "position": ";Research Scientist;Assistant Professor;Researcher;Assistant Professor;Assistant Professor", "bibtex": "@misc{\ntang2024deediff,\ntitle={DeeDiff: Dynamic Uncertainty-Aware Early Exiting for Accelerating Diffusion Model Generation},\nauthor={Shengkun Tang and Yaqing Wang and Caiwen Ding and Yi Liang and Yao Li and Dongkuan Xu},\nyear={2024},\nurl={https://openreview.net/forum?id=3xHbRLymyZ}\n}", "github": "", "project": "", "reviewers": "xWTy;ttQR;eCAn;PB4D", "site": "https://openreview.net/forum?id=3xHbRLymyZ", "pdf_size": 8973925, "rating": "3;5;5;5", "confidence": "4;4;4;4", "soundness": "3;3;3;2", "contribution": "2;2;2;2", "presentation": "1;2;3;2", "wc_summary": "52;102;32;61", "wc_strengths": "42;61;31;37", "wc_weaknesses": "245;365;94;351", "wc_questions": "5;5;4;63", "wc_review": "344;533;161;512", "wc_reply_reviewers": "0;20;0;0", "wc_reply_authors": "204;515;217;888", "reply_reviewers": "0;1;0;0", "reply_authors": "1;2;1;2", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 61.75, 25.498774480354932 ], "wc_strengths_avg": [ 42.75, 11.233320969330485 ], "wc_weaknesses_avg": [ 263.75, 108.43287093865956 ], "wc_questions_avg": [ 19.25, 25.262373206015305 ], "wc_review_avg": [ 387.5, 149.8874577808297 ], "wc_reply_reviewers_avg": [ 5.0, 8.660254037844387 ], "wc_reply_authors_avg": [ 456.0, 278.71580507750184 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11105961738940576174&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1;0;2;3", "aff_unique_norm": "Google;University of Connecticut;University of North Carolina;North Carolina State University", "aff_unique_dep": "Google DeepMind;;;", "aff_unique_url": "https://deepmind.com;https://www.uconn.edu;https://www.unc.edu;https://www.ncsu.edu", "aff_unique_abbr": "DeepMind;UConn;UNC;NCSU", "aff_campus_unique_index": "1;2", "aff_campus_unique": 
";Mountain View;Chapel Hill", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Brain decoding: toward real-time reconstruction of visual perception", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19487", "id": "3y1K6buO8c", "author_site": "Yohann Benchetrit, Hubert Banville, Jean-Remi King", "tldr": "", "abstract": "In the past five years, the use of generative and foundational AI systems has greatly improved the decoding of brain activity. Visual perception, in particular, can now be decoded from functional Magnetic Resonance Imaging (fMRI) with remarkable fidelity. This neuroimaging technique, however, suffers from a limited temporal resolution ($\\approx$0.5\\,Hz) and thus fundamentally constrains its real-time usage. Here, we propose an alternative approach based on magnetoencephalography (MEG), a neuroimaging device capable of measuring brain activity with high temporal resolution ($\\approx$5,000 Hz). For this, we develop an MEG decoding model trained with both contrastive and regression objectives and consisting of three modules: i) pretrained embeddings obtained from the image, ii) an MEG module trained end-to-end and iii) a pretrained image generator. Our results are threefold: Firstly, our MEG decoder shows a 7X improvement of image-retrieval over classic linear decoders. Second, late brain responses to images are best decoded with DINOv2, a recent foundational image model. Third, image retrievals and generations both suggest that high-level visual features can be decoded from MEG signals, although the same approach applied to 7T fMRI also recovers better low-level features. Overall, these results, while preliminary, provide an important step towards the decoding - in real-time - of the visual processes continuously unfolding within the human brain.", "keywords": "brain decoding;neuroimaging;image generation;visual perception", "primary_area": "applications to neuroscience & cognitive science", "supplementary_material": "/attachment/33660daed024e43122d3ebfc674cd73724ade9bb.pdf", "author": "Yohann Benchetrit;Hubert Banville;Jean-Remi King", "authorids": "~Yohann_Benchetrit1;~Hubert_Banville1;~Jean-Remi_King1", "gender": "M;;M", "homepage": ";;https://kingjr.github.io/", "dblp": "42/9992.html;;", "google_scholar": ";;XZOgIwEAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Yohann_Benchetrit1;~Hubert_Banville1;~Jean-Remi_King1", "aff": "Meta AI;;CNRS", "aff_domain": "ai.meta.com;;cnrs.fr", "position": "Researcher;;Associate Professor", "bibtex": "@inproceedings{\nbenchetrit2024brain,\ntitle={Brain decoding: toward real-time reconstruction of visual perception},\nauthor={Yohann Benchetrit and Hubert Banville and Jean-Remi King},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=3y1K6buO8c}\n}", "github": "", "project": "", "reviewers": "F12m;4PGh;hWdQ;hpxq", "pdf_size": 8172811, "rating": "6;6;8;8", "confidence": "3;3;4;4", "soundness": "3;3;4;3", "contribution": "2;2;4;2", "presentation": "3;3;3;4", "wc_summary": "92;145;122;89", "wc_strengths": "139;98;49;34", "wc_weaknesses": "597;142;42;371", "wc_questions": "7;119;14;51", "wc_review": "835;504;227;545", "wc_reply_reviewers": "192;47;0;21", "wc_reply_authors": "2911;1014;770;1765", "reply_reviewers": "2;2;0;1", "reply_authors": "7;4;2;3", "rating_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], 
"contribution_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 112.0, 23.010866998007703 ], "wc_strengths_avg": [ 80.0, 41.47891030391228 ], "wc_weaknesses_avg": [ 288.0, 214.5938023336182 ], "wc_questions_avg": [ 47.75, 44.40368790990226 ], "wc_review_avg": [ 527.75, 215.47317118379263 ], "wc_reply_reviewers_avg": [ 65.0, 75.18975994109836 ], "wc_reply_authors_avg": [ 1615.0, 833.2679641027848 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 4.0, 1.8708286933869707 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 54, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11496380628988912301&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=3y1K6buO8c", "pdf": "https://openreview.net/pdf?id=3y1K6buO8c", "email": "ai.meta.com;;cnrs.fr", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Meta;Centre National de la Recherche Scientifique", "aff_unique_dep": "Meta AI;", "aff_unique_url": "https://meta.com;https://www.cnrs.fr", "aff_unique_abbr": "Meta;CNRS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;France" }, { "title": "T-Rep: Representation Learning for Time Series using Time-Embeddings", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19486", "id": "3y2TfP966N", "author_site": "Archibald Fraikin, Adrien Bennetot, Stephanie Allassonniere", "tldr": "", "abstract": "Multivariate time series present challenges to standard machine learning techniques, as they are often unlabeled, high dimensional, noisy, and contain missing data. To address this, we propose T-Rep, a self-supervised method to learn time series representations at a timestep granularity. T-Rep learns vector embeddings of time alongside its feature extractor, to extract temporal features such as trend, periodicity, or distribution shifts from the signal. These time-embeddings are leveraged in pretext tasks, to incorporate smooth and fine-grained temporal dependencies in the representations, as well as reinforce robustness to missing data. We evaluate T-Rep on downstream classification, forecasting, and anomaly detection tasks. It is compared to existing self-supervised algorithms for time series, which it outperforms in all three tasks. We test T-Rep in missing data regimes, where it proves more resilient than its counterparts. 
Finally, we provide latent space visualisation experiments, highlighting the interpretability of the learned representations.", "keywords": "Multivariate time series;Self-supervised;Time series representations;Temporal features;Time-Embeddings;Representation Learning;Missing data", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Archibald Felix Fraikin;Adrien Bennetot;Stephanie Allassonniere", "authorids": "~Archibald_Felix_Fraikin1;~Adrien_Bennetot1;~Stephanie_Allassonniere1", "gender": "M;M;F", "homepage": ";;https://sites.google.com/site/stephanieallassonniere/", "dblp": ";;", "google_scholar": ";Wo4dfcgAAAAJ;https://scholar.google.fr/citations?user=9ubMya8AAAAJ", "orcid": ";;", "linkedin": "archibald-fraikin-819607194/;;", "or_profile": "~Archibald_Felix_Fraikin1;~Adrien_Bennetot1;~Stephanie_Allassonniere1", "aff": ";;University Paris Descartes", "aff_domain": ";;parisdescartes.fr", "position": ";;Full Professor", "bibtex": "@inproceedings{\nfraikin2024trep,\ntitle={T-Rep: Representation Learning for Time Series using Time-Embeddings},\nauthor={Archibald Felix Fraikin and Adrien Bennetot and Stephanie Allassonniere},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=3y2TfP966N}\n}", "github": "", "project": "", "reviewers": "znEu;99iS;9kuw;rWLt;5x3c", "pdf_size": 2636673, "rating": "5;5;6;6;8", "confidence": "5;4;4;4;3", "soundness": "3;3;2;3;3", "contribution": "2;3;3;3;3", "presentation": "3;3;3;4;4", "wc_summary": "46;85;68;103;109", "wc_strengths": "119;126;43;44;231", "wc_weaknesses": "204;190;165;78;301", "wc_questions": "3;8;152;215;51", "wc_review": "372;409;428;440;692", "wc_reply_reviewers": "69;0;140;0;188", "wc_reply_authors": "1669;777;1433;1523;938", "reply_reviewers": "1;0;1;0;1", "reply_authors": "4;2;4;3;3", "rating_avg": [ 6.0, 1.0954451150103321 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "contribution_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 82.2, 23.111901695879553 ], "wc_strengths_avg": [ 112.6, 68.97709764842241 ], "wc_weaknesses_avg": [ 187.6, 71.63407010633976 ], "wc_questions_avg": [ 85.8, 83.89851011787992 ], "wc_review_avg": [ 468.2, 114.24254899117054 ], "wc_reply_reviewers_avg": [ 79.4, 75.07755989641645 ], "wc_reply_authors_avg": [ 1268.0, 347.2843215579995 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 3.2, 0.7483314773547882 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8660254037844387, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9027301723575336990&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=3y2TfP966N", "pdf": "https://openreview.net/pdf?id=3y2TfP966N", "email": ";;parisdescartes.fr", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "University Paris Descartes", "aff_unique_dep": "", "aff_unique_url": "https://www.univ-paris5.fr", "aff_unique_abbr": "UPD", "aff_country_unique_index": "0", "aff_country_unique": "France" }, { "id": "3yyGlNHnlj", "title": "GraphECL: Towards Efficient Contrastive Learning for Graphs", "track": "main", "status": "Reject", "tldr": "", "abstract": "Due to the inherent label scarcity, learning useful representations on graphs with no supervision is of great 
benefit. Yet, existing graph self-supervised learning methods overlook the scalability challenge and fail to conduct fast inference of representations in latency-constrained applications due to the intensive message passing of graph neural networks. In this paper, we present GraphECL, a simple and efficient contrastive learning paradigm for graphs. To achieve inference acceleration, GraphECL does not rely on graph augmentations but introduces cross-model contrastive learning, where positive samples are obtained through MLP and GNN representations from the central node and its neighbors. We provide theoretical analysis on the design of this cross-model framework and discuss why the MLP can still capture structural information and enjoy downstream performance as good as the GNN. Extensive experiments on common real-world tasks verify the superior performance of GraphECL compared to state-of-the-art methods, highlighting its intriguing properties, including better inference efficiency and generalization to both homophilous and heterophilous graphs. On large-scale datasets such as Snap-patents, the MLP learned by GraphECL is 286.82x faster than GCL methods with the same number of GNN layers.", "keywords": "Graph Neural Networks", "primary_area": "learning on graphs and other geometries & topologies", "supplementary_material": "", "author": "Teng Xiao;Huaisheng Zhu;Zhiwei Zhang;Zhimeng Guo;Charu C. Aggarwal;Suhang Wang", "authorids": "~Teng_Xiao2;~Huaisheng_Zhu1;~Zhiwei_Zhang10;~Zhimeng_Guo1;~Charu_C._Aggarwal2;~Suhang_Wang1", "gender": "M;M;M;M;M;M", "homepage": ";https://zzwjames.github.io/zhiweizhang.github.io/;;http://www.charuaggarwal.net;https://faculty.ist.psu.edu/szw494/;https://tengxiao1.github.io/", "dblp": "264/2622.html;68/1980-1.html;304/3478;a/CharuCAggarwal;136/9440;", "google_scholar": ";_Rfg2CAAAAAJ;7hR0r_EAAAAJ;x_wsduUAAAAJ;cdT_WMMAAAAJ;ld3OKXwAAAAJ", "orcid": ";0009-0007-6153-2739;;0000-0003-2579-7581;0000-0003-3448-4878;", "linkedin": ";;;;;", "or_profile": "~Huaisheng_Zhu1;~Zhiwei_Zhang10;~Zhimeng_Guo1;~Charu_C._Aggarwal2;~Suhang_Wang1;~Teng_Xiao1", "aff": "Pennsylvania State University;Pennsylvania State University;Pennsylvania State University;International Business Machines;Pennsylvania State University;The Pennsylvania State University", "aff_domain": "psu.edu;psu.edu;psu.edu;ibm.com;psu.edu;psu.edu", "position": "PhD student;PhD student;PhD student;Distinguished Research Staff Member;Assistant Professor;PhD student", "bibtex": "@misc{\nxiao2024graphecl,\ntitle={Graph{ECL}: Towards Efficient Contrastive Learning for Graphs},\nauthor={Teng Xiao and Huaisheng Zhu and Zhiwei Zhang and Zhimeng Guo and Charu C. 
Aggarwal and Suhang Wang},\nyear={2024},\nurl={https://openreview.net/forum?id=3yyGlNHnlj}\n}", "github": "", "project": "", "reviewers": "ZTuD;GMYZ;eihc;ezu8;xAqE", "site": "https://openreview.net/forum?id=3yyGlNHnlj", "pdf_size": 1482963, "rating": "3;6;6;6;6", "confidence": "4;5;4;5;3", "soundness": "2;2;3;3;2", "contribution": "2;2;3;3;3", "presentation": "2;2;2;3;3", "wc_summary": "43;69;213;103;90", "wc_strengths": "67;33;97;47;37", "wc_weaknesses": "231;280;172;62;141", "wc_questions": "170;5;28;48;13", "wc_review": "511;387;510;260;281", "wc_reply_reviewers": "267;42;0;27;16", "wc_reply_authors": "1497;834;56;563;656", "reply_reviewers": "1;2;0;1;1", "reply_authors": "4;3;1;2;3", "rating_avg": [ 5.4, 1.2 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "contribution_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 103.6, 58.34929305484343 ], "wc_strengths_avg": [ 56.2, 23.54909764725604 ], "wc_weaknesses_avg": [ 177.2, 74.92769848327119 ], "wc_questions_avg": [ 52.8, 60.40331116751796 ], "wc_review_avg": [ 389.8, 107.54608314578454 ], "wc_reply_reviewers_avg": [ 70.4, 99.2544205564669 ], "wc_reply_authors_avg": [ 721.2, 466.1241894602768 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.6, 1.019803902718557 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.1336306209562122, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:InVHuKjFXzEJ:scholar.google.com/&scioq=GraphECL:+Towards+Efficient+Contrastive+Learning+for+Graphs&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "Pennsylvania State University;International Business Machines Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.psu.edu;https://www.ibm.com", "aff_unique_abbr": "PSU;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Geometrically Aligned Transfer Encoder for Inductive Transfer in Regression Tasks", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19485", "id": "3z60EWfh1p", "author_site": "Sung Moon Ko, Sumin Lee, Dae-Woong Jeong, Woohyung Lim, Sehui Han", "tldr": "", "abstract": "Transfer learning is a crucial technique for handling a small amount of data that is potentially related to other abundant data. However, most of the existing methods are focused on classification tasks using images and language datasets. Therefore, in order to expand the transfer learning scheme to regression tasks, we propose a novel transfer technique based on differential geometry, namely the Geometrically Aligned Transfer Encoder (${\\it GATE}$). In this method, we interpret the latent vectors from the model to exist on a Riemannian curved manifold. We find a proper diffeomorphism between pairs of tasks to ensure that every arbitrary point maps to a locally flat coordinate in the overlapping region, allowing the transfer of knowledge from the source to the target data. This also serves as an effective regularizer for the model to behave in extrapolation regions. 
In this article, we demonstrate that ${\\it GATE}$ outperforms conventional methods and exhibits stable behavior in both the latent space and extrapolation regions for various molecular graph datasets.", "keywords": "Transfer Learning;Inductive Transfer;Geometrical Deeplearning;Regression", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "/attachment/6f3c2c8e33c955955cfe00ae0297256536311fc8.zip", "author": "Sung Moon Ko;Sumin Lee;Dae-Woong Jeong;Woohyung Lim;Sehui Han", "authorids": "~Sung_Moon_Ko1;~Sumin_Lee4;~Dae-Woong_Jeong1;~Woohyung_Lim1;~Sehui_Han1", "gender": "M;;M;M;F", "homepage": ";;;;", "dblp": "329/4280;;;86/7195;323/8751", "google_scholar": ";;https://scholar.google.co.kr/citations?hl=en;https://scholar.google.co.kr/citations?user=gtvxdcUAAAAJ;", "orcid": ";0009-0006-9077-2435;;0000-0003-0525-9065;", "linkedin": "sungmoonko/;sumin-lee-781138273/;;woohyunglim/;sehui-han-817a90182/", "or_profile": "~Sung_Moon_Ko1;~Sumin_Lee4;~Dae-Woong_Jeong1;~Woohyung_Lim1;~Sehui_Han1", "aff": "LG AI Research;LG AI Research;LG AI Research;LG AI Research;LG AI Research", "aff_domain": "lgresearch.ai;lgresearch.ai;lgresearch.ai;lgresearch.ai;lgresearch.ai", "position": "Researcher;Researcher;Researcher;Vice President;Researcher", "bibtex": "@inproceedings{\nko2024geometrically,\ntitle={Geometrically Aligned Transfer Encoder for Inductive Transfer in Regression Tasks},\nauthor={Sung Moon Ko and Sumin Lee and Dae-Woong Jeong and Woohyung Lim and Sehui Han},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=3z60EWfh1p}\n}", "github": "", "project": "", "reviewers": "NCqT;nPy4;6Cr1;Gg1C", "pdf_size": 846519, "rating": "5;6;6;8", "confidence": "4;3;3;2", "soundness": "2;3;3;2", "contribution": "2;3;3;3", "presentation": "3;3;2;2", "wc_summary": "104;204;33;132", "wc_strengths": "64;93;47;97", "wc_weaknesses": "69;235;114;144", "wc_questions": "114;60;126;100", "wc_review": "351;592;320;473", "wc_reply_reviewers": "0;215;30;252", "wc_reply_authors": "627;757;614;771", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 118.25, 61.26326386995717 ], "wc_strengths_avg": [ 75.25, 20.69269194667528 ], "wc_weaknesses_avg": [ 140.5, 60.73919657025437 ], "wc_questions_avg": [ 100.0, 24.859605789312106 ], "wc_review_avg": [ 434.0, 107.66847263707236 ], "wc_reply_reviewers_avg": [ 124.25, 110.54043377877618 ], "wc_reply_authors_avg": [ 692.25, 72.06724290549764 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.9733285267845754, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=844556749006184771&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=3z60EWfh1p", "pdf": "https://openreview.net/pdf?id=3z60EWfh1p", "email": "lgresearch.ai;lgresearch.ai;lgresearch.ai;lgresearch.ai;lgresearch.ai", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "LG", "aff_unique_dep": "LG AI Research", "aff_unique_url": "https://www.lgaires.com", "aff_unique_abbr": "LG AI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": 
"0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "On-Policy Distillation of Language Models: Learning from Self-Generated Mistakes", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19484", "id": "3zKtaqxLhW", "author_site": "Rishabh Agarwal, Nino Vieillard, Yongchao Zhou, Piotr Stanczyk, Sabela Ramos Garea, Matthieu Geist, Olivier Bachem", "tldr": "", "abstract": "Knowledge distillation (KD) is widely used for compressing a teacher model to reduce its inference cost and memory footprint, by training a smaller student model. However, current KD methods for auto-regressive sequence models suffer from distribution mismatch between output sequences seen during training and those generated by the student during inference. To address this issue, we introduce Generalized Knowledge Distillation (GKD). Instead of solely relying on a fixed set of output sequences, GKD trains the student on its self-generated output sequences by leveraging feedback from the teacher on such sequences. Unlike supervised KD approaches, GKD also offers the flexibility to employ alternative loss functions between the student and teacher, which can be useful when the student lacks the expressivity to mimic the teacher's distribution. Furthermore, GKD facilitates the seamless integration of distillation with RL fine-tuning (RLHF). We demonstrate the efficacy of GKD for distilling auto-regressive T5 language models on summarization, translation, and arithmetic reasoning tasks.", "keywords": "Language models;Distillation;RLHF", "primary_area": "generative models", "supplementary_material": "", "author": "Rishabh Agarwal;Nino Vieillard;Yongchao Zhou;Piotr Stanczyk;Sabela Ramos Garea;Matthieu Geist;Olivier Bachem", "authorids": "~Rishabh_Agarwal2;~Nino_Vieillard1;~Yongchao_Zhou1;~Piotr_Stanczyk1;~Sabela_Ramos_Garea1;~Matthieu_Geist1;~Olivier_Bachem1", "gender": "M;;M;M;F;M;M", "homepage": "https://agarwl.github.io;;;;https://scholar.google.com/citations?user=97trlAkAAAAJ&hl=en;;http://www.olivierbachem.ch/", "dblp": ";243/5918;;;;38/6508;https://dblp.org/pers/hd/b/Bachem:Olivier", "google_scholar": "https://scholar.google.ca/citations?user=aH8AJu4AAAAJ;https://scholar.google.fr/citations?user=4jua80IAAAAJ;35M6rhsAAAAJ;fKVK0dYAAAAJ;97trlAkAAAAJ;ectPLEUAAAAJ;https://scholar.google.ch/citations?user=mW9BcgsAAAAJ", "orcid": ";;;;;;", "linkedin": ";;yongchao-zhou-a298a7158/;;;;olivier-bachem-10257756/", "or_profile": "~Rishabh_Agarwal2;~Nino_Vieillard1;~Yongchao_Zhou1;~Piotr_Stanczyk1;~Sabela_Ramos_Garea1;~Matthieu_Geist1;~Olivier_Bachem1", "aff": "Google DeepMind;Google Deepmind;University of Toronto;;University of A Coru\u00f1a;Google;Google Brain", "aff_domain": "google.com;google.com;mail.utoronto.ca;;udc.es;google.com;google.com", "position": "Research Scientist;Researcher;PhD student;;PhD student;Researcher;Research scientist", "bibtex": "@inproceedings{\nagarwal2024onpolicy,\ntitle={On-Policy Distillation of Language Models: Learning from Self-Generated Mistakes},\nauthor={Rishabh Agarwal and Nino Vieillard and Yongchao Zhou and Piotr Stanczyk and Sabela Ramos Garea and Matthieu Geist and Olivier Bachem},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=3zKtaqxLhW}\n}", "github": "", "project": "", "reviewers": "Jqut;YEFj;Qwi5;HokL", "pdf_size": 671860, "rating": "6;6;6;8", "confidence": "3;3;3;3", "soundness": "3;3;2;3", "contribution": "3;2;2;3", "presentation": "3;3;3;3", "wc_summary": 
"107;61;44;80", "wc_strengths": "151;35;57;61", "wc_weaknesses": "116;30;106;119", "wc_questions": "94;26;2;59", "wc_review": "468;152;209;319", "wc_reply_reviewers": "0;0;24;14", "wc_reply_authors": "628;255;502;514", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 73.0, 23.39871791359518 ], "wc_strengths_avg": [ 76.0, 44.41846462902562 ], "wc_weaknesses_avg": [ 92.75, 36.54705870518173 ], "wc_questions_avg": [ 45.25, 34.665364558879226 ], "wc_review_avg": [ 287.0, 120.51348472266496 ], "wc_reply_reviewers_avg": [ 9.5, 10.136567466356647 ], "wc_reply_authors_avg": [ 474.75, 136.06868669903446 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 109, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17644314409470129095&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=3zKtaqxLhW", "pdf": "https://openreview.net/pdf?id=3zKtaqxLhW", "email": "google.com;google.com;mail.utoronto.ca;;udc.es;google.com;google.com", "author_num": 7, "aff_unique_index": "0;1;2;3;0;0", "aff_unique_norm": "Google;DeepMind;University of Toronto;University of A Coru\u00f1a", "aff_unique_dep": "Google DeepMind;DeepMind;;", "aff_unique_url": "https://deepmind.com;https://deepmind.com;https://www.utoronto.ca;https://www.udc.es", "aff_unique_abbr": "DeepMind;DeepMind;U of T;UDC", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;1;2;3;3", "aff_country_unique": "United Kingdom;Canada;Spain;United States" }, { "title": "REBAR: Retrieval-Based Reconstruction for Time-series Contrastive Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19483", "id": "3zQo5oUvia", "author_site": "Maxwell Xu, Alexander Moreno, Hui Wei, Benjamin M Marlin, James Rehg", "tldr": "", "abstract": "The success of self-supervised contrastive learning hinges on identifying positive data pairs, such that when they are pushed together in embedding space, the space encodes useful information for subsequent downstream tasks. Constructing positive pairs is non-trivial as the pairing must be similar enough to reflect a shared semantic meaning, but different enough to capture within-class variation. Classical approaches in vision use augmentations to exploit well-established invariances to construct positive pairs, but invariances in the time-series domain are much less obvious. In our work, we propose a novel method of using a learned measure for identifying positive pairs. Our Retrieval-Based Reconstruction (REBAR) measure measures the similarity between two sequences as the reconstruction error that results from reconstructing one sequence with retrieved information from the other. Then, if the two sequences have high REBAR similarity, we label them as a positive pair. Through validation experiments, we show that the REBAR error is a predictor of mutual class membership. 
Once integrated into a contrastive learning framework, our REBAR method learns an embedding that achieves state-of-the-art performance on downstream tasks across various modalities.", "keywords": "time-series;contrastive learning;masked reconstruction;self-supervised learning;imputation;unsupervised learning", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/c86726dbbfe0b277c6a2e05cefadb1c1da643426.pdf", "author": "Maxwell Xu;Alexander Moreno;Hui Wei;Benjamin Marlin;James Matthew Rehg", "authorids": "~Maxwell_Xu1;~Alexander_Moreno1;~Hui_Wei3;~Benjamin_Marlin1;~James_Matthew_Rehg1", "gender": ";M;M;M;M", "homepage": "https://maxxu05.github.io/;;https://wll199566.github.io/davidhuiwei.github.io/;https://groups.cs.umass.edu/marlin/;http://rehg.org/", "dblp": ";161/6588;;03/7058.html;r/JMRehg", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;zoqP2-IAAAAJ;;ey960FIAAAAJ;https://scholar.google.com.tw/citations?user=8kA3eDwAAAAJ", "orcid": ";;;0000-0002-2626-3410;0000-0003-1793-5462", "linkedin": ";;;;", "or_profile": "~Maxwell_Xu1;~Alexander_Moreno1;~Hui_Wei3;~Benjamin_Marlin1;~James_Rehg1", "aff": "University of Illinois, Urbana Champaign;STR;University of Massachusetts Amherst;University of Massachusetts at Amherst;University of Illinois, Urbana Champaign", "aff_domain": "illinois.edu;str.edu;cs.umass.edu;umass.edu;illinois.edu", "position": "PhD student;Researcher;Researcher;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nxu2024rebar,\ntitle={{REBAR}: Retrieval-Based Reconstruction for Time-series Contrastive Learning},\nauthor={Maxwell Xu and Alexander Moreno and Hui Wei and Benjamin Marlin and James Matthew Rehg},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=3zQo5oUvia}\n}", "github": "", "project": "", "reviewers": "mFnV;gCTH;jiyG;Hx4L", "pdf_size": 5845969, "rating": "5;5;6;8", "confidence": "3;4;3;4", "soundness": "3;2;2;3", "contribution": "2;2;2;3", "presentation": "2;3;2;3", "wc_summary": "92;37;72;34", "wc_strengths": "12;88;93;95", "wc_weaknesses": "131;137;293;113", "wc_questions": "135;80;39;5", "wc_review": "370;342;497;247", "wc_reply_reviewers": "34;0;38;0", "wc_reply_authors": "2303;1495;1870;370", "reply_reviewers": "1;0;2;0", "reply_authors": "4;2;3;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 58.75, 24.324627438051337 ], "wc_strengths_avg": [ 72.0, 34.734708865916815 ], "wc_weaknesses_avg": [ 168.5, 72.42064622743986 ], "wc_questions_avg": [ 64.75, 48.478732450426136 ], "wc_review_avg": [ 364.0, 89.30005599102388 ], "wc_reply_reviewers_avg": [ 18.0, 18.05547008526779 ], "wc_reply_authors_avg": [ 1509.5, 717.3341271680862 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.40824829046386296, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14225911127705256116&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "openreview": "https://openreview.net/forum?id=3zQo5oUvia", "pdf": "https://openreview.net/pdf?id=3zQo5oUvia", "email": "illinois.edu;str.edu;cs.umass.edu;umass.edu;illinois.edu", "author_num": 5, "aff_unique_index": 
"0;2;2;0", "aff_unique_norm": "University of Illinois Urbana-Champaign;;University of Massachusetts Amherst", "aff_unique_dep": ";;", "aff_unique_url": "https://illinois.edu;;https://www.umass.edu", "aff_unique_abbr": "UIUC;;UMass Amherst", "aff_campus_unique_index": "0;2;2;0", "aff_campus_unique": "Urbana-Champaign;;Amherst", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States;" }, { "title": "DORSal: Diffusion for Object-centric Representations of Scenes $\\textit{et al.}$", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19482", "id": "3zvB14IF6D", "author_site": "Allan Jabri, Sjoerd van Steenkiste, Emiel Hoogeboom, Mehdi S. M. Sajjadi, Thomas Kipf", "tldr": "", "abstract": "Recent progress in 3D scene understanding enables scalable learning of representations across large datasets of diverse scenes. As a consequence, generalization to unseen scenes and objects, rendering novel views from just a single or a handful of input images, and controllable scene generation that supports editing, is now possible. However, training jointly on a large number of scenes typically compromises rendering quality when compared to single-scene optimized models such as NeRFs. In this paper, we leverage recent progress in diffusion models to equip 3D scene representation learning models with the ability to render high-fidelity novel views, while retaining benefits such as object-level scene editing to a large degree. In particular, we propose DORSal, which adapts a video diffusion architecture for 3D scene generation conditioned on frozen object-centric slot-based representations of scenes. On both complex synthetic multi-object scenes and on the real-world large-scale Street View dataset, we show that DORSal enables scalable neural rendering of 3D scenes with object-level editing and improves upon existing approaches.", "keywords": "novel view synthesis;object-centric scene representations;camera control;scene editing;3D;diffusion;generative models", "primary_area": "generative models", "supplementary_material": "/attachment/ca67852edc6143b7cc616754dc453757a5449790.zip", "author": "Allan Jabri;Sjoerd van Steenkiste;Emiel Hoogeboom;Mehdi S. M. Sajjadi;Thomas Kipf", "authorids": "~Allan_Jabri2;~Sjoerd_van_Steenkiste1;~Emiel_Hoogeboom1;~Mehdi_S._M._Sajjadi1;~Thomas_Kipf2", "gender": ";M;;Unspecified;M", "homepage": "http://ajabri.github.io;http://www.sjoerdvansteenkiste.com/;;http://msajjadi.com;http://tkipf.github.io/", "dblp": "172/0858;183/9326;217/1488;164/6190;186/8206", "google_scholar": ";i-AStBYAAAAJ;https://scholar.google.nl/citations?user=nkTd_BIAAAAJ;https://scholar.google.de/citations?user=rHF25YEAAAAJ;83HL5FwAAAAJ", "orcid": ";;;0000-0002-6002-2370;", "linkedin": ";;;;thomas-kipf-6b260410a", "or_profile": "~Allan_Jabri2;~Sjoerd_van_Steenkiste1;~Emiel_Hoogeboom1;~Mehdi_S._M._Sajjadi1;~Thomas_N._Kipf1", "aff": ";Google;Google;Google DeepMind;Google", "aff_domain": ";google.com;google.com;google.com;google.com", "position": ";Researcher;Researcher;Researcher;Research Scientist", "bibtex": "@inproceedings{\njabri2024dorsal,\ntitle={{DORS}al: Diffusion for Object-centric Representations of Scenes \\${\\textbackslash}textit\\{et al.\\}\\$},\nauthor={Allan Jabri and Sjoerd van Steenkiste and Emiel Hoogeboom and Mehdi S. M. 
Sajjadi and Thomas Kipf},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=3zvB14IF6D}\n}", "github": "", "project": "", "reviewers": "16Uh;J43q;dJMA;jJcr", "pdf_size": 16061122, "rating": "5;6;6;6", "confidence": "5;4;4;3", "soundness": "3;3;4;3", "contribution": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "99;56;43;69", "wc_strengths": "73;65;71;127", "wc_weaknesses": "138;50;270;126", "wc_questions": "49;99;376;2", "wc_review": "359;270;760;324", "wc_reply_reviewers": "189;156;242;112", "wc_reply_authors": "814;1386;2000;470", "reply_reviewers": "1;1;2;1", "reply_authors": "2;3;4;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 66.75, 20.765054779605084 ], "wc_strengths_avg": [ 84.0, 25.0 ], "wc_weaknesses_avg": [ 146.0, 79.14543574963751 ], "wc_questions_avg": [ 131.5, 145.26957699394598 ], "wc_review_avg": [ 428.25, 194.14218372110685 ], "wc_reply_reviewers_avg": [ 174.75, 47.47301865270419 ], "wc_reply_authors_avg": [ 1167.5, 581.435078061171 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "openreview": "https://openreview.net/forum?id=3zvB14IF6D", "pdf": "https://openreview.net/pdf?id=3zvB14IF6D", "email": ";google.com;google.com;google.com;google.com", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "41CYtxM2jQ", "title": "Boosting Fast and High-Quality Speech Synthesis with Linear Diffusion", "track": "main", "status": "Reject", "tldr": "", "abstract": "Denoising diffusion probabilistic models have shown extraordinary ability on various generative tasks. However, their slow inference speed renders them impractical in speech synthesis. This paper proposes a linear diffusion model (LinDiff) based on an ordinary differential equation to simultaneously reach fast inference and high sample quality. We employ linear interpolation between the target and noise to design a diffusion sequence for training, whereas in prior work the diffusion path that links the noise and target is a curved segment. When we decrease the number of sampling steps (i.e., the number of line segments used to fit the path), the ease of fitting straight lines compared to curves allows us to generate higher-quality samples from random noise with fewer iterations. To reduce computational complexity and achieve effective global modeling of noisy speech, LinDiff employs a patch-based processing approach that partitions the input signal into small patches. The patch-wise tokens leverage a transformer architecture for effective modeling of global information. Additionally, the model seamlessly integrates the strengths of both transformers and convolutional neural networks by utilizing a post-convolution module for fine-grained detail restoration. 
Adversarial training is further used to improve the sample quality with decreased sampling steps. We test this model on speech synthesis conditioned on acoustic feature (Mel-spectrograms). Experimental results verify that our model can synthesize high-quality speech even with only one diffusion step. Both subjective and objective evaluations demonstrate that our model can synthesize speech of a quality comparable to that of autoregressive models with faster synthesis speed.", "keywords": "Diffusion; Adversarial training; Transformer-based backbone; Speech synthesis; Vocoder", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/e1481d25086b4492c384898eda709029122cd7b4.zip", "author": "Haogeng Liu;Tao Wang;Jie Cao;Ran He;Jianhua Tao", "authorids": "~Haogeng_Liu1;~Tao_Wang7;~Jie_Cao2;~Ran_He1;~Jianhua_Tao1", "gender": "M;M;M;M;", "homepage": "https://github.com/liuhaogeng;https://github.com/hairuo55;https://ttxsjie.github.io/;https://rhe-web.github.io/;", "dblp": ";12/5838-74;39/6191-2;61/6198-1;", "google_scholar": ";bCS6NUoAAAAJ;https://scholar.google.com/citations?hl=en;ayrg9AUAAAAJ;", "orcid": ";0000-0003-1490-6973;0000-0001-6368-4495;0000-0002-3807-991X;", "linkedin": ";;;;", "or_profile": "~Haogeng_Liu1;~Tao_Wang7;~Jie_Cao2;~Ran_He1;~Jianhua_Tao1", "aff": "University of Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;", "aff_domain": "ucas.ac.cn;ia.ac.cn;ia.ac.cn;ia.ac.cn;", "position": "MS student;Researcher;Associate Professor;Full Professor;", "bibtex": "@misc{\nliu2024boosting,\ntitle={Boosting Fast and High-Quality Speech Synthesis with Linear Diffusion},\nauthor={Haogeng Liu and Tao Wang and Jie Cao and Ran He and Jianhua Tao},\nyear={2024},\nurl={https://openreview.net/forum?id=41CYtxM2jQ}\n}", "github": "", "project": "", "reviewers": "QZgE;JAcM;zJkF;2DZz", "site": "https://openreview.net/forum?id=41CYtxM2jQ", "pdf_size": 568620, "rating": "3;3;5;6", "confidence": "4;5;4;4", "soundness": "2;2;3;3", "contribution": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "80;52;59;45", "wc_strengths": "90;45;52;50", "wc_weaknesses": "214;188;138;185", "wc_questions": "1;90;80;5", "wc_review": "385;375;329;285", "wc_reply_reviewers": "0;0;107;0", "wc_reply_authors": "449;463;411;292", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 59.0, 13.095800853708795 ], "wc_strengths_avg": [ 59.25, 17.93564885918544 ], "wc_weaknesses_avg": [ 181.25, 27.39867697535777 ], "wc_questions_avg": [ 44.0, 41.176449579826574 ], "wc_review_avg": [ 343.5, 39.834030677299026 ], "wc_reply_reviewers_avg": [ 26.75, 46.332359102467464 ], "wc_reply_authors_avg": [ 403.75, 67.26579740105666 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5555555555555555, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7815464375704137849&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "University of Chinese Academy of Sciences;Chinese Academy of Sciences", 
"aff_unique_dep": ";Institute of Automation", "aff_unique_url": "http://www.ucas.ac.cn;http://www.ia.cas.cn", "aff_unique_abbr": "UCAS;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Neural Rate Control for Learned Video Compression", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19481", "id": "42lcaojZug", "author_site": "yiwei zhang, Guo Lu, Yunuo Chen, Shen Wang, Yibo Shi, Jing Wang, Li Song", "tldr": "", "abstract": "The learning-based video compression method has made significant progress in recent years, exhibiting promising compression performance compared with traditional video codecs. However, prior works have primarily focused on advanced compression architectures while neglecting the rate control technique. Rate control can precisely control the coding bitrate with optimal compression performance, which is a critical technique in practical deployment. To address this issue, we present a fully neural network-based rate control system for learned video compression methods. Our system accurately encodes videos at a given bitrate while enhancing the rate-distortion performance. Specifically, we first design a rate allocation model to assign optimal bitrates to each frame based on their varying spatial and temporal characteristics. Then, we propose a deep learning-based rate implementation network to perform the rate-parameter mapping, precisely predicting coding parameters for a given rate. Our proposed rate control system can be easily integrated into existing learning-based video compression methods. The extensive experimental results show that the proposed method achieves accurate rate control on several baseline methods while also improving overall rate-distortion performance.", "keywords": "Video compression;End-to-end;Learning-based video coding;Rate Control.", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Yiwei Zhang;Guo Lu;Yunuo Chen;Shen Wang;Yibo Shi;Jing Wang;Li Song", "authorids": "~Yiwei_Zhang3;~Guo_Lu2;~Yunuo_Chen1;~Shen_Wang4;~Yibo_Shi1;~Jing_Wang18;~Li_Song3", "gender": "M;M;M;M;M;F;M", "homepage": ";https://guolusjtu.github.io/guoluhomepage/;;;https://github.com/wu6shen/;http://w3.huawei.com/next/indexa.html;http://medialab.sjtu.edu.cn", "dblp": ";76/7805;;;;;20/872-1", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;R9iwlJcAAAAJ;;https://scholar.google.com/citations?view_op=list_works;;;jKIoTVoAAAAJ", "orcid": ";;;;;;", "linkedin": ";;https://www.linkedin.cn/incareer/in/unochenyn;;;;", "or_profile": "~Yiwei_Zhang3;~Guo_Lu2;~Yunuo_Chen1;~Shen_Wang4;~Yibo_Shi1;~Jing_Wang18;~Li_Song3", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;huawei.com;huawei.com;sjtu.edu.cn", "position": "MS student;Assistant Professor;PhD student;PhD student;Engineer;engineer;Full Professor", "bibtex": "@inproceedings{\nzhang2024neural,\ntitle={Neural Rate Control for Learned Video Compression},\nauthor={Yiwei Zhang and Guo Lu and Yunuo Chen and Shen Wang and Yibo Shi and Jing Wang and Li Song},\nbooktitle={The Twelfth International Conference on Learning 
Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=42lcaojZug}\n}", "github": "", "project": "", "reviewers": "c8Ca;cdDb;gAxb;Tpyi", "pdf_size": 1652401, "rating": "5;6;8;8", "confidence": "5;4;5;4", "soundness": "3;3;3;2", "contribution": "2;2;4;3", "presentation": "3;2;4;4", "wc_summary": "102;60;73;122", "wc_strengths": "57;68;93;107", "wc_weaknesses": "224;39;77;113", "wc_questions": "4;53;45;60", "wc_review": "387;220;288;402", "wc_reply_reviewers": "0;0;0;29", "wc_reply_authors": "594;518;420;644", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;2", "rating_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 89.25, 24.262883175748097 ], "wc_strengths_avg": [ 81.25, 19.778460506318485 ], "wc_weaknesses_avg": [ 113.25, 69.08825877093734 ], "wc_questions_avg": [ 40.5, 21.73131381210073 ], "wc_review_avg": [ 324.25, 74.43915300431621 ], "wc_reply_reviewers_avg": [ 7.25, 12.55736835487436 ], "wc_reply_authors_avg": [ 544.0, 84.4866853415377 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.19245008972987526, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13934198905050538246&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "openreview": "https://openreview.net/forum?id=42lcaojZug", "pdf": "https://openreview.net/pdf?id=42lcaojZug", "email": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;huawei.com;huawei.com;sjtu.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;0;1;1;0", "aff_unique_norm": "Shanghai Jiao Tong University;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.huawei.com", "aff_unique_abbr": "SJTU;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "43WKxTuJxu", "title": "Orthogonal Function Representations for Continuous Armed Bandits", "track": "main", "status": "Reject", "tldr": "", "abstract": "This paper addresses the continuous-armed bandit problem, which is a generalization of the standard bandit problem where the action space is a d\u2212dimensional\nhypercube $X = [\u22121, 1]^d$ and the reward is an s\u2212times differentiable function\n$f : \\mathcal X \u2192 \\mathbb R$. Traditionally, this problem is solved by assuming an implicit feature\nrepresentation in a Reproducing Kernel Hilbert Space (RKHS), where the objective\nfunction is linear in this transformation of $\\mathcal X$ . In addition to this additional intake,\nthis comes at the cost of overwhelming computational complexity. In contrast, we\npropose an explicit representation using an orthogonal feature map (Fourier, Legendre) to reduce the problem to a linear bandit with misspecification. 
As a result,\nwe develop two algorithms _OB-LinUCB_ and _OB-PE_, achieving state-of-the-art\nperformance in terms of regret and computational complexity.", "keywords": "Continuous armed bandit;Orthogonal functions;Linear bandits;Smoothness", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/1e1138329da248be4fb0c3134d81d3369f288b3b.pdf", "author": "Davide Maran;Marcello Restelli", "authorids": "~Davide_Maran1;~Marcello_Restelli1", "gender": "M;M", "homepage": "https://davidezfc.github.io/;http://home.deib.polimi.it/restelli/", "dblp": "320/3835;64/1011", "google_scholar": "https://scholar.google.it/citations?user=a8i0X8oAAAAJ;https://scholar.google.com.tw/citations?user=xdgxRiEAAAAJ", "orcid": ";0000-0002-6322-1076", "linkedin": "davide-maran/;", "or_profile": "~Davide_Maran1;~Marcello_Restelli1", "aff": "Polytechnic Institute of Milan;Politecnico di Milano", "aff_domain": "polimi.it;polimi.it", "position": "PhD student;Associate Professor", "bibtex": "@misc{\nmaran2024orthogonal,\ntitle={Orthogonal Function Representations for Continuous Armed Bandits},\nauthor={Davide Maran and Marcello Restelli},\nyear={2024},\nurl={https://openreview.net/forum?id=43WKxTuJxu}\n}", "github": "", "project": "", "reviewers": "DACT;hB5r;hchS;MY3R", "site": "https://openreview.net/forum?id=43WKxTuJxu", "pdf_size": 1833724, "rating": "5;5;5;6", "confidence": "3;3;3;3", "soundness": "2;3;2;4", "contribution": "2;2;2;3", "presentation": "2;3;2;3", "wc_summary": "69;110;65;79", "wc_strengths": "70;48;29;55", "wc_weaknesses": "211;226;239;61", "wc_questions": "151;223;84;89", "wc_review": "501;607;417;284", "wc_reply_reviewers": "106;74;66;58", "wc_reply_authors": "487;594;724;518", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;2;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 80.75, 17.640507362318125 ], "wc_strengths_avg": [ 50.5, 14.739402972983676 ], "wc_weaknesses_avg": [ 184.25, 71.84488499538433 ], "wc_questions_avg": [ 136.75, 56.35767472137224 ], "wc_review_avg": [ 452.25, 118.18920212946698 ], "wc_reply_reviewers_avg": [ 76.0, 18.2208671582886 ], "wc_reply_authors_avg": [ 580.75, 91.40944973032055 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:txoEUwjSWvIJ:scholar.google.com/&scioq=Orthogonal+Function+Representations+for+Continuous+Armed+Bandits&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Polytechnic Institute of Milan;Politecnico di Milano", "aff_unique_dep": ";", "aff_unique_url": "https://www.polimi.it/;https://www.polimi.it", "aff_unique_abbr": "Politecnico di Milano;Polimi", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Italy" }, { "title": "Understanding Expressivity of GNN in Rule Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19480", "id": "43cYe4oogi", "author_site": "Haiquan Qiu, Yongqi Zhang, Yong Li, Quanming Yao", "tldr": "", "abstract": "Rule learning is critical to improving knowledge graph (KG) reasoning due to their ability to provide logical and interpretable explanations. 
Recently, Graph Neural Networks (GNNs) with tail entity scoring achieve the state-of-the-art performance on KG reasoning. However, the theoretical understandings for these GNNs are either lacking or focusing on single-relational graphs, leaving what the kind of rules these GNNs can learn an open problem. We propose to fill the above gap in this paper. Specifically, GNNs with tail entity scoring are unified into a common framework. Then, we analyze their expressivity by formally describing the rule structures they can learn and theoretically demonstrating their superiority. These results further inspire us to propose a novel labeling strategy to learn more rules in KG reasoning. Experimental results are consistent with our theoretical findings and verify the effectiveness of our proposed method. The code is publicly available at https://github.com/LARS-research/Rule-learning-expressivity.", "keywords": "Graph Neural Networks;KG reasoning;Link prediction;Rule learning;Expressivity", "primary_area": "learning on graphs and other geometries & topologies", "supplementary_material": "", "author": "Haiquan Qiu;Yongqi Zhang;Yong Li;quanming yao", "authorids": "~Haiquan_Qiu1;~Yongqi_Zhang2;~Yong_Li7;~quanming_yao1", "gender": "M;M;M;M", "homepage": ";https://yzhangee.github.io/;http://fi.ee.tsinghua.edu.cn/~liyong/;https://lars-group.github.io/", "dblp": "01/1435;;;158/1014", "google_scholar": "JoumqGMAAAAJ;https://scholar.google.com.hk/citations?user=nVk-7EAAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/schhp?hl=en", "orcid": "0000-0001-5202-4950;0000-0003-2085-7418;;", "linkedin": ";;;", "or_profile": "~Haiquan_Qiu1;~Yongqi_Zhang2;~Yong_Li7;~quanming_yao1", "aff": "Tsinghua University;4Paradigm. Inc;Tsinghua University;Department of Electronic Engineering", "aff_domain": "tsinghua.edu.cn;4paradigm.com;tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;Researcher;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nqiu2024understanding,\ntitle={Understanding Expressivity of {GNN} in Rule Learning},\nauthor={Haiquan Qiu and Yongqi Zhang and Yong Li and quanming yao},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=43cYe4oogi}\n}", "github": "", "project": "", "reviewers": "NwzR;S7Jh;zRzb;TfWh", "pdf_size": 924751, "rating": "5;6;6;8", "confidence": "3;3;3;3", "soundness": "3;3;3;3", "contribution": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "63;117;92;85", "wc_strengths": "115;138;60;56", "wc_weaknesses": "685;104;51;1", "wc_questions": "43;53;11;63", "wc_review": "906;412;214;205", "wc_reply_reviewers": "60;12;0;0", "wc_reply_authors": "1770;768;109;198", "reply_reviewers": "1;1;0;0", "reply_authors": "5;2;1;1", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 89.25, 19.266226926930972 ], "wc_strengths_avg": [ 92.25, 35.23049105533444 ], "wc_weaknesses_avg": [ 210.25, 276.506216024161 ], "wc_questions_avg": [ 42.5, 19.512816301087856 ], "wc_review_avg": [ 434.25, 284.6527489767137 ], "wc_reply_reviewers_avg": [ 18.0, 24.73863375370596 ], "wc_reply_authors_avg": [ 711.25, 661.4950396639418 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.25, 1.6393596310755 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, 
"gs_cited_by_link": "https://scholar.google.com/scholar?cites=9819222418001393982&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=43cYe4oogi", "pdf": "https://openreview.net/pdf?id=43cYe4oogi", "email": "tsinghua.edu.cn;4paradigm.com;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Tsinghua University;4Paradigm;Institution Name Not Provided", "aff_unique_dep": ";;Department of Electronic Engineering", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.4paradigm.com/;", "aff_unique_abbr": "THU;;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China;" }, { "id": "43flsheS4s", "title": "Improving Robustness and Accuracy with Retrospective Online Adversarial Distillation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Adversarial distillation (AD), transferring knowledge of a robust teacher model to a student model, has emerged as an advanced technique for improving robustness against adversarial attacks. However, AD in general suffers from the high computational complexity of pre-training the robust teacher, and the inherent trade-off between robustness and natural accuracy (i.e., accuracy on clean data). To address these issues, we propose retrospective online adversarial distillation (ROAD). ROAD exploits the student itself of the last epoch and a natural model (i.e., a model trained with clean data) as teachers, instead of a pre-trained robust teacher in the conventional AD. We revealed both theoretically and empirically that knowledge distillation from the student of the last epoch allows to penalize overly confident predictions on adversarial examples, leading to improved robustness and generalization. Also, the student and the natural model are trained together in a collaborative manner, which enables to improve natural accuracy of the student more effectively. 
We demonstrate by extensive experiments that ROAD achieved outstanding performance in both robustness and natural accuracy with substantially reduced training time and computation cost.", "keywords": "Adversarial Training;Adversarial Distillation;Knowledge Distillation", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/0749a3ea8221caa3779abfa552ee30856ee426b4.zip", "author": "Joongsu Kim;Junhyung Jo;Suha Kwak;Young-Joo Suh", "authorids": "~Joongsu_Kim2;~Junhyung_Jo1;~Suha_Kwak3;~Young-Joo_Suh1", "gender": ";M;M;M", "homepage": ";;https://suhakwak.github.io/;http://monet.postech.ac.kr/yjsuh/", "dblp": ";;65/6173;91/4495", "google_scholar": ";https://scholar.google.co.kr/citations?view_op=list_works;-gscDIEAAAAJ;https://scholar.google.com.tw/citations?user=MW-4uU4AAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Joongsu_Kim2;~Junhyung_Jo1;~Suha_Kwak3;~Young-Joo_Suh1", "aff": ";Pohang University of Science and Technology;POSTECH;Pohang University of Science and Technology", "aff_domain": ";postech.edu;postech.ac.kr;postech.edu", "position": ";MS student;Associate Professor;Full Professor", "bibtex": "@misc{\nkim2024improving,\ntitle={Improving Robustness and Accuracy with Retrospective Online Adversarial Distillation},\nauthor={Joongsu Kim and Junhyung Jo and Suha Kwak and Young-Joo Suh},\nyear={2024},\nurl={https://openreview.net/forum?id=43flsheS4s}\n}", "github": "", "project": "", "reviewers": "m9Uu;FC1d;cBNA;zap7", "site": "https://openreview.net/forum?id=43flsheS4s", "pdf_size": 3313956, "rating": "3;5;5;6", "confidence": "5;5;4;4", "soundness": "2;3;3;3", "contribution": "2;2;3;3", "presentation": "3;3;1;3", "wc_summary": "90;50;102;75", "wc_strengths": "39;30;62;56", "wc_weaknesses": "376;104;300;169", "wc_questions": "282;2;86;60", "wc_review": "787;186;550;360", "wc_reply_reviewers": "296;59;0;0", "wc_reply_authors": "1285;684;728;559", "reply_reviewers": "1;1;0;0", "reply_authors": "3;1;1;1", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 79.25, 19.40843888621648 ], "wc_strengths_avg": [ 46.75, 12.833062767710599 ], "wc_weaknesses_avg": [ 237.25, 106.77400198550207 ], "wc_questions_avg": [ 107.5, 105.23663810669743 ], "wc_review_avg": [ 470.75, 223.4069996665279 ], "wc_reply_reviewers_avg": [ 88.75, 122.05608342069641 ], "wc_reply_authors_avg": [ 814.0, 278.9094835246733 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6882472016116854, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:XhI7sl2au9wJ:scholar.google.com/&scioq=Improving+Robustness+and+Accuracy+with+Retrospective+Online+Adversarial+Distillation&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Pohang University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.postech.ac.kr", "aff_unique_abbr": "POSTECH", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Pohang", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "id": "468KWV14ll", "title": "Exploration and Anti-Exploration with Distributional Random Network Distillation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Exploration remains a 
critical issue in deep reinforcement learning for an agent to attain high returns in unknown environments. Although the prevailing exploration Random Network Distillation (RND) algorithm has been demonstrated to be effective in numerous environments, it often needs more discriminative power in bonus allocation. This paper highlights the ''bonus inconsistency'' issue within RND, pinpointing its primary limitation. To address this issue, we introduce the Distributional RND (DRND), a derivative of the RND. DRND enhances the exploration process by distilling a distribution of random networks and implicitly incorporates pseudo counts to improve the precision of bonus allocation. This refinement encourages agents to engage in more extensive exploration. Our method effectively mitigates the inconsistency issue without introducing significant computational overhead. Both theoretical analysis and experimental results demonstrate the superiority of our approach over the original RND algorithm. Our method excels in challenging online exploration scenarios and effectively serves as an anti-exploration mechanism in D4RL offline tasks.", "keywords": "Reinforcement learning;exploration;anti-exploration", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/b2ceaa769851ff9b67a51b8489c83e58247d08bc.zip", "author": "Kai Yang;Jian Tao;Jiafei Lyu;Xiu Li", "authorids": "~Kai_Yang6;~Jian_Tao5;~Jiafei_Lyu1;~Xiu_Li1", "gender": "M;M;M;F", "homepage": "https://github.com/yk7333;https://orcid.org/0009-0007-8439-3161;;https://thusigsiclab.github.io/thu.github.io/introduction.html", "dblp": ";;278/1503;13/1206-1", "google_scholar": ";;bfgCMr8AAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";0009-0007-8439-3161;0000-0001-6616-417X;0000-0003-0403-1923", "linkedin": ";;;", "or_profile": "~Kai_Yang6;~Jian_Tao5;~Jiafei_Lyu1;~Xiu_Li1", "aff": "Tsinghua University;Chaocanshu;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;chaocanshu.ai;tsinghua.edu.cn;tsinghua.edu.cn", "position": "MS student;Intern;PhD student;Professor", "bibtex": "@misc{\nyang2024exploration,\ntitle={Exploration and Anti-Exploration with Distributional Random Network Distillation},\nauthor={Kai Yang and Jian Tao and Jiafei Lyu and Xiu Li},\nyear={2024},\nurl={https://openreview.net/forum?id=468KWV14ll}\n}", "github": "", "project": "", "reviewers": "4Wh1;u9ew;gfXh", "site": "https://openreview.net/forum?id=468KWV14ll", "pdf_size": 13044600, "rating": "6;6;6", "confidence": "3;2;3", "soundness": "3;2;3", "contribution": "3;2;3", "presentation": "3;3;2", "wc_summary": "70;43;113", "wc_strengths": "32;37;98", "wc_weaknesses": "55;253;343", "wc_questions": "46;153;82", "wc_review": "203;486;636", "wc_reply_reviewers": "0;48;493", "wc_reply_authors": "326;802;1282", "reply_reviewers": "0;2;3", "reply_authors": "2;4;6", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 75.33333333333333, 28.82514334550461 ], "wc_strengths_avg": [ 55.666666666666664, 30.00370347510824 ], "wc_weaknesses_avg": [ 217.0, 120.29962593458053 ], "wc_questions_avg": [ 93.66666666666667, 44.4547210341289 ], "wc_review_avg": [ 441.6666666666667, 179.52963234209804 ], "wc_reply_reviewers_avg": [ 180.33333333333334, 221.95545098560257 ], "wc_reply_authors_avg": [ 
803.3333333333334, 390.286504449687 ], "reply_reviewers_avg": [ 1.6666666666666667, 1.247219128924647 ], "reply_authors_avg": [ 4.0, 1.632993161855452 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13786006434311853287&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Tsinghua University;Chaocanshu", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;", "aff_unique_abbr": "THU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China;" }, { "title": "OPTIMAL ROBUST MEMORIZATION WITH RELU NEURAL NETWORKS", "status": "Spotlight", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19479", "id": "47hDbAMLbc", "author_site": "Lijia Yu, XIAOSHAN GAO, Lijun Zhang", "tldr": "", "abstract": "Memorization with neural networks is to study the expressive power of neural networks to interpolate a finite classification data set, which is closely related to the generalizability of deep learning. However, the important problem of robust memorization has not been thoroughly studied. In this paper, several basic problems about robust memorization are solved. First, we prove that it is NP-hard to compute neural networks with certain simple structures, which are robust memorization. A network hypothesis space is called optimal robust memorization for a data set if it can achieve robust memorization for any budget less than half the separation bound of the data set. Second, we explicitly construct neural networks with O(N n) parameters for optimal robust memorization of any data set with dimension n and size N . We also give a lower bound for the width of networks to achieve optimal robust memorization. 
Finally, we explicitly construct neural networks with\nO(N n log n) parameters for optimal robust memorization of any binary classification data set by controlling the Lipschitz constant of the network.", "keywords": "Memorization;expressive power of network;optimal robust memorization;computation complexity;Lipschitz constant", "primary_area": "learning theory", "supplementary_material": "", "author": "Lijia Yu;Xiao-Shan Gao;Lijun Zhang", "authorids": "~Lijia_Yu2;~Xiao-Shan_Gao2;~Lijun_Zhang2", "gender": "M;M;M", "homepage": ";http://www.mmrc.iss.ac.cn/~xgao/;", "dblp": "175/8873.html;13/3109;76/4015-1", "google_scholar": ";_se7GmUAAAAJ;", "orcid": ";0000-0003-2021-9395;", "linkedin": ";;", "or_profile": "~Lijia_Yu2;~Xiao-Shan_Gao2;~Lijun_Zhang2", "aff": "Institute of Software, Chinese Academy of Sciences;Academy of Mathematics and Systems Science, Chinese Academy of Sciences, Chinese Academy of Sciences;Chinese Academy of Sciences, Chinese Academy of Sciences", "aff_domain": "ios.ac.cn;amss.ac.cn;ios.ac.cn", "position": "Postdoc;Full Professor;Full Professor", "bibtex": "@inproceedings{\nyu2024optimal,\ntitle={{OPTIMAL} {ROBUST} {MEMORIZATION} {WITH} {RELU} {NEURAL} {NETWORKS}},\nauthor={Lijia Yu and Xiao-Shan Gao and Lijun Zhang},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=47hDbAMLbc}\n}", "github": "", "project": "", "reviewers": "LzNK;Sknc;MXq8", "pdf_size": 556613, "rating": "5;5;8", "confidence": "4;3;3", "soundness": "2;2;4", "contribution": "2;3;3", "presentation": "3;1;4", "wc_summary": "83;113;174", "wc_strengths": "55;47;120", "wc_weaknesses": "237;18;130", "wc_questions": "34;20;129", "wc_review": "409;198;553", "wc_reply_reviewers": "0;27;114", "wc_reply_authors": "636;193;1272", "reply_reviewers": "0;1;2", "reply_authors": "1;2;3", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.9428090415820634 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 1.247219128924647 ], "wc_summary_avg": [ 123.33333333333333, 37.86232369461172 ], "wc_strengths_avg": [ 74.0, 32.69046751985457 ], "wc_weaknesses_avg": [ 128.33333333333334, 89.4141425552406 ], "wc_questions_avg": [ 61.0, 48.42175819470692 ], "wc_review_avg": [ 386.6666666666667, 145.78599231598196 ], "wc_reply_reviewers_avg": [ 47.0, 48.641546028061235 ], "wc_reply_authors_avg": [ 700.3333333333334, 442.8425855262292 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16558959214918652101&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=47hDbAMLbc", "pdf": "https://openreview.net/pdf?id=47hDbAMLbc", "email": "ios.ac.cn;amss.ac.cn;ios.ac.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Chinese Academy of Sciences", "aff_unique_dep": "Institute of Software", "aff_unique_url": "http://www.ios.ac.cn", "aff_unique_abbr": "CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Language Model Decoding as Direct Metrics Optimization", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19478", "id": 
"488A64eOf6", "author_site": "Haozhe Ji, Pei Ke, Hongning Wang, Minlie Huang", "tldr": "", "abstract": "Despite the remarkable advances in language modeling, current mainstream decoding methods still struggle to generate texts that align with human texts across different aspects. In particular, sampling-based methods produce less-repetitive texts which are often disjunctive in discourse, while search-based methods maintain topic coherence at the cost of increased repetition. Overall, these methods fall short in achieving holistic alignment across a broad range of aspects. In this work, we frame decoding from a language model as an optimization problem with the goal of strictly matching the expected performance with human texts measured by multiple metrics of desired aspects simultaneously. The resulting decoding distribution enjoys an analytical solution that scales the input language model distribution via a sequence-level energy function defined by these metrics. And most importantly, we prove that this induced distribution is guaranteed to improve the perplexity on human texts, which suggests a better approximation to the underlying distribution of human texts. To facilitate tractable sampling from this globally normalized distribution, we adopt the Sampling-Importance-Resampling technique. Experiments on various domains and model scales demonstrate the superiority of our method in metrics alignment with human texts and human evaluation over strong baselines.", "keywords": "language model;decoding algorithm;energy-based model", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/386a6d3b337b2bbacdf5441ce0b99bc70037d0a8.zip", "author": "Haozhe Ji;Pei Ke;Hongning Wang;Minlie Huang", "authorids": "~Haozhe_Ji2;~Pei_Ke2;~Hongning_Wang1;~Minlie_Huang1", "gender": "M;M;M;M", "homepage": "https://haozheji.github.io/;https://kepei1106.github.io/;http://www.cs.virginia.edu/~hw5x/;http://coai.cs.tsinghua.edu.cn/hml", "dblp": "222/9546;10/2179;05/6545;", "google_scholar": "EE5Z7mUAAAAJ;W_zPCtEAAAAJ;qkdvKNoAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";;0000-0002-6524-9195;", "linkedin": "%E6%98%8A%E5%93%B2-%E8%AE%A1-69722313b/;;;", "or_profile": "~Haozhe_Ji2;~Pei_Ke2;~Hongning_Wang1;~Minlie_Huang1", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;Postdoc;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nji2024language,\ntitle={Language Model Decoding as Direct Metrics Optimization},\nauthor={Haozhe Ji and Pei Ke and Hongning Wang and Minlie Huang},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=488A64eOf6}\n}", "github": "", "project": "", "reviewers": "iYh2;3A4S;MQ6q;aJdn", "pdf_size": 1549612, "rating": "5;6;6;8", "confidence": "3;4;3;3", "soundness": "3;3;3;4", "contribution": "2;3;3;3", "presentation": "3;3;3;4", "wc_summary": "59;110;51;93", "wc_strengths": "51;43;81;33", "wc_weaknesses": "91;180;113;24", "wc_questions": "5;168;2;42", "wc_review": "206;501;247;192", "wc_reply_reviewers": "309;14;0;99", "wc_reply_authors": "2232;1968;790;374", "reply_reviewers": "2;1;0;2", "reply_authors": "4;4;2;2", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], 
"contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 78.25, 24.180312239505923 ], "wc_strengths_avg": [ 52.0, 17.916472867168917 ], "wc_weaknesses_avg": [ 102.0, 55.70008976653449 ], "wc_questions_avg": [ 54.25, 67.53656417082527 ], "wc_review_avg": [ 286.5, 125.48007810007132 ], "wc_reply_reviewers_avg": [ 105.5, 123.44735720135931 ], "wc_reply_authors_avg": [ 1341.0, 778.73294523861 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 3.0, 1.0 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.13245323570650439, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4952793167999616763&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=488A64eOf6", "pdf": "https://openreview.net/pdf?id=488A64eOf6", "email": "tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "48Abxtv9Mi", "title": "Missing Data Imputation for Large-Scale Longitudinal Physical Activity Data", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Missing data is ubiquitous in wearable device data, which stems from the combination of user errors and hardware issues, hindering researchers who seek to monitor users' physical activities to understand health related behaviors and perform appropriate interventions. All of Us dataset collects one of the largest longitudinal physical activity data in the world. However, due to the remarkable variability of missingness patterns, only few works leverage it, which loses the extremely valuable potential to deliver vital transformative health impacts. In this work, we consider the problem of imputing missing step counts in the large-scale longitudinal physical activity data. Thus, we explore the All of Us dataset and extract a novel cohort of 100 qualified participants with more than 3 million step count instances from it. To address the issue of missingness, we introduce a sparse self-attention model which captures both absolute and relative time information within the local context window around the missing hourly block. Our results show (1) the curated cohort is subject to the variability of both activity and missingness patterns which is challenging to model, (2) our model outperforms a carefully-crafted set of baseline methods with the statistical significance, solidifying its position as a foundation model which could be used in fine-tuning approaches for the downstream tasks. 
Hopefully our filling method can benefit the further research by making such a large scale physical activity dataset easier to use.", "keywords": "missing data;time series;imputation;wearable;physical activity;large-scale;novel cohort;self-attention model;sparse", "primary_area": "applications to physical sciences (physics, chemistry, biology, etc.)", "supplementary_material": "", "author": "Hui Wei;Maxwell Xu;Colin Samplawski;James Matthew Rehg;Santosh Kumar;Benjamin Marlin", "authorids": "~Hui_Wei3;~Maxwell_Xu1;~Colin_Samplawski1;~James_Matthew_Rehg1;~Santosh_Kumar1;~Benjamin_Marlin1", "gender": "M;;;M;M;M", "homepage": "https://wll199566.github.io/davidhuiwei.github.io/;https://maxxu05.github.io/;;http://www.memphis.edu/cs/santosh-kumar/;https://groups.cs.umass.edu/marlin/;http://rehg.org/", "dblp": ";;;;03/7058.html;r/JMRehg", "google_scholar": ";https://scholar.google.com/citations?view_op=list_works;6ufrjW4AAAAJ;f0i_WNoAAAAJ;ey960FIAAAAJ;https://scholar.google.com.tw/citations?user=8kA3eDwAAAAJ", "orcid": ";;;0000-0002-9273-0291;0000-0002-2626-3410;0000-0003-1793-5462", "linkedin": ";;;santoshkumar4/;;", "or_profile": "~Hui_Wei3;~Maxwell_Xu1;~Colin_Samplawski1;~Santosh_Kumar1;~Benjamin_Marlin1;~James_Rehg1", "aff": "University of Massachusetts Amherst;University of Illinois, Urbana Champaign;Department of Computer Science, University of Massachusetts at Amherst;University of Memphis;University of Massachusetts at Amherst;University of Illinois, Urbana Champaign", "aff_domain": "cs.umass.edu;illinois.edu;cs.umass.edu;memphis.edu;umass.edu;illinois.edu", "position": "Researcher;PhD student;PhD student;Full Professor;Associate Professor;Full Professor", "bibtex": "@misc{\nwei2024missing,\ntitle={Missing Data Imputation for Large-Scale Longitudinal Physical Activity Data},\nauthor={Hui Wei and Maxwell Xu and Colin Samplawski and James Matthew Rehg and Santosh Kumar and Benjamin Marlin},\nyear={2024},\nurl={https://openreview.net/forum?id=48Abxtv9Mi}\n}", "github": "", "project": "", "reviewers": "", "site": "https://openreview.net/forum?id=48Abxtv9Mi", "pdf_size": 0, "rating": "", "confidence": "", "soundness": "", "contribution": "", "presentation": "", "wc_summary": "", "wc_strengths": "", "wc_weaknesses": "", "wc_questions": "", "wc_review": "", "wc_reply_reviewers": "", "wc_reply_authors": "", "reply_reviewers": "", "reply_authors": "", "rating_avg": [ 0, 0 ], "confidence_avg": [ 0, 0 ], "soundness_avg": [ 0, 0 ], "contribution_avg": [ 0, 0 ], "presentation_avg": [ 0, 0 ], "wc_summary_avg": [ 0, 0 ], "wc_strengths_avg": [ 0, 0 ], "wc_weaknesses_avg": [ 0, 0 ], "wc_questions_avg": [ 0, 0 ], "wc_review_avg": [ 0, 0 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 0, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:eGSw128SH30J:scholar.google.com/&scioq=Missing+Data+Imputation+for+Large-Scale+Longitudinal+Physical+Activity+Data&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;0;2;0;1", "aff_unique_norm": "University of Massachusetts Amherst;University of Illinois Urbana-Champaign;University of Memphis", "aff_unique_dep": ";;", "aff_unique_url": "https://www.umass.edu;https://illinois.edu;https://www.memphis.edu", "aff_unique_abbr": "UMass Amherst;UIUC;UM", "aff_campus_unique_index": "0;1;0;0;1", "aff_campus_unique": "Amherst;Urbana-Champaign;", "aff_country_unique_index": 
"0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "48CXLrx7K3", "title": "Revealing Unintentional Information Leakage in Low-Dimensional Facial Portrait Representations", "track": "main", "status": "Reject", "tldr": "", "abstract": "We evaluate the information that can unintentionally leak into the low dimensional output of a neural network, by reconstructing an input image from a 40- or 32-element feature vector that intends to only describe abstract attributes of a facial portrait. The reconstruction uses blackbox-access to the image encoder which generates the feature vector. Other than previous work, we leverage recent knowledge about image generation and facial similarity, implementing a method that outperforms the current state-of-the-art. Our strategy uses a pretrained StyleGAN and a new loss function that compares the perceptual similarity of portraits by mapping them into the latent space of a FaceNet embedding. Additionally, we present a new technique that fuses the output of an ensemble, to deliberately generate specific aspects of the recreated image.", "keywords": "feature vector reconstruction;face recognition;privacy", "primary_area": "visualization or interpretation of learned representations", "supplementary_material": "", "author": "Kathleen Anderson;Thomas Martinetz", "authorids": "~Kathleen_Anderson1;~Thomas_Martinetz1", "gender": ";M", "homepage": "https://www.inb.uni-luebeck.de/en/staff/staff-list/wissenschaftliche-mitarbeiter/kathleen-anderson.html;https://www.inb.uni-luebeck.de/mitarbeiter/mitarbeiter/professoren/thomas-martinetz", "dblp": ";", "google_scholar": ";https://scholar.google.de/citations?user=0-tDtUIAAAAJ", "orcid": ";", "linkedin": ";thomas-martinetz-10573a6/", "or_profile": "~Kathleen_Anderson1;~Thomas_Martinetz1", "aff": "Universit\u00e4t zu L\u00fcbeck;Universit\u00e4t zu L\u00fcbeck", "aff_domain": "uni-luebeck.de;uni-luebeck.de", "position": "PhD student;Full Professor", "bibtex": "@misc{\nanderson2024revealing,\ntitle={Revealing Unintentional Information Leakage in Low-Dimensional Facial Portrait Representations},\nauthor={Kathleen Anderson and Thomas Martinetz},\nyear={2024},\nurl={https://openreview.net/forum?id=48CXLrx7K3}\n}", "github": "", "project": "", "reviewers": "nZtS;5Uwq;3kp1", "site": "https://openreview.net/forum?id=48CXLrx7K3", "pdf_size": 47938970, "rating": "3;5;8", "confidence": "3;4;4", "soundness": "1;2;2", "contribution": "3;2;2", "presentation": "2;2;2", "wc_summary": "107;94;41", "wc_strengths": "49;243;31", "wc_weaknesses": "312;305;67", "wc_questions": "39;56;195", "wc_review": "507;698;334", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "343;689;65", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 5.333333333333333, 2.0548046676563256 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 1.6666666666666667, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 80.66666666666667, 28.546258754675524 ], "wc_strengths_avg": [ 107.66666666666667, 95.97684906036223 ], "wc_weaknesses_avg": [ 228.0, 113.88005385784933 ], "wc_questions_avg": [ 96.66666666666667, 69.87767088912516 ], "wc_review_avg": [ 513.0, 148.66292969892214 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 365.6666666666667, 255.2506393505977 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 
0.8029550685469661, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ckxSbqTKJNAJ:scholar.google.com/&scioq=Revealing+Unintentional+Information+Leakage+in+Low-Dimensional+Facial+Portrait+Representations&hl=en&as_sdt=0,5", "gs_version_total": 5, "aff_unique_index": "0;0", "aff_unique_norm": "University of L\u00fcbeck", "aff_unique_dep": "", "aff_unique_url": "https://www.uni-luebeck.de", "aff_unique_abbr": "UzL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "id": "49CGs58v0J", "title": "Elevating Augmentation: Boosting Performance via Sub-Model Training", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Image classification has improved with the development of training techniques. However, these techniques often require careful parameter tuning to balance the strength of regularization, limiting their potential benefits. In this paper, we propose a novel way to use regularization called Augmenting Sub-model (AugSub). AugSub consists of two models: the main model and the sub-model. While the main model employs conventional training recipes, the sub-model leverages the benefit of additional regularization. AugSub achieves this by mitigating adverse effects through a relaxed loss function similar to self-distillation loss. We demonstrate the effectiveness of AugSub with three drop techniques: dropout, drop-path, and random masking. Our analysis shows that all AugSub improves performance, with the training loss converging even faster than regular training. Among the three, AugMask is identified as the most practical method due to its performance and cost efficiency. We further validate AugMask across diverse training recipes, including DeiT-III, ResNet, MAE fine-tuning, and Swin Transformer. The results show that AugMask consistently provides significant performance gain. AugSub provides a practical and effective solution for introducing additional regularization under various training recipes. 
The code will be publicly available.", "keywords": "Training recipe;drop regularization;random masking;self-distillation;supervised learning", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Byeongho Heo;Taekyung Kim;Sangdoo Yun;Dongyoon Han", "authorids": "~Byeongho_Heo1;~Taekyung_Kim4;~Sangdoo_Yun1;~Dongyoon_Han1", "gender": "M;Not Specified;M;M", "homepage": "https://sites.google.com/view/byeongho-heo/home;;https://sangdooyun.github.io/;https://dongyoonhan.github.io/", "dblp": "142/2705;58/1619-2;124/3009.html;151/8876", "google_scholar": "https://scholar.google.co.kr/citations?user=4_7rLDIAAAAJ;https://scholar.google.co.kr/citations?user=u-9bdkwAAAAJ;o0qtjzYAAAAJ;jcP7m1QAAAAJ", "orcid": ";;;0000-0002-9130-8195", "linkedin": "byeongho-heo-1a7756122/;taekyung-kim-76b074335/;;https://linkedin.com/in/dongyoon-han-04961a120/en", "or_profile": "~Byeongho_Heo1;~Taekyung_Kim4;~Sangdoo_Yun1;~Dongyoon_Han1", "aff": "NAVER AI Lab;NAVER AI Lab;NAVER;NAVER", "aff_domain": "navercorp.com;navercorp.com;navercorp.com;navercorp.com", "position": "Researcher;Research Scientist;Research Scientist;Research Scientist", "bibtex": "@misc{\nheo2024elevating,\ntitle={Elevating Augmentation: Boosting Performance via Sub-Model Training},\nauthor={Byeongho Heo and Taekyung Kim and Sangdoo Yun and Dongyoon Han},\nyear={2024},\nurl={https://openreview.net/forum?id=49CGs58v0J}\n}", "github": "", "project": "", "reviewers": "AdC8;FBYo;ZArF", "site": "https://openreview.net/forum?id=49CGs58v0J", "pdf_size": 564681, "rating": "3;5;6", "confidence": "4;4;4", "soundness": "2;2;3", "contribution": "2;2;2", "presentation": "2;3;3", "wc_summary": "63;54;70", "wc_strengths": "23;45;34", "wc_weaknesses": "214;156;128", "wc_questions": "35;5;2", "wc_review": "335;260;234", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 62.333333333333336, 6.548960901462833 ], "wc_strengths_avg": [ 34.0, 8.981462390204987 ], "wc_weaknesses_avg": [ 166.0, 35.81433604950212 ], "wc_questions_avg": [ 14.0, 14.89966442575134 ], "wc_review_avg": [ 276.3333333333333, 42.82003684673281 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:1DQcn9Zlf_gJ:scholar.google.com/&scioq=Elevating+Augmentation:+Boosting+Performance+via+Sub-Model+Training&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "NAVER Corporation", "aff_unique_dep": "NAVER AI Lab", "aff_unique_url": "https://www.naver.com", "aff_unique_abbr": "NAVER", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "49N0ivEQHY", "title": "Towards Better Orthogonality Regularization with Disentangled Norm in Training Deep CNNs", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "In addressing feature redundancy and training instability in CNNs, orthogonality regularization has emerged as a 
promising approach. \nSpecifically, a variant termed kernel orthogonality regularization \nseeks to optimize models by minimizing the residual between kernel functions of convolutional filters and the identity matrix.\n\nContrary to methods that measure the kernel residual as a holistic entity, \nour approach introduces a tailored measure that disentangles diagonal and correlation components from the kernel matrix, \nthereby mitigating their mutual interference during training.\nModels equipped with this strict kernel orthogonality measure outperform existing methods in near-orthogonality. \nNotably, we observe test accuracy improvements for shallow architectures.\nHowever, as model depth increases, the efficacy of our strict kernel orthogonality approach diminishes.\n\nGiven the challenges of strict kernel orthogonality in deeper models and the inherent non-compliance of specific convolutional layers with the kernel orthogonality definition, we introduce the concept of a relaxation theory, wherein strict orthogonality is a special case. \nBy adopting this relaxed kernel orthogonality regularization, we observe enhanced model performance in deeper architectures, \nsuggesting it as a robust alternative to the strict counterpart.\n\nTo validate our approach's efficacy in achieving near-orthogonality and enhancing model performance, we conduct rigorous experiments with our kernel orthogonality regularization toolkit on ResNet and WideResNet in CIFAR-10 and CIFAR-100 datasets. \nWe observe state-of-the-art gains in model performance from the toolkit and obtain more robust models with expressive features. \nThese experiments demonstrate the efficacy of our toolkit while highlighting the often overlooked challenges in orthogonality regularization.", "keywords": "Orthogonality Regularization;Disentangled Norm", "primary_area": "metric learning, kernel learning, and sparse coding", "supplementary_material": "", "author": "Changhao Wu;zhang shenan;Fangsong Long;Ziliang Yin;Tuo Leng", "authorids": "~Changhao_Wu1;~zhang_shenan1;~Fangsong_Long1;~Ziliang_Yin1;~Tuo_Leng1", "gender": ";M;M;;M", "homepage": "https://changhaowu.github.io;https://github.com/Andrewmatilde;https://longfangsong.github.io/en/;https://github.com/crazcell;https://euclidesprobationem.github.io/index.html", "dblp": ";;;;", "google_scholar": ";;;;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Changhao_Wu1;~zhang_shenan1;~Fangsong_Long1;~Ziliang_Yin1;~Tuo_Leng1", "aff": ";;;;Shanghai University", "aff_domain": ";;;;shu.edu.cn", "position": ";;;;Associate Professor", "bibtex": "@misc{\nwu2024towards,\ntitle={Towards Better Orthogonality Regularization with Disentangled Norm in Training Deep {CNN}s},\nauthor={Changhao Wu and zhang shenan and Fangsong Long and Ziliang Yin and Tuo Leng},\nyear={2024},\nurl={https://openreview.net/forum?id=49N0ivEQHY}\n}", "github": "", "project": "", "reviewers": "RR7G;ok4a;M627", "site": "https://openreview.net/forum?id=49N0ivEQHY", "pdf_size": 396195, "rating": "3;3;5", "confidence": "4;4;4", "soundness": "3;1;2", "contribution": "2;1;2", "presentation": "3;1;1", "wc_summary": "61;339;129", "wc_strengths": "31;44;88", "wc_weaknesses": "94;1080;105", "wc_questions": "2;86;1", "wc_review": "188;1549;323", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "446;699;528", "reply_reviewers": "0;0;0", "reply_authors": "2;3;2", "rating_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.0, 
0.816496580927726 ], "contribution_avg": [ 1.6666666666666667, 0.4714045207910317 ], "presentation_avg": [ 1.6666666666666667, 0.9428090415820634 ], "wc_summary_avg": [ 176.33333333333334, 118.32535184350347 ], "wc_strengths_avg": [ 54.333333333333336, 24.390344173235622 ], "wc_weaknesses_avg": [ 426.3333333333333, 462.233947500854 ], "wc_questions_avg": [ 29.666666666666668, 39.83577398380617 ], "wc_review_avg": [ 686.6666666666666, 612.247408233705 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 557.6666666666666, 105.39555124492789 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9426688353333722453&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 3, "aff_unique_index": "0", "aff_unique_norm": "Shanghai University", "aff_unique_dep": "", "aff_unique_url": "https://www.shu.edu.cn", "aff_unique_abbr": "SHU", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "49Tn5mfTy5", "title": "Uncertainty Quantification Using a Codebook of Encoders", "track": "main", "status": "Reject", "tldr": "", "abstract": "Many machine learning applications are limited not by the accuracy of current models but by the inability of these models to assign confidence to their predictions \u2013 the models don\u2019t know what they don\u2019t know. Among methods that do provide uncertainty estimates, there remains a tradeoff between reliable yet expensive methods (e.g., deep ensembles) and lightweight alternatives that can be miscalibrated. In this paper, we propose a lightweight uncertainty quantification method with performance comparable to deep ensembles across a range of tasks and metrics. The key idea behind our approach is to revise and augment prior information bottleneck methods with a codebook to obtain a compressed representation of all inputs seen during training. Uncertainty over a new example can then be quantified by its distance from this codebook. The resulting method, the Uncertainty Aware Information Bottleneck (UA-IB), requires only a single forward pass to provide uncertainty estimates. 
Our experiments show that UA-IB can achieve better Out-of-Distribution (OOD) detection and calibration than prior methods, including those based on the standard information bottleneck.", "keywords": "uncertainty quantification;out-of-distribution detection;information theory;information bottleneck;variational information bottleneck;clustering;deterministic uncertainty methods;bregman divergence;rate-distortion theory;compression;quantization", "primary_area": "probabilistic methods (Bayesian methods, variational inference, sampling, UQ, etc.)", "supplementary_material": "/attachment/26d1a3cbace49625185ee1426b424c0d0cceff72.zip", "author": "Ifigeneia Apostolopoulou;Benjamin Eysenbach;Frank Nielsen;Artur Dubrawski", "authorids": "~Ifigeneia_Apostolopoulou1;~Benjamin_Eysenbach1;~Frank_Nielsen1;~Artur_Dubrawski2", "gender": ";M;M;M", "homepage": ";https://ben-eysenbach.github.io/;https://franknielsen.github.io/;https://www.autonlab.org", "dblp": "145/9415.html;192/1863;http://dblp.uni-trier.de/pers/hd/n/Nielsen:Frank;76/48", "google_scholar": "xiJGHuwAAAAJ;DRnOvU8AAAAJ;c-cuO9cAAAAJ;O3gezzcAAAAJ", "orcid": ";0009-0000-7136-6307;0000-0001-5728-0726;0000-0002-2372-0831", "linkedin": ";benjamin-eysenbach-a7235775/;;artur-dubrawski-33a2a87/", "or_profile": "~Ifigeneia_Apostolopoulou1;~Benjamin_Eysenbach1;~Frank_Nielsen1;~Artur_Dubrawski2", "aff": "Carnegie Mellon University;Princeton University;Sony Computer Science Laboratories Inc (Tokyo);Carnegie Mellon University", "aff_domain": "cmu.edu;princeton.edu;sonycsl.co.jp;cmu.edu", "position": "PhD student;Assistant Professor;Fellow;Research Professor", "bibtex": "@misc{\napostolopoulou2024uncertainty,\ntitle={Uncertainty Quantification Using a Codebook of Encoders},\nauthor={Ifigeneia Apostolopoulou and Benjamin Eysenbach and Frank Nielsen and Artur Dubrawski},\nyear={2024},\nurl={https://openreview.net/forum?id=49Tn5mfTy5}\n}", "github": "", "project": "", "reviewers": "rmmd;fXda;nABa;81b3", "site": "https://openreview.net/forum?id=49Tn5mfTy5", "pdf_size": 769799, "rating": "1;5;6;8", "confidence": "5;3;4;3", "soundness": "2;2;3;3", "contribution": "1;2;2;2", "presentation": "2;2;3;3", "wc_summary": "68;71;62;45", "wc_strengths": "43;39;74;141", "wc_weaknesses": "226;46;501;238", "wc_questions": "114;93;192;116", "wc_review": "451;249;829;540", "wc_reply_reviewers": "886;218;317;95", "wc_reply_authors": "1466;1117;1243;630", "reply_reviewers": "2;1;2;1", "reply_authors": "4;3;4;1", "rating_avg": [ 5.0, 2.5495097567963922 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 61.5, 10.062305898749054 ], "wc_strengths_avg": [ 74.25, 40.84957160118084 ], "wc_weaknesses_avg": [ 252.75, 162.25500762688344 ], "wc_questions_avg": [ 128.75, 37.6123317543595 ], "wc_review_avg": [ 517.25, 208.59814836186825 ], "wc_reply_reviewers_avg": [ 379.0, 303.096519280575 ], "wc_reply_authors_avg": [ 1114.0, 306.1086408450438 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 3.0, 1.224744871391589 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8278373543847156, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:FgwwTZyP_8UJ:scholar.google.com/&scioq=Uncertainty+Quantification+Using+a+Codebook+of+Encoders&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Carnegie Mellon University;Princeton University;Sony Computer 
Science Laboratories Inc", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cmu.edu;https://www.princeton.edu;https://www.sony.net/", "aff_unique_abbr": "CMU;Princeton;Sony CSL", "aff_campus_unique_index": "1", "aff_campus_unique": ";Tokyo", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;Japan" }, { "id": "49ZYkhEGmv", "title": "Scalabale AI Safety via Doubly-Efficient Debate", "track": "main", "status": "Reject", "tldr": "", "abstract": "The emergence of pre-trained AI systems with powerful capabilities across a diverse and ever-increasing set of complex domains has raised a critical challenge for AI safety, as tasks can become too complicated for humans to judge directly. Irving et al. (2018) proposed a debate method in this direction with the goal of pitting the power of such AI models against each other until the problem of identifying (mis)-alignment is broken down into a manageable subtask. While the promise of this approach is clear, the original framework was based on the assumption that the honest strategy is able to simulate deterministic AI systems for an exponential number of steps, limiting its applicability. In this paper, we show how to address these challenges by designing a new set of debate protocols where the honest strategy can always succeed using a simulation of a polynomial number of steps, whilst being able to verify the alignment of stochastic AI systems, even when the dishonest strategy is allowed to use exponentially many simulation steps.", "keywords": "AI Safety;Interactive Proofs;Algorithms and Complexity Theory", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/e78f80ec774ae167a5329191d6cdbd1c4e29aa82.zip", "author": "Jonah Brown-Cohen;Geoffrey Irving;Georgios Piliouras", "authorids": "~Jonah_Brown-Cohen1;~Geoffrey_Irving2;~Georgios_Piliouras1", "gender": "M;M;", "homepage": "https://jonahbc.github.io/;https://naml.us;", "dblp": "157/1513;95/4978;62/1236", "google_scholar": "fRc3A80AAAAJ;TrdtzgwAAAAJ;", "orcid": ";;", "linkedin": ";geoffreyirving;", "or_profile": "~Jonah_Brown-Cohen1;~Geoffrey_Irving2;~Georgios_Piliouras1", "aff": "Google DeepMind;Google DeepMind;Singapore University of Technology and Design", "aff_domain": "deepmind.com;deepmind.com;sutd.edu.sg", "position": "Researcher;Safety Researcher;Associate Professor", "bibtex": "@misc{\nbrown-cohen2024scalabale,\ntitle={Scalabale {AI} Safety via Doubly-Efficient Debate},\nauthor={Jonah Brown-Cohen and Geoffrey Irving and Georgios Piliouras},\nyear={2024},\nurl={https://openreview.net/forum?id=49ZYkhEGmv}\n}", "github": "", "project": "", "reviewers": "a5ph;zvu9;TQe8;5QN9", "site": "https://openreview.net/forum?id=49ZYkhEGmv", "pdf_size": 378790, "rating": "6;6;6;8", "confidence": "2;3;2;2", "soundness": "3;3;3;3", "contribution": "3;3;3;3", "presentation": "3;4;3;3", "wc_summary": "139;154;34;29", "wc_strengths": "122;159;3;60", "wc_weaknesses": "121;130;4;191", "wc_questions": "38;53;90;314", "wc_review": "420;496;131;594", "wc_reply_reviewers": "0;17;0;0", "wc_reply_authors": "226;384;532;574", "reply_reviewers": "0;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 2.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 89.0, 57.77110004145671 ], "wc_strengths_avg": [ 86.0, 59.56089321022645 ], "wc_weaknesses_avg": [ 111.5, 67.65537672646572 
], "wc_questions_avg": [ 123.75, 111.45935357788507 ], "wc_review_avg": [ 410.25, 172.6215151711976 ], "wc_reply_reviewers_avg": [ 4.25, 7.361215932167728 ], "wc_reply_authors_avg": [ 429.0, 136.81008734738825 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17236456443179009770&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 9, "aff_unique_index": "0;0;1", "aff_unique_norm": "Google;Singapore University of Technology and Design", "aff_unique_dep": "Google DeepMind;", "aff_unique_url": "https://deepmind.com;https://www.sutd.edu.sg", "aff_unique_abbr": "DeepMind;SUTD", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United Kingdom;Singapore" }, { "title": "LCOT: Linear Circular Optimal Transport", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19477", "id": "49z97Y9lMq", "author_site": "ROCIO DIAZ MARTIN, Ivan Medri, Yikun Bai, Xinran Liu, Kangbai Yan, Gustavo Rohde, Soheil Kolouri", "tldr": "", "abstract": "The optimal transport problem for measures supported on non-Euclidean spaces has recently gained ample interest in diverse applications involving representation learning. In this paper, we focus on circular probability measures, i.e., probability measures supported on the unit circle, and introduce a new computationally efficient metric for these measures, denoted as Linear Circular Optimal Transport (LCOT). The proposed metric comes with an explicit linear embedding that allows one to apply Machine Learning (ML) algorithms to the embedded measures and seamlessly modify the underlying metric for the ML algorithm to LCOT. We show that the proposed metric is rooted in the Circular Optimal Transport (COT) and can be considered the linearization of the COT metric with respect to a fixed reference measure. We provide a theoretical analysis of the proposed metric and derive the computational complexities for pairwise comparison of circular probability measures. 
Lastly, through a set of numerical experiments, we demonstrate the benefits of LCOT in learning representations from circular measures.", "keywords": "Optimal Transport;Circular Measure;Probability Metrics", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/33dbd9b75005fd214a8b1afb996888a6e7fc7377.zip", "author": "Rocio P Diaz Martin;Ivan Vladimir Medri;Yikun Bai;Xinran Liu;Kangbai Yan;Gustavo Rohde;Soheil Kolouri", "authorids": "~Rocio_P_Diaz_Martin1;~Ivan_Vladimir_Medri1;~Yikun_Bai2;~Xinran_Liu2;~Kangbai_Yan1;~Gustavo_Rohde1;~Soheil_Kolouri1", "gender": "M;M;;M;M;M;F", "homepage": ";;;https://www.linkedin.com/in/kangbai-yan-226829169/;https://www.imagedatascience.com/;https://skolouri.github.io/;", "dblp": ";273/3993.html;;;;143/9637;", "google_scholar": "https://scholar.google.com/citations?hl=en;zLm6JOAAAAAJ;ZHz5VScAAAAJ;;;yREBSy0AAAAJ;7RHakmMAAAAJ", "orcid": ";;;;;0000-0001-8495-5362;0000-0002-3732-6296", "linkedin": ";yikun-bai-b70050138/?trk=public_profile_browsemap;xinran-l-5777a0205/;;;skolouri/;", "or_profile": "~Ivan_Vladimir_Medri1;~Yikun_Bai2;~Xinran_Liu2;~Kangbai_Yan1;~Gustavo_Rohde1;~Soheil_Kolouri1;~ROCIO_DIAZ_MARTIN1", "aff": "Tennessee State University;Vanderbilt University;Vanderbilt University;Vanderbilt University;University of Virginia Main Campus;Vanderbilt University;Tufts University", "aff_domain": "tnstate.edu;vanderbilt.edu;vanderbilt.edu;vanderbilt.edu;;vanderbilt.edu;tufts.edu", "position": "Postdoc;Postdoc;PhD student;Undergrad student;;Assistant Professor;Postdoc", "bibtex": "@inproceedings{\nmartin2024lcot,\ntitle={{LCOT}: Linear Circular Optimal Transport},\nauthor={Rocio P Diaz Martin and Ivan Vladimir Medri and Yikun Bai and Xinran Liu and Kangbai Yan and Gustavo Rohde and Soheil Kolouri},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=49z97Y9lMq}\n}", "github": "", "project": "", "reviewers": "8oTS;Hihy;5tYi", "pdf_size": 10743012, "rating": "6;6;6", "confidence": "4;4;3", "soundness": "4;3;2", "contribution": "3;2;2", "presentation": "4;3;2", "wc_summary": "80;188;88", "wc_strengths": "73;65;49", "wc_weaknesses": "132;159;71", "wc_questions": "118;69;44", "wc_review": "403;481;252", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "477;411;543", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 118.66666666666667, 49.13473539383541 ], "wc_strengths_avg": [ 62.333333333333336, 9.977753031397176 ], "wc_weaknesses_avg": [ 120.66666666666667, 36.80881536926839 ], "wc_questions_avg": [ 77.0, 30.735430152621365 ], "wc_review_avg": [ 378.6666666666667, 95.05904597786694 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 477.0, 53.88877434122992 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18019738448181464318&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=49z97Y9lMq", "pdf": "https://openreview.net/pdf?id=49z97Y9lMq", "email": 
"tnstate.edu;vanderbilt.edu;vanderbilt.edu;vanderbilt.edu;;vanderbilt.edu;tufts.edu", "author_num": 7, "aff_unique_index": "0;1;1;1;2;1;3", "aff_unique_norm": "Tennessee State University;Vanderbilt University;University of Virginia;Tufts University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.tnstate.edu;https://www.vanderbilt.edu;https://www.virginia.edu;https://www.tufts.edu", "aff_unique_abbr": "TSU;Vanderbilt;UVA;Tufts", "aff_campus_unique_index": "1", "aff_campus_unique": ";Main Campus", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "4A5D1nsdtj", "title": "An Effective Universal Polynomial Basis for Spectral Graph Neural Networks", "track": "main", "status": "Reject", "tldr": "", "abstract": "Spectral Graph Neural Networks (GNNs), also referred to as *graph filters* have gained increasing prevalence for heterophily graphs. Optimal graph filters rely on Laplacian eigendecomposition for Fourier transform. In an attempt to avert the prohibitive computations, numerous polynomial filters by leveraging distinct polynomials have been proposed to approximate the desired graph filters. However, polynomials in the majority of polynomial filters are *predefined* and remain *fixed* across all graphs, failing to accommodate the diverse heterophily degrees across different graphs. To tackle this issue, we first investigate the correlation between polynomial bases of desired graph filters and the degrees of graph heterophily via a thorough theoretical analysis. Afterward, we develop an adaptive heterophily basis by incorporating graph heterophily degrees. Subsequently, we integrate this heterophily basis with the homophily basis, creating a universal polynomial basis *UniBasis*. In consequence, we devise a general polynomial filter *UniFilter*. 
Comprehensive experiments on both real-world and synthetic datasets with varying heterophily degrees significantly support the superiority of UniFilter, demonstrating the effectiveness and generality of UniBasis, as well as its promising capability as a new method for graph analysis.", "keywords": "Graph neural networks;Spectral graph filter", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/a4660f5e25d355e4c9c1eda9f45e004275b2a204.zip", "author": "Keke Huang;Pietro Lio", "authorids": "~Keke_Huang1;~Pietro_Lio1", "gender": "M;M", "homepage": "https://sites.google.com/view/kekehuang/;https://www.cst.cam.ac.uk/people/pl219", "dblp": ";l/PietroLio.html", "google_scholar": "https://scholar.google.com.sg/citations?user=OsceCbcAAAAJ;https://scholar.google.co.uk/citations?user=3YrWf7EAAAAJ", "orcid": "0000-0003-2190-7114;0000-0002-0540-5053", "linkedin": "keke-huang-4594b9135/?originalSubdomain=sg;", "or_profile": "~Keke_Huang1;~Pietro_Lio1", "aff": "National University of Singapore;University of Cambridge", "aff_domain": "nus.edu.sg;cam.ac.uk", "position": "Postdoc;Full Professor", "bibtex": "@misc{\nhuang2024an,\ntitle={An Effective Universal Polynomial Basis for Spectral Graph Neural Networks},\nauthor={Keke Huang and Pietro Lio},\nyear={2024},\nurl={https://openreview.net/forum?id=4A5D1nsdtj}\n}", "github": "", "project": "", "reviewers": "bWo5;NCEm;SYRx;vpd8", "site": "https://openreview.net/forum?id=4A5D1nsdtj", "pdf_size": 453004, "rating": "3;3;6;6", "confidence": "5;4;4;2", "soundness": "2;1;3;3", "contribution": "2;2;3;4", "presentation": "2;2;3;3", "wc_summary": "57;137;216;108", "wc_strengths": "40;47;9;80", "wc_weaknesses": "227;143;166;233", "wc_questions": "39;3;82;126", "wc_review": "363;330;473;547", "wc_reply_reviewers": "158;64;0;0", "wc_reply_authors": "1019;1153;1289;2040", "reply_reviewers": "2;1;0;0", "reply_authors": "3;3;3;4", "rating_avg": [ 4.5, 1.5 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "contribution_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 129.5, 57.56952318718646 ], "wc_strengths_avg": [ 44.0, 25.228951623085727 ], "wc_weaknesses_avg": [ 192.25, 38.674119253061214 ], "wc_questions_avg": [ 62.5, 46.11127844681819 ], "wc_review_avg": [ 428.25, 86.62382755339318 ], "wc_reply_reviewers_avg": [ 55.5, 64.6896436842869 ], "wc_reply_authors_avg": [ 1375.25, 395.4872785564663 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 3.25, 0.4330127018922193 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.6882472016116854, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18007601608947293203&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1", "aff_unique_norm": "National University of Singapore;University of Cambridge", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;https://www.cam.ac.uk", "aff_unique_abbr": "NUS;Cambridge", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;1", "aff_country_unique": "Singapore;United Kingdom" }, { "title": "Mixed-Type Tabular Data Synthesis with Score-based Diffusion in Latent Space", "status": "Oral", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19476", "id": "4Ay23yeuz0", "author_site": "Hengrui Zhang, Jiani Zhang, Zhengyuan Shen, Balasubramaniam 
Srinivasan, Xiao Qin, Christos Faloutsos, Huzefa Rangwala, George Karypis", "tldr": "", "abstract": "Recent advances in tabular data generation have greatly enhanced synthetic data quality. However, extending diffusion models to tabular data is challenging due to the intricately varied distributions and a blend of data types of tabular data. This paper introduces TabSyn, a methodology that synthesizes tabular data by leveraging a diffusion model within a variational autoencoder (VAE) crafted latent space. The key advantages of the proposed Tabsyn include (1) Generality: the ability to handle a broad spectrum of data types by converting them into a single unified space and explicitly capturing inter-column relations; (2) Quality: optimizing the distribution of latent embeddings to enhance the subsequent training of diffusion models, which helps generate high-quality synthetic data; (3) Speed: much fewer number of reverse steps and faster synthesis speed than existing diffusion-based methods. Extensive experiments on six datasets with five metrics demonstrate that Tabsyn outperforms existing methods. Specifically, it reduces the error rates by 86% and 67% for column-wise distribution and pair-wise column correlation estimations compared with the most competitive baselines. The code has been made available at https://github.com/amazon-science/tabsyn.", "keywords": "Tabular data;tabular generation;diffusion models", "primary_area": "generative models", "supplementary_material": "", "author": "Hengrui Zhang;Jiani Zhang;Zhengyuan Shen;Balasubramaniam Srinivasan;Xiao Qin;Christos Faloutsos;Huzefa Rangwala;George Karypis", "authorids": "~Hengrui_Zhang1;~Jiani_Zhang2;~Zhengyuan_Shen1;~Balasubramaniam_Srinivasan1;~Xiao_Qin3;~Christos_Faloutsos1;~Huzefa_Rangwala2;~George_Karypis1", "gender": "M;F;M;;M;M;M;M", "homepage": "https://hengruizhang98.github.io;https://jennyzhang0215.github.io/;;;https://web.cs.wpi.edu/~xqin/;https://www.cs.cmu.edu/~christos/;http://www.cs.gmu.edu/~rangwala;", "dblp": ";186/6870;;230/3792;199/4704-3;f/CFaloutsos;30/444;", "google_scholar": "iwffiD0AAAAJ;CBmDAOEAAAAJ;mX2LPRwAAAAJ;uM4EhgEAAAAJ;https://scholar.google.com/citations?hl=en;nd8lQQIAAAAJ;yWJ9BqEAAAAJ;ElqwScwAAAAJ", "orcid": "0009-0006-1330-0899;0000-0003-0074-6761;;;;0000-0003-2996-9790;;", "linkedin": ";;donshen16/;;;christos-faloutsos-43a7aa2/;;", "or_profile": "~Hengrui_Zhang1;~Jiani_Zhang2;~Zhengyuan_Shen1;~Balasubramaniam_Srinivasan1;~Xiao_Qin3;~Christos_Faloutsos1;~Huzefa_Rangwala2;~George_Karypis1", "aff": "University of Illinois, Chicago;AWS;Amazon;Amazon;Amazon;Carnegie Mellon University;Computer Science, George Mason University;University of Minnesota, Minneapolis", "aff_domain": "uic.edu;amazon.com;amazon.com;amazon.com;amazon.com;cmu.edu;cs.gmu.edu;umn.edu", "position": "PhD student;Researcher;Researcher;Senior Applied Scientist;Researcher;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2024mixedtype,\ntitle={Mixed-Type Tabular Data Synthesis with Score-based Diffusion in Latent Space},\nauthor={Hengrui Zhang and Jiani Zhang and Zhengyuan Shen and Balasubramaniam Srinivasan and Xiao Qin and Christos Faloutsos and Huzefa Rangwala and George Karypis},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=4Ay23yeuz0}\n}", "github": "", "project": "", "reviewers": "q7Xg;aVor;dVGm;Fs1y", "pdf_size": 1950574, "rating": "5;6;8;8", "confidence": "3;4;4;3", "soundness": "3;4;3;3", "contribution": 
"3;2;3;4", "presentation": "3;4;4;3", "wc_summary": "41;44;182;99", "wc_strengths": "31;25;92;128", "wc_weaknesses": "69;60;165;284", "wc_questions": "41;73;94;241", "wc_review": "182;202;533;752", "wc_reply_reviewers": "0;17;64;59", "wc_reply_authors": "804;833;1488;1619", "reply_reviewers": "0;1;2;1", "reply_authors": "3;3;2;3", "rating_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 91.5, 57.124863238348325 ], "wc_strengths_avg": [ 69.0, 42.98255460067491 ], "wc_weaknesses_avg": [ 144.5, 90.44473450676938 ], "wc_questions_avg": [ 112.25, 76.69216061632375 ], "wc_review_avg": [ 417.25, 238.2911821700501 ], "wc_reply_reviewers_avg": [ 35.0, 27.230497608380205 ], "wc_reply_authors_avg": [ 1186.0, 370.54891714859997 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.19245008972987526, "gs_citation": 87, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9946646751213130183&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "openreview": "https://openreview.net/forum?id=4Ay23yeuz0", "pdf": "https://openreview.net/pdf?id=4Ay23yeuz0", "email": "uic.edu;amazon.com;amazon.com;amazon.com;amazon.com;cmu.edu;cs.gmu.edu;umn.edu", "author_num": 8, "aff_unique_index": "0;1;1;1;1;2;3;4", "aff_unique_norm": "University of Illinois at Chicago;Amazon;Carnegie Mellon University;George Mason University;University of Minnesota", "aff_unique_dep": ";Amazon Web Services;;Computer Science;", "aff_unique_url": "https://www.uic.edu;https://aws.amazon.com;https://www.cmu.edu;https://www.gmu.edu;https://www.minnesota.edu", "aff_unique_abbr": "UIC;AWS;CMU;GMU;UMN", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Chicago;;Minneapolis", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Chameleon: Increasing Label-Only Membership Leakage with Adaptive Poisoning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19475", "id": "4DoSULcfG6", "author_site": "Harsh Chaudhari, Giorgio Severi, Alina Oprea, Jonathan Ullman", "tldr": "", "abstract": "The integration of Machine Learning (ML) in numerous critical applications introduces a range of privacy concerns for individuals who provide their datasets for ML training purposes. One such privacy risk is Membership Inference (MI), in which an adversary seeks to determine whether a particular data point was included in the training dataset of a model. Current state-of-the-art MI approaches capitalize on access to the model\u2019s predicted confidence scores to successfully perform membership inference, and employ data poisoning to further enhance their effectiveness. \nIn this work, we focus on the less explored and more realistic label-only setting, where the model provides only the predicted label as output. We show that existing label-only attacks are ineffective at inferring membership in the low False Positive Rate (FPR) regime. 
To address this challenge, we propose a new attack Chameleon that leverages a novel data poisoning strategy and an efficient query selection method to achieve significantly more accurate membership inference than existing label-only attacks, especially for low FPRs.", "keywords": "Privacy Attack;Membership Inference;Data Poisoning", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "", "author": "Harsh Chaudhari;Giorgio Severi;Alina Oprea;Jonathan Ullman", "authorids": "~Harsh_Chaudhari1;~Giorgio_Severi1;~Alina_Oprea2;~Jonathan_Ullman1", "gender": "M;;M;F", "homepage": ";http://severi.xyz;https://jonathan-ullman.github.io/;http://www.ccs.neu.edu/home/alina/", "dblp": "240/8222.html;221/4233.html;02/8164;35/3425", "google_scholar": "https://scholar.google.co.in/citations?user=w1lHWJ4AAAAJ;ClHeQx0AAAAJ;https://scholar.google.com.tw/citations?user=WfS41RAAAAAJ;https://scholar.google.com.tw/citations?user=16J3izoAAAAJ", "orcid": ";;;0000-0002-4979-5292", "linkedin": ";;;alina-oprea-9588bb1", "or_profile": "~Harsh_Chaudhari1;~Giorgio_Severi1;~Jonathan_Ullman1;~Alina_Oprea1", "aff": "Northeastern University;Northeastern University;Northeastern University;Northeastern University", "aff_domain": "northeastern.edu;northeastern.edu;northeastern.edu;northeastern.edu", "position": "PhD student;PhD student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nchaudhari2024chameleon,\ntitle={Chameleon: Increasing Label-Only Membership Leakage with Adaptive Poisoning},\nauthor={Harsh Chaudhari and Giorgio Severi and Alina Oprea and Jonathan Ullman},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=4DoSULcfG6}\n}", "github": "", "project": "", "reviewers": "ECEy;uopX;JKci", "pdf_size": 1013106, "rating": "5;5;6", "confidence": "4;4;4", "soundness": "3;3;2", "contribution": "2;3;3", "presentation": "3;3;3", "wc_summary": "107;107;92", "wc_strengths": "30;68;20", "wc_weaknesses": "381;244;40", "wc_questions": "11;6;263", "wc_review": "529;425;415", "wc_reply_reviewers": "0;0;431", "wc_reply_authors": "646;396;1311", "reply_reviewers": "0;0;2", "reply_authors": "1;1;3", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 102.0, 7.0710678118654755 ], "wc_strengths_avg": [ 39.333333333333336, 20.677416559027765 ], "wc_weaknesses_avg": [ 221.66666666666666, 140.10551579276083 ], "wc_questions_avg": [ 93.33333333333333, 119.98981438253648 ], "wc_review_avg": [ 456.3333333333333, 51.545018080207214 ], "wc_reply_reviewers_avg": [ 143.66666666666666, 203.17534846093466 ], "wc_reply_authors_avg": [ 784.3333333333334, 386.1418852644136 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4654850171827019168&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=4DoSULcfG6", "pdf": "https://openreview.net/pdf?id=4DoSULcfG6", "email": "northeastern.edu;northeastern.edu;northeastern.edu;northeastern.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": 
"Northeastern University", "aff_unique_dep": "", "aff_unique_url": "https://www.northeastern.edu", "aff_unique_abbr": "NEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "4FUa5dxiiA", "title": "Risk-Sensitive Variational Model-Based Policy Optimization", "track": "main", "status": "Reject", "tldr": "", "abstract": "RL-as-inference casts reinforcement learning (RL) as Bayesian inference in a probabilistic graphical model. While this framework allows efficient variational approximations it is known that model-based RL-as-inference learns optimistic dynamics and risk-seeking policies that can exhibit catastrophic behavior. By exploiting connections between the variational objective and a well-known risk-sensitive utility function we adaptively adjust policy risk based on the environment dynamics. Our method, $\\beta$-VMBPO, extends the variational model-based policy optimization (VMBPO) algorithm to perform dual descent on risk parameter $\\beta$. We provide a thorough theoretical analysis that fills gaps in the theory of model-based RL-as-inference by establishing a generalization of policy improvement, value iteration, and guarantees on policy determinism. Our experiments demonstrate that this risk-sensitive approach yields improvements in simple tabular and complex continuous tasks, such as the DeepMind Control Suite.", "keywords": "Reinforcement Learning;Variational Inference;Risk Sensitive RL;Probabilistic Inference", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/24e46d88030e1c2a3bfbbfa03d080eaefe3b39a0.zip", "author": "Alonso Granados;Jason Pacheco;Mohammadreza Ebrahimi", "authorids": "~Alonso_Granados1;~Jason_Pacheco1;~Mohammadreza_Ebrahimi2", "gender": "M;M;M", "homepage": ";http://www.pachecoj.com;https://star-ailab.github.io/", "dblp": "230/2003;126/1745;26/7531", "google_scholar": ";71ZEsnEAAAAJ;4DmURbEAAAAJ", "orcid": ";;0000-0003-1367-3338", "linkedin": ";;", "or_profile": "~Alonso_Granados1;~Jason_Pacheco1;~Mohammadreza_Ebrahimi2", "aff": "University of Arizona;University of Arizona;University of South Florida", "aff_domain": "arizona.edu;arizona.edu;usf.edu", "position": "PhD student;Assistant Professor;Assistant Professor", "bibtex": "@misc{\ngranados2024risksensitive,\ntitle={Risk-Sensitive Variational Model-Based Policy Optimization},\nauthor={Alonso Granados and Jason Pacheco and Mohammadreza Ebrahimi},\nyear={2024},\nurl={https://openreview.net/forum?id=4FUa5dxiiA}\n}", "github": "", "project": "", "reviewers": "wKFu;dkQy;9Tpq", "site": "https://openreview.net/forum?id=4FUa5dxiiA", "pdf_size": 1192745, "rating": "3;6;6", "confidence": "4;3;3", "soundness": "2;3;3", "contribution": "2;3;3", "presentation": "3;4;3", "wc_summary": "197;200;97", "wc_strengths": "56;83;28", "wc_weaknesses": "531;607;89", "wc_questions": "88;35;194", "wc_review": "872;925;408", "wc_reply_reviewers": "970;152;114", "wc_reply_authors": "708;390;404", "reply_reviewers": "2;1;1", "reply_authors": "2;2;1", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 164.66666666666666, 47.86323107447813 ], "wc_strengths_avg": [ 55.666666666666664, 22.45489305746572 ], "wc_weaknesses_avg": [ 409.0, 228.39147678200837 ], 
"wc_questions_avg": [ 105.66666666666667, 66.1026138330063 ], "wc_review_avg": [ 735.0, 232.23407731568307 ], "wc_reply_reviewers_avg": [ 412.0, 394.87044288812837 ], "wc_reply_authors_avg": [ 500.6666666666667, 146.71817277427573 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:xRayuBBRm3sJ:scholar.google.com/&scioq=Risk-Sensitive+Variational+Model-Based+Policy+Optimization&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Arizona;University of South Florida", "aff_unique_dep": ";", "aff_unique_url": "https://www.arizona.edu;https://www.usf.edu", "aff_unique_abbr": "UA;USF", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "4GfEOQlBoc", "title": "Disentangling the Link Between Image Statistics and Human Perception", "track": "main", "status": "Reject", "tldr": "", "abstract": "In the 1950s, Barlow and Attneave hypothesised a link between biological vision and information maximisation. Following Shannon, information was defined using the probability of natural images. A number of physiological and psychophysical phenomena have been derived ever since from principles like info-max, efficient coding, or optimal denoising. However, it remains unclear how this link is expressed in mathematical terms from image probability. First, classical derivations were subjected to strong assumptions on the probability models and on the behaviour of the sensors. Moreover, the direct evaluation of the hypothesis was limited by the inability of the classical image models to deliver accurate estimates of the probability. In this work we directly evaluate image probabilities using an advanced generative model for natural images, and we analyse how probability-related factors can be combined to predict human perception via sensitivity of state-of-the-art subjective image quality metrics. We use information theory and regression analysis to find a combination of just two probability-related factors that achieves 0.8 correlation with subjective metrics. 
This probability-based sensitivity is psychophysically validated by reproducing the basic trends of the Contrast Sensitivity Function, its suprathreshold variation, and trends of the Weber-law and masking.", "keywords": "perception;vision science;probability", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/d6bfbb96859a9b4cb9c3a1b8396b2c59a62c79bf.pdf", "author": "Alexander Hepburn;Valero Laparra;Raul Santos-Rodriguez;Jesus Malo", "authorids": "~Alexander_Hepburn2;~Valero_Laparra1;~Raul_Santos-Rodriguez1;~Jesus_Malo1", "gender": "M;;;M", "homepage": ";https://www.uv.es/lapeva/;https://www.bristol.ac.uk/people/person/Raul-Santos-Rodriguez-1d708791-ea39-4078-89e6-c5c81b8c1a22/;http://isp.uv.es/excathedra.html", "dblp": ";;24/7253;31/4807", "google_scholar": "CzZTCN4AAAAJ;dNt_xikAAAAJ;U_ldrLcAAAAJ;https://scholar.google.es/citations?user=0pgrklEAAAAJ", "orcid": ";;0000-0001-9576-3905;0000-0002-5684-8591", "linkedin": ";;;", "or_profile": "~Alexander_Hepburn2;~Valero_Laparra1;~Raul_Santos-Rodriguez1;~Jesus_Malo1", "aff": "University of Bristol;Universitat de Val\u00e8ncia;University of Bristol;Universitat de Valencia", "aff_domain": "bristol.ac.uk;uv.es;bristol.ac.uk;uv.es", "position": "Postdoc;Postdoc;Full Professor;Full Professor", "bibtex": "@misc{\nhepburn2024disentangling,\ntitle={Disentangling the Link Between Image Statistics and Human Perception},\nauthor={Alexander Hepburn and Valero Laparra and Raul Santos-Rodriguez and Jesus Malo},\nyear={2024},\nurl={https://openreview.net/forum?id=4GfEOQlBoc}\n}", "github": "", "project": "", "reviewers": "Cqjc;6JJg;RwsQ;Y3LV", "site": "https://openreview.net/forum?id=4GfEOQlBoc", "pdf_size": 599460, "rating": "5;5;5;6", "confidence": "5;4;4;3", "soundness": "2;2;2;3", "contribution": "2;3;2;3", "presentation": "3;2;2;3", "wc_summary": "20;55;72;87", "wc_strengths": "23;53;33;16", "wc_weaknesses": "194;280;471;1", "wc_questions": "11;217;2;146", "wc_review": "248;605;578;250", "wc_reply_reviewers": "0;82;0;29", "wc_reply_authors": "990;687;1253;135", "reply_reviewers": "0;1;0;2", "reply_authors": "2;2;2;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 58.5, 24.944939366532843 ], "wc_strengths_avg": [ 31.25, 13.935117509371782 ], "wc_weaknesses_avg": [ 236.5, 168.92971911419258 ], "wc_questions_avg": [ 94.0, 91.08512502049936 ], "wc_review_avg": [ 420.25, 171.51730962209032 ], "wc_reply_reviewers_avg": [ 27.75, 33.48413803579241 ], "wc_reply_authors_avg": [ 766.25, 415.85657082701005 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17297146322820130188&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "University of Bristol;Universitat de Val\u00e8ncia;University of Valencia", "aff_unique_dep": ";;", "aff_unique_url": "https://www.bristol.ac.uk;https://www.uv.es;https://www.uv.es", "aff_unique_abbr": "Bristol;UV;UV", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "United Kingdom;Spain" }, { "id": "4Hf5pbk74h", "title": 
"Improving classifier decision boundaries using nearest neighbors", "track": "main", "status": "Reject", "tldr": "", "abstract": "In this paper, we show that neural networks are not learning optimal decision boundaries. Decision boundaries go through areas of low training data density. They are impacted by few training samples which can easily lead to overfitting. We show that performing a weighted average of the prediction of a sample and its nearest neighbors' (computed in latent space) leads to a variety of minor favorable outcomes. In our evaluation, we employ various self-trained and pre-trained convolutional neural networks to show that our approach improves (i) resistance to label noise, (ii) robustness against adversarial attacks, (iii) classification accuracy, and to some degree even (iv) interpretability. While improvements are not necessarily large in all four areas, our approach is conceptually simple, i.e., improvements come without any modification to network architecture, training procedure or dataset. Furthermore, they are in stark contrast to prior works that often require trade-offs among the four objectives or provides only non-actionable insights.", "keywords": "decision boundary;computer vision;CNN;kNN", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/cb301cd78d038d8bfd16492018cc1be5ece42e0c.zip", "author": "Johannes Schneider", "authorids": "~Johannes_Schneider2", "gender": "", "homepage": "", "dblp": "31/4013-2", "google_scholar": "hgXFYMUAAAAJ", "orcid": "", "linkedin": "", "or_profile": "~Johannes_Schneider2", "aff": "Universit\u00e4t Liechtenstein", "aff_domain": "uni.li", "position": "Associate Professor", "bibtex": "@misc{\nschneider2024improving,\ntitle={Improving classifier decision boundaries using nearest neighbors},\nauthor={Johannes Schneider},\nyear={2024},\nurl={https://openreview.net/forum?id=4Hf5pbk74h}\n}", "github": "", "project": "", "reviewers": "RTTt;H15T;N6Bu", "site": "https://openreview.net/forum?id=4Hf5pbk74h", "pdf_size": 722593, "rating": "1;3;3", "confidence": "4;4;4", "soundness": "2;2;2", "contribution": "1;2;2", "presentation": "1;2;2", "wc_summary": "33;52;136", "wc_strengths": "37;33;109", "wc_weaknesses": "287;1270;111", "wc_questions": "31;7;671", "wc_review": "388;1362;1027", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "313;22;22", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 2.3333333333333335, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.0, 0.0 ], "contribution_avg": [ 1.6666666666666667, 0.4714045207910317 ], "presentation_avg": [ 1.6666666666666667, 0.4714045207910317 ], "wc_summary_avg": [ 73.66666666666667, 44.75364665464579 ], "wc_strengths_avg": [ 59.666666666666664, 34.92213560989012 ], "wc_weaknesses_avg": [ 556.0, 509.9614364505091 ], "wc_questions_avg": [ 236.33333333333334, 307.51187872268537 ], "wc_review_avg": [ 925.6666666666666, 404.03822701433694 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 119.0, 137.17871555019022 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:zLNUHp3DI7oJ:scholar.google.com/&scioq=Improving+classifier+decision+boundaries+using+nearest+neighbors&hl=en&as_sdt=0,33", "gs_version_total": 3, "aff_unique_index": "0", "aff_unique_norm": 
"Universit\u00e4t Liechtenstein", "aff_unique_dep": "", "aff_unique_url": "https://www.unili.ch", "aff_unique_abbr": "Uni Li", "aff_country_unique_index": "0", "aff_country_unique": "Liechtenstein" }, { "id": "4Hv5DLTJLF", "title": "Consensus Optimization at Representation: Improving Personalized Federated Learning via Data-Centric Regularization", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Federated learning is a large scale machine learning training paradigm where data is distributed across clients, and can be highly heterogeneous from one client to another. To ensure personalization in client models, and at the same time to ensure that the local models have enough commonality (i.e., prevent ``client-drift''), it has been recently proposed to cast the federated learning problem as a consensus optimization problem, where local models are trained on local data, but are forced to be similar via a regularization term. In this paper we propose an improved federated learning algorithm, where we ensure consensus optimization at the representation part of each local client, and not on whole local models. This algorithm naturally takes into account that today's deep networks are often partitioned into a feature extraction part (representation) and a prediction part. Our algorithm ensures greater flexibility compared to previous works on exact shared representation in highly heterogeneous settings, as it has been seen that the representation part can differ substantially with data distribution. Our method is quite stable to noise, and can be made differentially private with strong privacy guarantee without much loss of accuracy. We provide a complete convergence analysis of our algorithm under general nonconvex loss functions, and validate its good performance experimentally in standard datasets.", "keywords": "Personalized federated learning;Consensus optimization;Representation learning;Variance reduction", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/d4f6b12bb772b062a6a40c7698cec4367fef0c88.pdf", "author": "Heng Zhu;Arya Mazumdar", "authorids": "~Heng_Zhu1;~Arya_Mazumdar1", "gender": "M;M", "homepage": ";http://www.cs.umass.edu/~arya", "dblp": "01/7106;77/6050", "google_scholar": ";https://scholar.google.com.tw/citations?user=9tjQU1EAAAAJ", "orcid": "0000-0002-0230-672X;", "linkedin": ";", "or_profile": "~Heng_Zhu1;~Arya_Mazumdar1", "aff": "University of California, San Diego;University of California, San Diego", "aff_domain": "ucsd.edu;ucsd.edu", "position": "PhD student;Associate Professor", "bibtex": "@misc{\nzhu2024consensus,\ntitle={Consensus Optimization at Representation: Improving Personalized Federated Learning via Data-Centric Regularization},\nauthor={Heng Zhu and Arya Mazumdar},\nyear={2024},\nurl={https://openreview.net/forum?id=4Hv5DLTJLF}\n}", "github": "", "project": "", "reviewers": "QkUY;EJeM;qMXj;R9Ht", "site": "https://openreview.net/forum?id=4Hv5DLTJLF", "pdf_size": 470136, "rating": "3;3;5;5", "confidence": "4;4;3;3", "soundness": "2;2;4;3", "contribution": "2;2;2;3", "presentation": "2;3;3;3", "wc_summary": "45;84;94;35", "wc_strengths": "22;44;50;25", "wc_weaknesses": "348;634;142;76", "wc_questions": "553;100;389;63", "wc_review": "968;862;675;199", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 
2.75, 0.82915619758885 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 64.5, 25.004999500099974 ], "wc_strengths_avg": [ 35.25, 11.986972094736853 ], "wc_weaknesses_avg": [ 300.0, 217.3706511928416 ], "wc_questions_avg": [ 276.25, 203.61897627677044 ], "wc_review_avg": [ 676.0, 294.6989989803155 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Z2icTKDWILkJ:scholar.google.com/&scioq=Consensus+Optimization+at+Representation:+Improving+Personalized+Federated+Learning+via+Data-Centric+Regularization&hl=en&as_sdt=0,5", "gs_version_total": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of California, San Diego", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsd.edu", "aff_unique_abbr": "UCSD", "aff_campus_unique_index": "0;0", "aff_campus_unique": "San Diego", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "One For All: Towards Training One Graph Model For All Classification Tasks", "status": "Spotlight", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19474", "id": "4IT2pgc9v6", "author_site": "Hao Liu, Jiarui Feng, Lecheng Kong, Ningyue Liang, Dacheng Tao, Yixin Chen, Muhan Zhang", "tldr": "", "abstract": "Designing a single model to address multiple tasks has been a long-standing objective in artificial intelligence. Recently, large language models have demonstrated exceptional capability in solving different tasks within the language domain. However, a unified model for various graph tasks remains underexplored, primarily due to the challenges unique to the graph learning domain. First, graph data from different areas carry distinct attributes and follow different distributions. Such discrepancy makes it hard to represent graphs in a single representation space. Second, tasks on graphs diversify into node, link, and graph tasks, requiring distinct embedding strategies. Finally, an appropriate graph prompting paradigm for in-context learning is unclear. We propose **One for All (OFA)**, the first general framework that can use a single graph model to address the above challenges. Specifically, OFA proposes text-attributed graphs to unify different graph data by describing nodes and edges with natural language and uses language models to encode the diverse and possibly cross-domain text attributes to feature vectors in the same embedding space. Furthermore, OFA introduces the concept of nodes-of-interest to standardize different tasks with a single task representation. For in-context learning on graphs, OFA introduces a novel graph prompting paradigm that appends prompting substructures to the input graph, which enables it to address varied tasks without fine-tuning. We train the OFA model using graph data from multiple domains (including citation networks, molecular graphs, knowledge graphs, etc.) simultaneously and evaluate its ability in supervised, few-shot, and zero-shot learning scenarios. 
OFA performs well across different tasks, making it the first general-purpose across-domains classification model on graphs.", "keywords": "Graph Neural Network;Large Language Model;In-context Learning", "primary_area": "learning on graphs and other geometries & topologies", "supplementary_material": "/attachment/30bf1403d2e456553faf2a34a7acf3f0b2f125c4.zip", "author": "Hao Liu;Jiarui Feng;Lecheng Kong;Ningyue Liang;Dacheng Tao;Yixin Chen;Muhan Zhang", "authorids": "~Hao_Liu25;~Jiarui_Feng1;~Lecheng_Kong1;~Ningyue_Liang1;~Dacheng_Tao1;~Yixin_Chen1;~Muhan_Zhang1", "gender": "F;M;M;M;;M;M", "homepage": "https://haoliu-cola.github.io/;https://jiaruifeng.github.io/;https://LechengKong.github.io/;https://github.com/NingyueLiang;;https://www.cse.wustl.edu/~yixin.chen/;https://muhanzhang.github.io/", "dblp": "09/3214-57;77/8797;319/5576;;;59/983;157/5518", "google_scholar": ";6CSGUR8AAAAJ;yk3-_EgAAAAJ;;;NByrsK0AAAAJ;https://scholar.google.com.hk/citations?user=OBBqkosAAAAJ", "orcid": ";0000-0002-3409-6819;0000-0001-9427-8799;;;;0000-0002-7680-6401", "linkedin": ";;;ningyue-liang-frank;;;jerry-muhan-zhang-a33a1777/", "or_profile": "~Hao_Liu25;~Jiarui_Feng1;~Lecheng_Kong1;~Ningyue_Liang1;~Dacheng_Tao1;~Yixin_Chen1;~Muhan_Zhang1", "aff": "Washington University in St. Louis;Washington University, Saint Louis;Amazon;Washington University, Saint Louis;;Washington University, Saint Louis;Peking University", "aff_domain": "wustl.edu;wustl.edu;amazon.com;wustl.edu;;wustl.edu;pku.edu.cn", "position": "PhD student;PhD student;Researcher;Undergrad student;;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nliu2024one,\ntitle={One For All: Towards Training One Graph Model For All Classification Tasks},\nauthor={Hao Liu and Jiarui Feng and Lecheng Kong and Ningyue Liang and Dacheng Tao and Yixin Chen and Muhan Zhang},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=4IT2pgc9v6}\n}", "github": "", "project": "", "reviewers": "A8Qx;RxxK;LMu3;WF3H", "pdf_size": 753506, "rating": "6;6;6;10", "confidence": "4;4;3;5", "soundness": "3;3;2;3", "contribution": "2;3;3;3", "presentation": "3;3;3;4", "wc_summary": "39;154;26;67", "wc_strengths": "44;24;42;44", "wc_weaknesses": "462;270;77;78", "wc_questions": "2;64;7;2", "wc_review": "547;512;152;191", "wc_reply_reviewers": "30;0;43;0", "wc_reply_authors": "1946;855;854;481", "reply_reviewers": "1;0;1;0", "reply_authors": "5;2;3;2", "rating_avg": [ 7.0, 1.7320508075688772 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 71.5, 49.882361612096915 ], "wc_strengths_avg": [ 38.5, 8.411301920630361 ], "wc_weaknesses_avg": [ 221.75, 159.42455112058494 ], "wc_questions_avg": [ 18.75, 26.20472285676763 ], "wc_review_avg": [ 350.5, 179.95624468186705 ], "wc_reply_reviewers_avg": [ 18.25, 18.819869818890883 ], "wc_reply_authors_avg": [ 1034.0, 548.177434778193 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.0, 1.224744871391589 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 47, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18088863771521081810&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=4IT2pgc9v6", "pdf": "https://openreview.net/pdf?id=4IT2pgc9v6", "email": 
"wustl.edu;wustl.edu;amazon.com;wustl.edu;;wustl.edu;pku.edu.cn", "author_num": 7, "aff_unique_index": "0;0;1;0;0;2", "aff_unique_norm": "Washington University in St. Louis;Amazon;Peking University", "aff_unique_dep": ";Amazon.com, Inc.;", "aff_unique_url": "https://wustl.edu;https://www.amazon.com;http://www.pku.edu.cn", "aff_unique_abbr": "WashU;Amazon;Peking U", "aff_campus_unique_index": "0;1;1;1", "aff_campus_unique": "St. Louis;Saint Louis;", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "United States;China" }, { "id": "4IxtmklIym", "title": "FruitBin: A tunable large-scale dataset for advancing 6D Pose estimation in fruit bin picking automation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Bin picking is a ubiquitous application spanning across diverse industries, demanding automated solutions facilitated by robots. These automation systems hinge upon intricate components, including object instance-level segmentation and 6D pose estimation, which are pivotal for predicting future grasping and manipulation success. Contemporary computer vision approaches predominantly rely on deep learning methodologies and necessitate access to extensive instance-level datasets. However, prevailing datasets and benchmarks tend to be confined to oversimplified scenarios, such as those with singular objects on tables or low levels of object clustering. In this research, we introduce FruitBin. It emerges as an unparalleled resource, boasting an extensive collection of over a million images and 40 million instance-level 6D poses. Additionally FruitBin differs with other datasets whith its inclusive representation of a wide spectrum of challenges, encompassing symmetric and asymmetric fruits, objects with and without discernible texture, and diverse lighting conditions, all enriched with extended annotations and metadata. Leveraging the inherent challenges and the sheer scale of FruitBin, we highlight its potential as a versatile benchmarking tool that can be customized to suit various evaluation scenarios. As a demonstration of this adaptability, we have created two distinct types of benchmarks: one centered on novel scene generalization and another focusing on novel camera viewpoint generalization. Both benchmark types offer four levels of occlusion to facilitate the study of occlusion robustness. Notably, our study showcases the difficulty of FruitBin dataset, with two baseline 6D pose estimation models, one utilizing RGB images and the other RGB-D data, across these eight distinct benchmarks. FruitBin emerges as a pioneering dataset distinguishing itself by seamlessly integrating with robotic software. That enable direct testing of trained models in dynamic grasping tasks for the purpose of robot learning. Samples of the dataset with its associated code are provided in the supplementary materials. 
FruitBin promises to be a catalyst for advancing the field of robotics and automation, providing researchers and practitioners with a comprehensive resource to push the boundaries of 6D pose estimation in the context of fruit bin picking and beyond.", "keywords": "Datasets and Benchmarks;6D Pose estimation;Robotic;Bin Picking;Occlusion", "primary_area": "datasets and benchmarks", "supplementary_material": "/attachment/83a8bb39727c49cb6c38ef940d278d5b7dca8cef.zip", "author": "Guillaume Duret;Mahmoud ALI;NICOLAS CAZIN;Alexandre Chapin;Florence Zara;Emmanuel Dellandrea;Jan Peters;Liming Chen", "authorids": "~Guillaume_Duret1;~Mahmoud_ALI1;~NICOLAS_CAZIN1;~Alexandre_Chapin1;~Florence_Zara1;~Emmanuel_Dellandrea2;~Jan_Peters3;~Liming_Chen1", "gender": "M;M;M;M;F;M;M;M", "homepage": ";https://mahmoud-ali.netlify.app/;;https://liris.cnrs.fr/en/member-page/alexandre-chapin;https://perso.liris.cnrs.fr/fzara/Web/index.html;http://perso.ec-lyon.fr/emmanuel.dellandrea/;https://www.jan-peters.net;https://sites.google.com/view/limingchen/accueil", "dblp": ";260/9430;;;87/851;79/5140.html;p/JanPeters1;32/7029-2", "google_scholar": ";;;;https://scholar.google.fr/citations?hl=fr;https://scholar.google.fr/citations?user=lK9Pa0MAAAAJ;https://scholar.google.de/citations?user=-kIVAcAAAAAJ;VOPW5YYAAAAJ", "orcid": ";0009-0002-7658-0446;0000-0002-8382-7227;;0000-0002-0118-7204;;0000-0002-5266-8091;0000-0002-3654-9498", "linkedin": "guillaume-duret-41ab70169/;mahmoud-ali-5b1938b8/;nicolas-cazin-phd-a61b54172/;;;;janrpeters/;liming-chen-039455a/", "or_profile": "~Guillaume_Duret1;~Mahmoud_ALI1;~NICOLAS_CAZIN1;~Alexandre_Chapin1;~Florence_Zara1;~Emmanuel_Dellandrea2;~Jan_Peters3;~Liming_Chen1", "aff": "Ecole Centrale de Lyon;INRIA;;Ecole Centrale de Lyon;LIRIS, CNRS;Ecole Centrale de Lyon;TU Darmstadt;Ecole Centrale de Lyon", "aff_domain": "ec-lyon.fr;inria.fr;;ec-lyon.fr;liris.cnrs.fr;ec-lyon.fr;tu-darmstadt.de;ec-lyon.fr", "position": "PhD student;PhD student;;PhD student;Associate Professor;Associate Professor;Full Professor;Full Professor", "bibtex": "@misc{\nduret2024fruitbin,\ntitle={FruitBin: A tunable large-scale dataset for advancing 6D Pose estimation in fruit bin picking automation},\nauthor={Guillaume Duret and Mahmoud ALI and NICOLAS CAZIN and Alexandre Chapin and Florence Zara and Emmanuel Dellandrea and Jan Peters and Liming Chen},\nyear={2024},\nurl={https://openreview.net/forum?id=4IxtmklIym}\n}", "github": "", "project": "", "reviewers": "J2Gb;Ksan;bGzb;gFnj", "site": "https://openreview.net/forum?id=4IxtmklIym", "pdf_size": 35938534, "rating": "3;3;5;8", "confidence": "4;4;5;5", "soundness": "2;2;3;4", "contribution": "1;2;2;3", "presentation": "2;3;3;4", "wc_summary": "35;71;36;30", "wc_strengths": "22;83;18;41", "wc_weaknesses": "116;378;180;11", "wc_questions": "12;63;35;1", "wc_review": "185;595;269;83", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "439;884;724;223", "reply_reviewers": "0;0;0;0", "reply_authors": "1;2;1;1", "rating_avg": [ 4.75, 2.0463381929681126 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 43.0, 16.32482771731451 ], "wc_strengths_avg": [ 41.0, 25.758493744782516 ], "wc_weaknesses_avg": [ 171.25, 133.74859812349436 ], "wc_questions_avg": [ 27.75, 23.763154251908563 ], "wc_review_avg": [ 283.0, 191.79676743887003 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 567.5, 254.88085451834158 ], 
"reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.8551861104941366, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:GPohUnboD54J:scholar.google.com/&scioq=FruitBin:+A+tunable+large-scale+dataset+for+advancing+6D+Pose+estimation+in+fruit+bin+picking+automation&hl=en&as_sdt=0,5", "gs_version_total": 4, "aff_unique_index": "0;1;0;2;0;3;0", "aff_unique_norm": "Ecole Centrale de Lyon;INRIA;CNRS;Technische Universit\u00e4t Darmstadt", "aff_unique_dep": ";;LIRIS;", "aff_unique_url": "https://www.ec-lyon.fr;https://www.inria.fr;https://www.cnrs.fr;https://www.tu-darmstadt.de", "aff_unique_abbr": "ECL;INRIA;CNRS;TU Darmstadt", "aff_campus_unique_index": "1", "aff_campus_unique": ";Darmstadt", "aff_country_unique_index": "0;0;0;0;0;1;0", "aff_country_unique": "France;Germany" }, { "title": "The Devil is in the Object Boundary: Towards Annotation-free Instance Segmentation using Foundation Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19473", "id": "4JbrdrHxYy", "author_site": "cheng shi, Sibei Yang", "tldr": "", "abstract": "Foundation models, pre-trained on a large amount of data have demonstrated impressive zero-shot capabilities in various downstream tasks. However, in object detection and instance segmentation, two fundamental computer vision tasks heavily reliant on extensive human annotations, foundation models such as SAM and DINO struggle to achieve satisfactory performance. \nIn this study, we reveal that the devil is in the object boundary, $\\textit{i.e.}$, these foundation models fail to discern boundaries between individual objects. \nFor the first time, we probe that CLIP, which has never accessed any instance-level annotations, can provide a highly beneficial and strong instance-level boundary prior in the clustering results of its particular intermediate layer. Following this surprising observation, we propose $\\textbf{\\textit{Zip}}$ which $\\textbf{Z}$ips up CL$\\textbf{ip}$ and SAM in a novel classification-first-then-discovery pipeline, enabling annotation-free, complex-scene-capable, open-vocabulary object detection and instance segmentation. \nOur Zip significantly boosts SAM's mask AP on COCO dataset by 12.5\\% and establishes state-of-the-art performance in various settings, including training-free, self-training, and label-efficient finetuning. Furthermore, annotation-free Zip even achieves comparable performance to the best-performing open-vocabulary object detecters using base annotations. 
Code is released at https://github.com/ChengShiest/Zip-Your-CLIP", "keywords": "object detection;annotation-free;instance segmentation;open-vocabulary;SAM;CLIP", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Cheng Shi;Sibei Yang", "authorids": "~Cheng_Shi4;~Sibei_Yang1", "gender": "M;F", "homepage": "https://github.com/ChengShiest;https://sibeiyang.github.io/", "dblp": ";215/4885", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;user=4pg3rtYAAAAJ", "orcid": "0000-0002-6942-8481;", "linkedin": ";", "or_profile": "~Cheng_Shi4;~Sibei_Yang1", "aff": "ShanghaiTech University;ShanghaiTech University", "aff_domain": "shanghaitech.edu.cm;shanghaitech.edu.cn", "position": "MS student;Assistant Professor", "bibtex": "@inproceedings{\nshi2024the,\ntitle={The Devil is in the Object Boundary: Towards Annotation-free Instance Segmentation using Foundation Models},\nauthor={Cheng Shi and Sibei Yang},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=4JbrdrHxYy}\n}", "github": "", "project": "", "reviewers": "CLaV;11pM;5mwu;g6s6;WVZD", "pdf_size": 18169881, "rating": "5;5;6;6;8", "confidence": "4;4;4;4;4", "soundness": "4;2;3;3;4", "contribution": "4;2;3;3;4", "presentation": "4;2;3;3;3", "wc_summary": "108;50;98;54;58", "wc_strengths": "142;27;42;51;45", "wc_weaknesses": "195;270;154;154;46", "wc_questions": "27;70;96;327;335", "wc_review": "472;417;390;586;484", "wc_reply_reviewers": "0;0;0;61;98", "wc_reply_authors": "676;1701;1166;1405;1180", "reply_reviewers": "0;0;0;1;1", "reply_authors": "1;3;3;3;3", "rating_avg": [ 6.0, 1.0954451150103321 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "contribution_avg": [ 3.2, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 73.6, 24.344198487524704 ], "wc_strengths_avg": [ 61.4, 41.06872289224489 ], "wc_weaknesses_avg": [ 163.8, 72.5545312161825 ], "wc_questions_avg": [ 171.0, 132.50962229211885 ], "wc_review_avg": [ 469.8, 67.62366449697916 ], "wc_reply_reviewers_avg": [ 31.8, 40.66644808684427 ], "wc_reply_authors_avg": [ 1225.6, 336.41676533728224 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 2.6, 0.8000000000000002 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1971090372667658008&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=4JbrdrHxYy", "pdf": "https://openreview.net/pdf?id=4JbrdrHxYy", "email": "shanghaitech.edu.cm;shanghaitech.edu.cn", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "ShanghaiTech University", "aff_unique_dep": "", "aff_unique_url": "https://www.shanghaitech.edu.cn", "aff_unique_abbr": "ShanghaiTech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "4JjSJyT15z", "title": "NaturalSigner: Diffusion Models are Natural Sign Language Generator", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Generating natural and expressive sign language pose sequences from text has important practical significance.\nHowever, current sign language generation (SLG) methods suffer from low quality and limited expressiveness.\nIn this work, we propose NaturalSigner, a 
classifier-free diffusion-based generative model designed specifically for SLG.\n Specifically, it consists of a mixed semantic encoder that enhances the semantic consistency and expressiveness of the generated sign language, which takes both text and gloss as input; and a novel sign language denoiser that generates natural sign language pose sequences according to the output of the semantic encoder.\nIn addition, to achieve more natural and high-quality SLG, we design a sign language prompting mechanism to facilitate in-context learning in the diffusion model and duration predictor.\n Experiments on two datasets show that NaturalSigner significantly outperforms the state-of-the-art methods in terms of semantic consistency, naturalness, and expressiveness.\n On the Phoenix-2014T dataset, compared with the previous best end-to-end SLG method, NaturalSigner improves the BLEU-4 score of the back translation metric by more than **40\\%** and reduces the Frechet Inception Distance (FID) by more than **12 times**.", "keywords": "Sign Language Generation;Deep Learning", "primary_area": "generative models", "supplementary_material": "/attachment/5cc75085b25919d113fa52a3f04582aa7bc125e2.zip", "author": "Aoxiong Yin;Jiahao Xun;Xize Cheng;Tao Jin;Shengyu Zhang;Zhou Zhao;Siliang Tang;Fei Wu", "authorids": "~Aoxiong_Yin1;~Jiahao_Xun1;~Xize_Cheng1;~Tao_Jin2;~Shengyu_Zhang2;~Zhou_Zhao3;~Siliang_Tang1;~Fei_Wu1", "gender": ";M;M;M;M;;M;M", "homepage": ";;https://exgc.github.io/;https://hugddygff.github.io/;https://shengyuzhang.github.io/;;https://person.zju.edu.cn/en/siliang;https://person.zju.edu.cn/wufei", "dblp": ";303/0655;334/2167;88/4850-4.html;47/3459-1;;44/5693;84/3254-1", "google_scholar": ";;https://scholar.google.com/citations?hl=zh-CN;;l4Dyt7EAAAAJ;;8e7H3PcAAAAJ;XJLn4MYAAAAJ", "orcid": ";0009-0000-6141-4125;0000-0001-9708-3225;0000-0003-3564-1628;0000-0002-0030-8289;;0000-0002-7356-9711;", "linkedin": ";;;;;;siliang-tang-4734272a/;", "or_profile": "~Aoxiong_Yin1;~Jiahao_Xun1;~Xize_Cheng1;~Tao_Jin2;~Shengyu_Zhang2;~Zhou_Zhao3;~Siliang_Tang1;~Fei_Wu1", "aff": ";Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;;Zhejiang University;Zhejiang University", "aff_domain": ";zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;;zju.edu.cn;zju.edu.cn", "position": ";MS student;PhD student;Assistant Professor;ZJU100 Young Professor;;Full Professor;Full Professor", "bibtex": "@misc{\nyin2024naturalsigner,\ntitle={NaturalSigner: Diffusion Models are Natural Sign Language Generator},\nauthor={Aoxiong Yin and Jiahao Xun and Xize Cheng and Tao Jin and Shengyu Zhang and Zhou Zhao and Siliang Tang and Fei Wu},\nyear={2024},\nurl={https://openreview.net/forum?id=4JjSJyT15z}\n}", "github": "", "project": "", "reviewers": "UZBd;BJCa;s9QL;9cnq", "site": "https://openreview.net/forum?id=4JjSJyT15z", "pdf_size": 1363283, "rating": "3;3;5;8", "confidence": "5;5;4;5", "soundness": "2;2;3;3", "contribution": "2;2;3;3", "presentation": "2;2;2;3", "wc_summary": "70;35;66;61", "wc_strengths": "31;36;53;108", "wc_weaknesses": "321;86;219;109", "wc_questions": "2;5;27;84", "wc_review": "424;162;365;362", "wc_reply_reviewers": "223;0;112;0", "wc_reply_authors": "1150;729;1353;535", "reply_reviewers": "1;0;1;0", "reply_authors": "3;2;3;1", "rating_avg": [ 4.75, 2.0463381929681126 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 58.0, 13.656500283747663 ], 
"wc_strengths_avg": [ 57.0, 30.553232234904378 ], "wc_weaknesses_avg": [ 183.75, 93.83862477679433 ], "wc_questions_avg": [ 29.5, 32.912763481664676 ], "wc_review_avg": [ 328.25, 99.11703940292001 ], "wc_reply_reviewers_avg": [ 83.75, 92.48885067941973 ], "wc_reply_authors_avg": [ 941.75, 325.2701761612952 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.07053456158585983, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ZHwXzOEg9YIJ:scholar.google.com/&scioq=NaturalSigner:+Diffusion+Models+are+Natural+Sign+Language+Generator&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "4JtwtT4nYC", "title": "Multi-Task Reinforcement Learning with Shared-Unique Features and Task-Aware Prioritized Experience Replay", "track": "main", "status": "Reject", "tldr": "", "abstract": "Multi-task reinforcement learning (MTRL) has emerged as a challenging problem to reduce the computational cost of reinforcement learning and leverage shared features among tasks to improve the performance of individual tasks. \nHowever, a key challenge lies in determining which features should be shared across tasks and how to preserve the unique features that differentiate each task. This challenge often leads to the problem of task performance imbalance, where certain tasks may dominate the learning process while others are neglected. \nIn this paper, we propose a novel approach called shared-unique features along with task-aware prioritized experience replay to improve training stability and leverage shared and unique features effectively. \nWe incorporate a simple yet effective task-specific embeddings to preserve the unique features of each task to mitigate the potential problem of task performance imbalance. \nAdditionally, we introduce task-aware settings to the prioritized experience replay (PER) algorithm to accommodate multi-task training and enhancing training stability. \nOur approach achieves state-of-the-art average success rates on the Meta-World benchmark, while maintaining stable performance across all tasks, avoiding task performance imbalance issues. The results demonstrate the effectiveness of our method in addressing the challenges of MTRL.", "keywords": "Multi-task reinforcement learning;Experience replay;Shared-unique features", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Po-Shao Lin;Jia-Fong Yeh;Yi-Ting Chen;Winston H. 
Hsu", "authorids": "~Po-Shao_Lin2;~Jia-Fong_Yeh1;~Yi-Ting_Chen2;~Winston_H._Hsu2", "gender": ";M;M;M", "homepage": "https://www.cmlab.csie.ntu.edu.tw/~jiafongyeh/;https://sites.google.com/site/yitingchen0524/;https://github.com/Tonylin1998;https://winstonhsu.info/", "dblp": "198/7831;12/5268-1;;16/5668.html", "google_scholar": "kS-oZ20AAAAJ;8tRH7RMAAAAJ;;https://scholar.google.com.tw/citations?user=NOvDH3QAAAAJ", "orcid": ";;;0000-0002-3330-0638", "linkedin": ";;;", "or_profile": "~Jia-Fong_Yeh1;~Yi-Ting_Chen2;~\u67cf\u52ad_\u67971;~Winston_Hsu1", "aff": "Sony Group Corporation;National Yang Ming Chiao Tung University;;National Taiwan University", "aff_domain": "sony.com;nycu.edu.tw;;ntu.edu.tw", "position": "Intern;Assistant Professor;;Professor", "bibtex": "@misc{\nlin2024multitask,\ntitle={Multi-Task Reinforcement Learning with Shared-Unique Features and Task-Aware Prioritized Experience Replay},\nauthor={Po-Shao Lin and Jia-Fong Yeh and Yi-Ting Chen and Winston H. Hsu},\nyear={2024},\nurl={https://openreview.net/forum?id=4JtwtT4nYC}\n}", "github": "", "project": "", "reviewers": "89dx;MxpY;7qsQ", "site": "https://openreview.net/forum?id=4JtwtT4nYC", "pdf_size": 765199, "rating": "3;3;3", "confidence": "4;4;4", "soundness": "3;2;2", "contribution": "2;2;2", "presentation": "3;1;2", "wc_summary": "88;155;54", "wc_strengths": "112;48;30", "wc_weaknesses": "163;320;259", "wc_questions": "35;31;34", "wc_review": "398;554;377", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 3.0, 0.0 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.0, 0.816496580927726 ], "wc_summary_avg": [ 99.0, 41.960298696108765 ], "wc_strengths_avg": [ 63.333333333333336, 35.188381921057726 ], "wc_weaknesses_avg": [ 247.33333333333334, 64.62369706401996 ], "wc_questions_avg": [ 33.333333333333336, 1.699673171197595 ], "wc_review_avg": [ 443.0, 78.95568377260753 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:xvq29vV_NhAJ:scholar.google.com/&scioq=Multi-Task+Reinforcement+Learning+with+Shared-Unique+Features+and+Task-Aware+Prioritized+Experience+Replay&hl=en&as_sdt=0,23", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "Sony Group Corporation;National Yang Ming Chiao Tung University;National Taiwan University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.sony.com;https://www.nycu.edu.tw;https://www.ntu.edu.tw", "aff_unique_abbr": "Sony;NYCU;NTU", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Taiwan", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Japan;China" }, { "title": "Neurosymbolic Grounding for Compositional World Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19472", "id": "4KZpDGD4Nh", "author_site": "Atharva Sehgal, Arya Grayeli, Jennifer Sun, Swarat Chaudhuri", "tldr": "", "abstract": "We introduce Cosmos, a framework for object-centric world modeling that is designed for compositional generalization (CompGen), i.e., high performance on unseen input scenes obtained through the composition of known visual \"atoms.\" The central insight behind Cosmos is the use of a novel form 
of neurosymbolic grounding. Specifically, the framework introduces two new tools: (i) neurosymbolic scene encodings, which represent each entity in a scene using a real vector computed using a neural encoder, as well as a vector of composable symbols describing attributes of the entity, and (ii) a neurosymbolic attention mechanism that binds these entities to learned rules of interaction. Cosmos is end-to-end differentiable; also, unlike traditional neurosymbolic methods that require representations to be manually mapped to symbols, it computes an entity's symbolic attributes using vision-language foundation models. Through an evaluation that considers two different forms of CompGen on an established blocks-pushing domain, we show that the framework establishes a new state-of-the-art for CompGen in world modeling. Artifacts are available at: https://trishullab.github.io/cosmos-web/", "keywords": "neurosymbolic learning;machine learning;world modeling;compositional generalization", "primary_area": "neurosymbolic & hybrid AI systems (physics-informed, logic & formal reasoning, etc.)", "supplementary_material": "/attachment/821493eb5c04dce714ca9b717b1be29b91ca9837.zip", "author": "Atharva Sehgal;Arya Grayeli;Jennifer J. Sun;Swarat Chaudhuri", "authorids": "~Atharva_Sehgal1;~Arya_Grayeli1;~Jennifer_J._Sun1;~Swarat_Chaudhuri1", "gender": "M;M;M;F", "homepage": "https://www.atharvas.net;;http://www.cs.utexas.edu/~swarat;https://jenjsun.com/", "dblp": ";;37/6100;232/1563", "google_scholar": ";;9j6RBYQAAAAJ;", "orcid": ";;0000-0002-6859-1391;", "linkedin": "atharvas/;aryagrayeli/;swarat-chaudhuri-609b3092/;jennifer-sun-224778a3/", "or_profile": "~Atharva_Sehgal1;~Arya_Grayeli1;~Swarat_Chaudhuri1;~Jennifer_Jianing_Sun1", "aff": "University of Texas at Austin;University of Texas at Austin;University of Texas at Austin;Google", "aff_domain": "utexas.edu;utexas.edu;utexas.edu;google.com", "position": "PhD student;Undergrad student;Full Professor;Researcher", "bibtex": "@inproceedings{\nsehgal2024neurosymbolic,\ntitle={Neurosymbolic Grounding for Compositional World Models},\nauthor={Atharva Sehgal and Arya Grayeli and Jennifer J. 
Sun and Swarat Chaudhuri},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=4KZpDGD4Nh}\n}", "github": "", "project": "", "reviewers": "QCEm;CQ89;5cti", "pdf_size": 1745886, "rating": "6;6;6", "confidence": "4;4;2", "soundness": "3;3;2", "contribution": "2;2;3", "presentation": "3;3;3", "wc_summary": "62;71;27", "wc_strengths": "64;78;44", "wc_weaknesses": "310;457;109", "wc_questions": "41;21;5", "wc_review": "477;627;185", "wc_reply_reviewers": "66;23;26", "wc_reply_authors": "1438;1728;912", "reply_reviewers": "1;1;1", "reply_authors": "3;3;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 53.333333333333336, 18.979521127315678 ], "wc_strengths_avg": [ 62.0, 13.9522996909709 ], "wc_weaknesses_avg": [ 292.0, 142.63940549511554 ], "wc_questions_avg": [ 22.333333333333332, 14.72714802291635 ], "wc_review_avg": [ 429.6666666666667, 183.52353769718175 ], "wc_reply_reviewers_avg": [ 38.333333333333336, 19.601587237318874 ], "wc_reply_authors_avg": [ 1359.3333333333333, 337.7428344498551 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18138122186326119791&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "openreview": "https://openreview.net/forum?id=4KZpDGD4Nh", "pdf": "https://openreview.net/pdf?id=4KZpDGD4Nh", "email": "utexas.edu;utexas.edu;utexas.edu;google.com", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of Texas at Austin;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.utexas.edu;https://www.google.com", "aff_unique_abbr": "UT Austin;Google", "aff_campus_unique_index": "0;0;0;1", "aff_campus_unique": "Austin;Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Curiosity-driven Red-teaming for Large Language Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19471", "id": "4KqkizXgXU", "author_site": "Zhang-Wei Hong, Idan Shenfeld, Johnson (Tsun-Hsuan) Wang, Yung-Sung Chuang, Aldo Pareja, James R Glass, Akash Srivastava, Pulkit Agrawal", "tldr": "", "abstract": "Large language models (LLMs) hold great potential for many natural language applications but risk generating incorrect or toxic content. To probe when an LLM generates unwanted content, the current paradigm is to recruit a $\\textit{red team}$ of human testers to design input prompts (i.e., test cases) that elicit undesirable responses from LLMs. \nHowever, relying solely on human testers is expensive and time-consuming. Recent works automate red teaming by training a separate red team LLM with reinforcement learning (RL) to generate test cases that maximize the chance of eliciting undesirable responses from the target LLM. 
However, current RL methods are only able to generate a small number of effective test cases resulting in a low coverage of the span of prompts that elicit undesirable responses from the target LLM.\nTo overcome this limitation, we draw a connection between the problem of increasing the coverage of generated test cases and the well-studied approach of curiosity-driven exploration that optimizes for novelty. \nOur method of curiosity-driven red teaming (CRT) achieves greater coverage of test cases while mantaining or increasing their effectiveness compared to existing methods.\nOur method, CRT successfully provokes toxic responses from LLaMA2 model that has been heavily fine-tuned using human preferences to avoid toxic outputs. Code is available at https://github.com/Improbable-AI/curiosity_redteam.", "keywords": "Curiosity-driven exploration;Reinforcement learning;Language model", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/da3812c3dc922578aa3779e75911bd941e9e9d13.zip", "author": "Zhang-Wei Hong;Idan Shenfeld;Tsun-Hsuan Wang;Yung-Sung Chuang;Aldo Pareja;James R. Glass;Akash Srivastava;Pulkit Agrawal", "authorids": "~Zhang-Wei_Hong1;~Idan_Shenfeld1;~Tsun-Hsuan_Wang2;~Yung-Sung_Chuang1;~Aldo_Pareja1;~James_R._Glass1;~Akash_Srivastava1;~Pulkit_Agrawal1", "gender": "M;M;M;M;M;;M;M", "homepage": ";https://www.linkedin.com/in/idan-shenfeld/;https://zswang666.github.io/;https://people.csail.mit.edu/yungsung/;https://mitibmwatsonailab.mit.edu/people/aldo-pareja/;;http://akashgit.github.io;https://people.eecs.berkeley.edu/~pulkitag/", "dblp": "198/0600;;217/1809.html;64/3095;;;24/9528;149/2672", "google_scholar": "GZkyN4cAAAAJ;;xE3WSuYAAAAJ;3ar1DOwAAAAJ;;;https://scholar.google.co.uk/citations?user=2h6SZeEAAAAJ;UpZmJI0AAAAJ", "orcid": ";;;0000-0002-1723-5063;;;;", "linkedin": ";;;yschuang;;;https://uk.linkedin.com/in/akash-srivastava-aa97361b;", "or_profile": "~Zhang-Wei_Hong1;~Idan_Shenfeld1;~Tsun-Hsuan_Wang2;~Yung-Sung_Chuang1;~Aldo_Pareja1;~James_R._Glass1;~Akash_Srivastava1;~Pulkit_Agrawal1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Liquid AI;Massachusetts Institute of Technology;Delft University of Technology;;International Business Machines;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;liquid.ai;mit.edu;tudelft.nl;;ibm.com;mit.edu", "position": "PhD student;PhD student;Researcher;PhD student;PhD student;;Principal Researcher;Assistant Professor", "bibtex": "@inproceedings{\nhong2024curiositydriven,\ntitle={Curiosity-driven Red-teaming for Large Language Models},\nauthor={Zhang-Wei Hong and Idan Shenfeld and Tsun-Hsuan Wang and Yung-Sung Chuang and Aldo Pareja and James R. 
Glass and Akash Srivastava and Pulkit Agrawal},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=4KqkizXgXU}\n}", "github": "", "project": "", "reviewers": "NqgB;TuLd;ZhdE;8YjF", "pdf_size": 721695, "rating": "8;8;8;8", "confidence": "3;3;3;4", "soundness": "3;4;3;3", "contribution": "3;3;3;3", "presentation": "3;4;4;3", "wc_summary": "107;69;108;65", "wc_strengths": "63;59;74;91", "wc_weaknesses": "52;101;141;78", "wc_questions": "46;124;82;1", "wc_review": "268;353;405;235", "wc_reply_reviewers": "0;21;99;19", "wc_reply_authors": "443;767;1145;194", "reply_reviewers": "0;1;2;1", "reply_authors": "2;2;2;1", "rating_avg": [ 8.0, 0.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 87.25, 20.30240133580262 ], "wc_strengths_avg": [ 71.75, 12.397076268217438 ], "wc_weaknesses_avg": [ 93.0, 32.687918257362305 ], "wc_questions_avg": [ 63.25, 45.31762902006238 ], "wc_review_avg": [ 315.25, 67.36607083688345 ], "wc_reply_reviewers_avg": [ 34.75, 37.989307706248084 ], "wc_reply_authors_avg": [ 637.25, 356.66817561986096 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 71, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14267068665623037182&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 9, "openreview": "https://openreview.net/forum?id=4KqkizXgXU", "pdf": "https://openreview.net/pdf?id=4KqkizXgXU", "email": "mit.edu;mit.edu;liquid.ai;mit.edu;tudelft.nl;;ibm.com;mit.edu", "author_num": 8, "aff_unique_index": "0;0;1;0;2;3;0", "aff_unique_norm": "Massachusetts Institute of Technology;Liquid AI;Delft University of Technology;International Business Machines Corporation", "aff_unique_dep": ";;;", "aff_unique_url": "https://web.mit.edu;;https://www.tudelft.nl;https://www.ibm.com", "aff_unique_abbr": "MIT;;TU Delft;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;2;0;0", "aff_country_unique": "United States;Unknown;Netherlands" }, { "title": "Chain-of-Table: Evolving Tables in the Reasoning Chain for Table Understanding", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19470", "id": "4L0xnS4GQM", "author_site": "Zilong Wang, Hao Zhang, Chun-Liang Li, Julian M Eisenschlos, Vincent Perot, Zifeng Wang, Lesly Miculicich, Yasuhisa Fujii, Jingbo Shang, Chen-Yu Lee, Tomas Pfister", "tldr": "", "abstract": "Table-based reasoning with large language models (LLMs) is a promising direction to tackle many table understanding tasks, such as table-based question answering and fact verification. Compared with generic reasoning, table-based reasoning requires the extraction of underlying semantics from both free-form questions and semi-structured tabular data. Chain-of-Thought and its similar approaches incorporate the reasoning chain in the form of textual context, but it is still an open question how to effectively leverage tabular data in the reasoning chain. We propose the Chain-of-Table framework, where tabular data is explicitly used in the reasoning chain as a proxy for intermediate thoughts. Specifically, we guide LLMs using in-context learning to iteratively generate operations and update the table to represent a tabular reasoning chain. 
LLMs can therefore dynamically plan the next operation based on the results of the previous ones. This continuous evolution of the table forms a chain, showing the reasoning process for a given tabular problem. The chain carries structured information of the intermediate results, enabling more accurate and reliable predictions. Chain-of-Table achieves new state-of-the-art performance on WikiTQ, FeTaQA, and TabFact benchmarks across multiple LLM choices.", "keywords": "Table Understanding;In-context Learning;Large Language Model", "primary_area": "generative models", "supplementary_material": "/attachment/e8fac55cc78468acf93bb210c449fc8e6fc3848a.zip", "author": "Zilong Wang;Hao Zhang;Chun-Liang Li;Julian Martin Eisenschlos;Vincent Perot;Zifeng Wang;Lesly Miculicich;Yasuhisa Fujii;Jingbo Shang;Chen-Yu Lee;Tomas Pfister", "authorids": "~Zilong_Wang1;~Hao_Zhang68;~Chun-Liang_Li1;~Julian_Martin_Eisenschlos1;~Vincent_Perot1;~Zifeng_Wang1;~Lesly_Miculicich1;~Yasuhisa_Fujii1;~Jingbo_Shang2;~Chen-Yu_Lee2;~Tomas_Pfister1", "gender": "M;;M;M;M;M;F;M;M;;M", "homepage": "https://zilongwang.me;https://www.lti.cs.cmu.edu/people/19123/hao-zhang;http://chunliangli.github.io;https://eisenjulian.github.io/;;https://kingspencer.github.io/;;;https://shangjingbo1226.github.io/;https://chl260.github.io/;http://tomas.pfister.fi", "dblp": "42/898-2;;;262/3990;227/2509;43/7716-2;;84/8914;151/3145.html;04/656;14/8360", "google_scholar": "S_wQccsAAAAJ;;https://scholar.google.com.tw/citations?user=vqHIt_sAAAAJ;2uAC2NQAAAAJ;RrANep4AAAAJ;https://scholar.google.co.il/citations?user=N1uBekcAAAAJ;https://scholar.google.ch/citations?user=0JL8SrsAAAAJ;_o1nj0gAAAAJ;0SkFI4MAAAAJ;uWPUSEgAAAAJ;ahSpJOAAAAAJ", "orcid": "0000-0002-1614-0943;;;;;;;;;;0009-0004-4088-8718", "linkedin": ";;;eisenjulian/;vincentperot/;zifeng-wang-21b069b4/;lesly-miculicich-werlen;;;chenyulee260/;", "or_profile": "~Zilong_Wang1;~Hao_Zhang68;~Chun-Liang_Li1;~Julian_Martin_Eisenschlos1;~Vincent_Perot1;~Zifeng_Wang1;~Lesly_Miculicich1;~Yasuhisa_Fujii1;~Jingbo_Shang2;~Chen-Yu_Lee2;~Tomas_Pfister1", "aff": "University of California, San Diego;;Apple;Universidad Nacional de C\u00f3rdoba;Google;Google;Google;Google;University of California, San Diego;Google;Google", "aff_domain": "ucsd.edu;;apple.com;unc.edu.ar;google.com;google.com;google.com;google.com;ucsd.edu;google.com;google.com", "position": "PhD student;;Researcher;PhD student;Software Engineer;Research Scientist;Researcher;RS;Assistant Professor;Research Scientist;Head of Research @ Cloud AI", "bibtex": "@inproceedings{\nwang2024chainoftable,\ntitle={Chain-of-Table: Evolving Tables in the Reasoning Chain for Table Understanding},\nauthor={Zilong Wang and Hao Zhang and Chun-Liang Li and Julian Martin Eisenschlos and Vincent Perot and Zifeng Wang and Lesly Miculicich and Yasuhisa Fujii and Jingbo Shang and Chen-Yu Lee and Tomas Pfister},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=4L0xnS4GQM}\n}", "github": "", "project": "", "reviewers": "xr4a;eSFE;3YkD;4HBR", "pdf_size": 1425645, "rating": "5;5;6;6", "confidence": "3;4;3;3", "soundness": "3;3;3;3", "contribution": "2;2;3;4", "presentation": "3;3;3;3", "wc_summary": "78;96;46;68", "wc_strengths": "51;42;92;63", "wc_weaknesses": "200;138;115;53", "wc_questions": "26;53;7;140", "wc_review": "355;329;260;324", "wc_reply_reviewers": "0;0;0;11", "wc_reply_authors": "801;840;972;895", "reply_reviewers": "0;0;0;1", "reply_authors": "4;5;4;5", "rating_avg": [ 5.5, 0.5 ], 
"confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 72.0, 18.05547008526779 ], "wc_strengths_avg": [ 62.0, 18.854707634964804 ], "wc_weaknesses_avg": [ 126.5, 52.60465758846834 ], "wc_questions_avg": [ 56.5, 50.904322016897545 ], "wc_review_avg": [ 317.0, 34.94996423460259 ], "wc_reply_reviewers_avg": [ 2.75, 4.763139720814412 ], "wc_reply_authors_avg": [ 877.0, 64.21448434738069 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 4.5, 0.5 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 107, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13815199908318046768&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=4L0xnS4GQM", "pdf": "https://openreview.net/pdf?id=4L0xnS4GQM", "email": "ucsd.edu;;apple.com;unc.edu.ar;google.com;google.com;google.com;google.com;ucsd.edu;google.com;google.com", "author_num": 11, "aff_unique_index": "0;1;2;3;3;3;3;0;3;3", "aff_unique_norm": "University of California, San Diego;Apple;Universidad Nacional de C\u00f3rdoba;Google", "aff_unique_dep": ";Apple Inc.;;Google", "aff_unique_url": "https://www.ucsd.edu;https://www.apple.com;https://www.unc.edu.ar;https://www.google.com", "aff_unique_abbr": "UCSD;Apple;UNC;Google", "aff_campus_unique_index": "0;2;2;2;2;0;2;2", "aff_campus_unique": "San Diego;;Mountain View", "aff_country_unique_index": "0;0;1;0;0;0;0;0;0;0", "aff_country_unique": "United States;Argentina" }, { "title": "Deep Reinforcement Learning for Modelling Protein Complexes", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19469", "id": "4MsfQ2H0lP", "author_site": "Ziqi Gao, Tao Feng, Jiaxuan You, Chenyi Zi, Yan Zhou, Chen Zhang, Jia Li", "tldr": "", "abstract": "Structure prediction of large protein complexes (a.k.a., protein multimer mod-\nelling, PMM) can be achieved through the one-by-one assembly using provided\ndimer structures and predicted docking paths. However, existing PMM methods\nstruggle with vast search spaces and generalization challenges: (1) The assembly\nof a N -chain multimer can be depicted using graph structured data, with each\nchain represented as a node and assembly actions as edges. Thus the assembly\ngraph can be arbitrary acyclic undirected connected graph, leading to the com-\nbinatorial optimization space of N^(N \u22122) for the PMM problem. (2) Knowledge\ntransfer in the PMM task is non-trivial. The gradually limited data availability as\nthe chain number increases necessitates PMM models that can generalize across\nmultimers of various chains. To address these challenges, we propose GAPN, a\nGenerative Adversarial Policy Network powered by domain-specific rewards and\nadversarial loss through policy gradient for automatic PMM prediction. Specifi-\ncally, GAPN learns to efficiently search through the immense assembly space and\noptimize the direct docking reward through policy gradient. Importantly, we de-\nsign a adversarial reward function to enhance the receptive field of our model. 
In\nthis way, GAPN will simultaneously focus on a specific batch of multimers and\nthe global assembly rules learned from multimers with varying chain numbers.\nEmpirically, we have achieved both significant accuracy (measured by RMSD\nand TM-Score) and efficiency improvements compared to leading complex mod-\neling software. GAPN outperforms the state-of-the-art method (MoLPC) with up\nto 27% improvement in TM-Score, with a speed-up of 600\u00d7.", "keywords": "protein complex structure prediction;docking path prediction;policy network;reinforcement learning", "primary_area": "applications to physical sciences (physics, chemistry, biology, etc.)", "supplementary_material": "", "author": "Ziqi Gao;Tao Feng;Jiaxuan You;Chenyi Zi;Yan Zhou;Chen Zhang;Jia Li", "authorids": "~Ziqi_Gao1;~Tao_Feng5;~Jiaxuan_You2;~Chenyi_Zi2;~Yan_Zhou10;~Chen_Zhang24;~Jia_Li4", "gender": ";M;M;M;M;M;M", "homepage": ";;;http://www.chuanglintech.com;https://facultyprofiles.hkust-gz.edu.cn/faculty-personal-page/ZHANG-Chen/chenzhang;https://sites.google.com/view/lijia;https://cs.stanford.edu/~jiaxuan/", "dblp": ";;;;94/4084-10;23/6950-9;192/4727", "google_scholar": "https://scholar.google.com.hk/citations?user=UHwNFy8AAAAJ;3xK6SlIAAAAJ;uGbNoaQAAAAJ;;https://scholar.google.ca/citations?user=AHEItGQAAAAJ;1gSbcYoAAAAJ;NDbMl7oAAAAJ", "orcid": ";;;;0000-0001-8706-1372;0000-0002-6362-4385;", "linkedin": ";;;;;;jiaxuan-you-5859b37b/", "or_profile": "~Ziqi_Gao1;~Tao_Feng5;~Chenyi_Zi2;~Yan_Zhou10;~Chen_Zhang24;~Jia_Li4;~Jiaxuan_You1", "aff": "Hong Kong University of Science and Technology;Department of Computer Science, University of Illinois at Urbana-Champaign;Hong Kong University of Science and Technology;;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology (Guangzhou);NVIDIA", "aff_domain": "ust.hk;cs.illinois.edu;hkust.edu;;hkust-gz.edu.cn;ust.hk;nvidia.com", "position": "PhD student;PhD student;MS student;;Associate Professor;Assistant Professor;Researcher", "bibtex": "@inproceedings{\ngao2024deep,\ntitle={Deep Reinforcement Learning for Modelling Protein Complexes},\nauthor={Ziqi Gao and Tao Feng and Jiaxuan You and Chenyi Zi and Yan Zhou and Chen Zhang and Jia Li},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=4MsfQ2H0lP}\n}", "github": "", "project": "", "reviewers": "rNSL;554U;Sjm2;MBaj", "pdf_size": 6159313, "rating": "6;6;6;6", "confidence": "5;3;3;3", "soundness": "3;3;2;3", "contribution": "2;3;2;3", "presentation": "3;2;2;3", "wc_summary": "73;80;130;82", "wc_strengths": "37;24;108;117", "wc_weaknesses": "176;14;180;49", "wc_questions": "120;1;24;51", "wc_review": "406;119;442;299", "wc_reply_reviewers": "61;23;0;17", "wc_reply_authors": "1670;225;1103;1192", "reply_reviewers": "1;1;0;1", "reply_authors": "4;2;4;3", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 91.25, 22.620510604316607 ], "wc_strengths_avg": [ 71.5, 41.379342672401165 ], "wc_weaknesses_avg": [ 104.75, 74.3013290594455 ], "wc_questions_avg": [ 49.0, 44.64862819841165 ], "wc_review_avg": [ 316.5, 125.57169266996443 ], "wc_reply_reviewers_avg": [ 25.25, 22.29770167528483 ], "wc_reply_authors_avg": [ 1047.5, 521.52972110897 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 0.82915619758885 ], "replies_avg": [ 23, 0 ], 
"authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16435311423968251503&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=4MsfQ2H0lP", "pdf": "https://openreview.net/pdf?id=4MsfQ2H0lP", "email": "ust.hk;cs.illinois.edu;hkust.edu;;hkust-gz.edu.cn;ust.hk;nvidia.com", "author_num": 7, "aff_unique_index": "0;1;0;0;0;2", "aff_unique_norm": "Hong Kong University of Science and Technology;University of Illinois Urbana-Champaign;NVIDIA", "aff_unique_dep": ";Department of Computer Science;NVIDIA Corporation", "aff_unique_url": "https://www.ust.hk;https://illinois.edu;https://www.nvidia.com", "aff_unique_abbr": "HKUST;UIUC;NVIDIA", "aff_campus_unique_index": "0;1;0;0;0", "aff_campus_unique": "Hong Kong SAR;Urbana-Champaign;", "aff_country_unique_index": "0;1;0;0;0;1", "aff_country_unique": "China;United States" }, { "id": "4MvHiijJL3", "title": "Model Explanation Disparities as a Fairness Diagnostic", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recent works on fairness in machine learning have focused on quantifying and eliminating bias against protected subgroups, and extended these results to more complex subgroups beyond simple discrete classes, known as \"rich subgroups.\" Orthogonally, recent works in model interpretability develop local feature importance methods that, given a classifier $h$ and test point $x$, attribute influence for the prediction $h(x)$ to the individual features of $x$. This raises a natural question: Do local feature importance methods attribute different feature importance values on average in protected subgroups versus the whole population, and can we detect these disparities efficiently? In this paper, we formally introduce the notion of feature importance disparity (FID) in the context of rich subgroups, which could be used as a potential indicator of bias in the model or data generation process. We design an oracle-efficient algorithm to identify large FID subgroups and conduct a thorough empirical analysis auditing for these subgroups across $4$ datasets and $4$ common feature importance methods of broad interest to the machine learning community. 
Our algorithm finds (feature, subgroup) pairs that: (i) have subgroup feature importance that is often an order of magnitude different than the importance on the whole dataset (ii) generalize out of sample, and (iii) yield interesting discussions about potential bias inherent in these common datasets.", "keywords": "Explainability;Auditing;Rich Subgroups;Fairness", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/a93d04395392acfeed805fa286a9eb29d65d07f2.zip", "author": "Peter W Chang;Leor Fishman;Seth Neel", "authorids": "~Peter_W_Chang1;~Leor_Fishman1;~Seth_Neel2", "gender": "M;M;M", "homepage": ";;https://sethneel.com", "dblp": ";;188/6406", "google_scholar": ";;", "orcid": "0000-0003-3971-2630;;", "linkedin": "peter-chang-31718baa/;leor-f-63490785/;", "or_profile": "~Peter_W_Chang1;~Leor_Fishman1;~Seth_Neel1", "aff": "Harvard University;;Harvard University", "aff_domain": "harvard.edu;;harvard.edu", "position": "Researcher;;Assistant Professor", "bibtex": "@misc{\nchang2024model,\ntitle={Model Explanation Disparities as a Fairness Diagnostic},\nauthor={Peter W Chang and Leor Fishman and Seth Neel},\nyear={2024},\nurl={https://openreview.net/forum?id=4MvHiijJL3}\n}", "github": "", "project": "", "reviewers": "tQK6;nvHG;SE4V;ZZsY;5Fo2", "site": "https://openreview.net/forum?id=4MvHiijJL3", "pdf_size": 2289369, "rating": "3;5;5;6;8", "confidence": "4;2;2;3;4", "soundness": "2;2;3;3;3", "contribution": "2;2;2;3;3", "presentation": "3;2;2;3;3", "wc_summary": "97;64;90;63;156", "wc_strengths": "50;18;50;56;60", "wc_weaknesses": "132;63;320;58;177", "wc_questions": "228;37;5;49;199", "wc_review": "507;182;465;226;592", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "1246;757;641;354;667", "reply_reviewers": "0;0;0;0;0", "reply_authors": "2;1;1;1;1", "rating_avg": [ 5.4, 1.624807680927192 ], "confidence_avg": [ 3.0, 0.8944271909999159 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "contribution_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 94.0, 33.85262175962151 ], "wc_strengths_avg": [ 46.8, 14.891608375189028 ], "wc_weaknesses_avg": [ 150.0, 95.90203334653546 ], "wc_questions_avg": [ 103.6, 91.34024304762933 ], "wc_review_avg": [ 394.4, 161.35749130424654 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 733.0, 289.898602963174 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.13762047064079508, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10004785071373874203&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Harvard University", "aff_unique_dep": "", "aff_unique_url": "https://www.harvard.edu", "aff_unique_abbr": "Harvard", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "4N7v4w2r3b", "title": "Robustness Evaluation of Proxy Models against Adversarial Optimization", "track": "main", "status": "Reject", "tldr": "", "abstract": "Ensuring the robustness of neural network proxies to optimization pressure is crucial as machine learning applications expand across diverse domains. However, research on proxy robustness remains limited and largely unexplored. 
In this paper, we introduce a comprehensive benchmark for investigating the robustness of neural network proxies under various sources of optimization pressure in the text domain. Through extensive experiments using our benchmark, we uncover previously unknown properties of the proxy gaming problem and highlight serious issues with proxy reward models currently used to fine-tune or monitor large language models. Furthermore, we explore different approaches to enhance proxy robustness and demonstrate the potential of adversarial training to improve alignment between proxy and gold models. Our findings suggest that proxy robustness is a solvable problem that can be incrementally improved, laying the groundwork for future research in this important area.", "keywords": "proxy gaming;reward hacking;specification gaming;misspecification;robustness;adversarial robustness;adversarial attacks;alignment;ai safety", "primary_area": "datasets and benchmarks", "supplementary_material": "/attachment/c02cda3326c492710ad2b34cd7dde7da94f9636e.zip", "author": "Andy Zou;Long Phan;Nathaniel Li;Jun Shern Chan;Mantas Mazeika;Aidan O'Gara;Steven Basart;Jonathan Ng;Scott Emmons;J Zico Kolter;Matt Fredrikson;Dan Hendrycks", "authorids": "~Andy_Zou1;~Long_Phan1;~Nathaniel_Li1;~Jun_Shern_Chan1;~Mantas_Mazeika3;~Aidan_O'Gara1;~Steven_Basart1;~Jonathan_Ng1;~Scott_Emmons1;~J_Zico_Kolter1;~Matt_Fredrikson1;~Dan_Hendrycks1", "gender": ";M;M;M;M;;M;M;M;M;;M", "homepage": ";;https://nli0.github.io;https://junshern.github.io/;https://github.com/mmazeika;;http://stevenbas.art;;http://scottemmons.com/;https://cs.cmu.edu/~mfredrik;;http://www.zicokolter.com", "dblp": "274/2362;;;319/5188;215/4447;;245/2547;;180/5699;38/2612;182/2504;67/2526", "google_scholar": ";fVRQn4wAAAAJ;2XmBzbcAAAAJ;iUGazLcAAAAJ;;;MzKvJhAAAAAJ;;LoT0z6oAAAAJ;https://scholar.google.com.tw/citations?user=tMYCvLAAAAAJ;;UXh1I6UAAAAJ", "orcid": ";;;;;;;;0000-0002-7946-7046;;;", "linkedin": "andy-zou-09ba3616a/;long-phan-3110/;nli0/;chan-jun-shern/;;abogara/;xksteven/;jonathan-ng-7061a3162/;scott-emmons-5258005b/;;;", "or_profile": "~Andy_Zou1;~Long_Phan1;~Nathaniel_Li1;~Jun_Shern_Chan1;~Mantas_Mazeika3;~Aidan_O'Gara1;~Steven_Basart1;~Jonathan_Ng1;~Scott_Emmons1;~Matt_Fredrikson1;~Dan_Hendrycks1;~Zico_Kolter1", "aff": "Carnegie Mellon University;Center for AI Safety;University of California, Berkeley;OpenAI;University of Illinois, Urbana-Champaign;;Center for AI Safety ;;University of California, Berkeley;Carnegie Mellon University;Center for AI Safety;Carnegie Mellon University", "aff_domain": "andrew.cmu.edu;safe.ai;berkeley.edu;openai.com;uiuc.edu;;safe.ai;;berkeley.edu;cmu.edu;safe.ai;cmu.edu", "position": "PhD student;Research Engineer;Undergrad student;Researcher;PhD student;;Researcher;;PhD student;Associate Professor;Executive and Research Director;Full Professor", "bibtex": "@misc{\nzou2024robustness,\ntitle={Robustness Evaluation of Proxy Models against Adversarial Optimization},\nauthor={Andy Zou and Long Phan and Nathaniel Li and Jun Shern Chan and Mantas Mazeika and Aidan O'Gara and Steven Basart and Jonathan Ng and Scott Emmons and J Zico Kolter and Matt Fredrikson and Dan Hendrycks},\nyear={2024},\nurl={https://openreview.net/forum?id=4N7v4w2r3b}\n}", "github": "", "project": "", "reviewers": "8v7i;2dFK;k5g4;tXHZ", "site": "https://openreview.net/forum?id=4N7v4w2r3b", "pdf_size": 3685608, "rating": "5;5;5;6", "confidence": "3;3;3;3", "soundness": "3;2;3;3", "contribution": "2;2;3;4", "presentation": "2;2;2;3", "wc_summary": "100;77;57;69", 
"wc_strengths": "82;42;52;140", "wc_weaknesses": "251;422;139;196", "wc_questions": "18;6;158;13", "wc_review": "451;547;406;418", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "832;735;422;560", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 75.75, 15.706288549495072 ], "wc_strengths_avg": [ 79.0, 38.17066936798463 ], "wc_weaknesses_avg": [ 252.0, 105.83713903918606 ], "wc_questions_avg": [ 48.75, 63.21936016759423 ], "wc_review_avg": [ 455.5, 55.33760023709015 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 637.25, 157.94203841916186 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:er6z2Sned7cJ:scholar.google.com/&scioq=Robustness+Evaluation+of+Proxy+Models+against+Adversarial+Optimization&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;4;1;2;0;1;0", "aff_unique_norm": "Carnegie Mellon University;Center for AI Safety;University of California, Berkeley;OpenAI;University of Illinois", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.cmu.edu;https://www.centerforaisafety.org;https://www.berkeley.edu;https://openai.com;https://illinois.edu", "aff_unique_abbr": "CMU;;UC Berkeley;OpenAI;UIUC", "aff_campus_unique_index": "1;2;1", "aff_campus_unique": ";Berkeley;Urbana-Champaign", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Weakly-supervised Audio Separation via Bi-modal Semantic Similarity", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19468", "id": "4N97bz1sP6", "author_site": "Tanvir Mahmud, Saeed Amizadeh, Kazuhito Koishida, Diana Marculescu", "tldr": "", "abstract": "Conditional sound separation in multi-source audio mixtures without having access to single source sound data during training is a long standing challenge. Existing mix-and-separate based methods suffer from significant performance drop with multi-source training mixtures due to the lack of supervision signal for single source separation cases during training. However, in the case of language-conditional audio separation, we do have access to corresponding text descriptions for each audio mixture in our training data, which can be seen as (rough) representations of the audio samples in the language modality. That raises the curious question of how to generate supervision signal for single-source audio extraction by leveraging the fact that single-source sounding language entities can be easily extracted from the text description. To this end, in this paper, we propose a generic bi-modal separation framework which can enhance the existing unsupervised frameworks to separate single-source signals in a target modality (i.e., audio) using the easily separable corresponding signals in the conditioning modality (i.e., language), without having access to single-source samples in the target modality during training. We empirically show that this is well within reach if we have access to a pretrained joint embedding model between the two modalities (i.e., CLAP). 
Furthermore, we propose to incorporate our framework into two fundamental scenarios to enhance separation performance. First, we show that our proposed methodology significantly improves the performance of purely unsupervised baselines by reducing the distribution shift between training and test samples. In particular, we show that our framework can achieve 71% boost in terms of Signal-to-Distortion Ratio (SDR) over the baseline, reaching 97.5% of the supervised learning performance. Second, we show that we can further improve the performance of the supervised learning itself by 17% if we augment it by our proposed weakly-supervised framework. Our framework achieves this by making large corpora of unsupervised data available to the supervised learning model as well as utilizing a natural, robust regularization mechanism through weak supervision from the language modality, and hence enabling a powerful semi-supervised framework for audio separation. Code is released at https://github.com/microsoft/BiModalAudioSeparation.", "keywords": "Audio-language learning;conditional audio separation;unsupervised learning;weakly supervised learning;semi-supervised learning", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/0bae6ad825e485b1284a1ff933e7b68d7a019c79.pdf", "author": "Tanvir Mahmud;Saeed Amizadeh;Kazuhito Koishida;Diana Marculescu", "authorids": "~Tanvir_Mahmud1;~Saeed_Amizadeh1;~Kazuhito_Koishida1;~Diana_Marculescu4", "gender": ";M;;", "homepage": "https://sites.google.com/view/tanvirmahmud;;https://asgweb-production.azurewebsites.net/applied-sciences/people/kazuhito-koishida;", "dblp": "255/3041;48/8399;;", "google_scholar": "4aZPxRsAAAAJ;eDn7hQUAAAAJ;;", "orcid": ";;;", "linkedin": ";saeed-amizadeh/;;", "or_profile": "~Tanvir_Mahmud1;~Saeed_Amizadeh1;~Kazuhito_Koishida1;~Diana_Marculescu4", "aff": "University of Texas at Austin;Microsoft;Microsoft Corporation;", "aff_domain": "utexas.edu;microsoft.com;microsoft.com;", "position": "PhD student;Researcher;Researcher;", "bibtex": "@inproceedings{\nmahmud2024weaklysupervised,\ntitle={Weakly-supervised Audio Separation via Bi-modal Semantic Similarity},\nauthor={Tanvir Mahmud and Saeed Amizadeh and Kazuhito Koishida and Diana Marculescu},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=4N97bz1sP6}\n}", "github": "", "project": "", "reviewers": "SLiU;ScdR;fPmw", "pdf_size": 11227543, "rating": "6;6;8", "confidence": "5;4;3", "soundness": "2;2;3", "contribution": "3;2;3", "presentation": "3;3;3", "wc_summary": "185;92;133", "wc_strengths": "134;78;60", "wc_weaknesses": "649;256;21", "wc_questions": "31;17;77", "wc_review": "999;443;291", "wc_reply_reviewers": "40;14;19", "wc_reply_authors": "867;768;729", "reply_reviewers": "1;1;1", "reply_authors": "3;2;2", "rating_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 136.66666666666666, 38.055515004033545 ], "wc_strengths_avg": [ 90.66666666666667, 31.510139461590594 ], "wc_weaknesses_avg": [ 308.6666666666667, 259.0705609588931 ], "wc_questions_avg": [ 41.666666666666664, 25.62984371565478 ], "wc_review_avg": [ 577.6666666666666, 304.3214674576139 ], "wc_reply_reviewers_avg": [ 24.333333333333332, 
11.264496832477201 ], "wc_reply_authors_avg": [ 788.0, 58.08614292583042 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10153359908957697598&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=4N97bz1sP6", "pdf": "https://openreview.net/pdf?id=4N97bz1sP6", "email": "utexas.edu;microsoft.com;microsoft.com;", "author_num": 4, "aff_unique_index": "0;1;1", "aff_unique_norm": "University of Texas at Austin;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.utexas.edu;https://www.microsoft.com", "aff_unique_abbr": "UT Austin;Microsoft", "aff_campus_unique_index": "0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "DAM: Towards a Foundation Model for Forecasting", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19467", "id": "4NhMhElWqP", "author_site": "Luke Darlow, Qiwen Deng, Ahmed Hassan, Martin Asenov, Rajkarn Singh, Artjom Joosen, Adam Barker, Amos Storkey", "tldr": "", "abstract": "It is challenging to scale time series forecasting models such that they forecast accurately for multiple distinct domains and datasets, all with potentially different underlying collection procedures (e.g., sample resolution), patterns (e.g., periodicity), and prediction requirements (e.g., reconstruction vs. forecasting). We call this general task universal forecasting. Existing methods usually assume that input data is regularly sampled, and they forecast to pre-determined horizons, resulting in failure to generalise outside of the scope of their training. We propose the DAM -- a neural model that takes randomly sampled histories and outputs an adjustable basis composition as a continuous function of time for forecasting to non-fixed horizons. It involves three key components: (1) a flexible approach for using randomly sampled histories from a long-tail distribution, that enables an efficient global perspective of the underlying temporal dynamics while retaining focus on the recent history; (2) a transformer backbone that is trained on these actively sampled histories to produce, as representational output, (3) the basis coefficients of a continuous function of time. We show that a single univariate DAM, trained on 25 time series datasets, either outperformed or closely matched existing SoTA models at multivariate long-term forecasting across 18 datasets, including 8 held-out for zero-shot transfer, even though these models were trained to specialise for each dataset-horizon combination. 
This single DAM excels at zero-shot transfer and very-long-term forecasting, performs well at imputation, is interpretable via basis function composition and attention, can be tuned for different inference-cost requirements, is robust to missing and irregularly sampled data by design.", "keywords": "Forecasting;time series;foundation model;transfer learning;imputation", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/b7f533f90084341c1ba61614bba4570106a95509.zip", "author": "Luke Nicholas Darlow;Qiwen Deng;Ahmed Hassan;Martin Asenov;Rajkarn Singh;Artjom Joosen;Adam Barker;Amos Storkey", "authorids": "~Luke_Nicholas_Darlow1;~Qiwen_Deng1;~Ahmed_Hassan7;~Martin_Asenov1;~Rajkarn_Singh1;~Artjom_Joosen1;~Adam_Barker1;~Amos_Storkey1", "gender": ";M;M;M;M;M;M;Not Specified", "homepage": ";;;https://masenov.com/;;https://www.linkedin.com/in/artjom-joosen-9774381b4;;http://homepages.inf.ed.ac.uk/amos/", "dblp": ";;;;149/5226;;24/1959;", "google_scholar": ";https://scholar.google.com/citations?hl=en;;zFULG8IAAAAJ;8IlCysUAAAAJ;2jepixkAAAAJ;;", "orcid": ";;;;;;;", "linkedin": ";https://www.linkedin.com/me?trk=p_mwlite_feed_updates-secondary_nav;ahmed-hassan-9399a8291/;;rajkarn/;;;", "or_profile": "~Luke_Nicholas_Darlow1;~Qiwen_Deng1;~Ahmed_Hassan7;~Martin_Asenov1;~Rajkarn_Singh1;~Artjom_Joosen1;~Adam_Barker1;~Amos_Storkey1", "aff": ";Huawei Technologies Ltd.;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Huawei Technologies Research, UK.;Huawei Technologies Ltd.;University of St Andrews;University of Edinburgh", "aff_domain": ";huawei.com;huawei.com;huawei.com;huawei.com;huawei.com;st-andrews.ac.uk;ed.ac.uk", "position": ";Researcher;Researcher;Researcher;Researcher;Researcher;Full Professor;Full Professor", "bibtex": "@inproceedings{\ndarlow2024dam,\ntitle={{DAM}: Towards a Foundation Model for Forecasting},\nauthor={Luke Nicholas Darlow and Qiwen Deng and Ahmed Hassan and Martin Asenov and Rajkarn Singh and Artjom Joosen and Adam Barker and Amos Storkey},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=4NhMhElWqP}\n}", "github": "", "project": "", "reviewers": "sH5F;wavr;PhNp;yXbk", "pdf_size": 7798906, "rating": "6;6;8;8", "confidence": "4;4;5;4", "soundness": "3;3;3;3", "contribution": "2;3;4;3", "presentation": "3;2;3;3", "wc_summary": "82;97;224;178", "wc_strengths": "52;73;44;71", "wc_weaknesses": "49;355;200;96", "wc_questions": "13;120;120;61", "wc_review": "196;645;588;406", "wc_reply_reviewers": "0;247;596;0", "wc_reply_authors": "869;1915;3134;1328", "reply_reviewers": "0;2;4;0", "reply_authors": "2;6;6;4", "rating_avg": [ 7.0, 1.0 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 145.25, 58.31541391433315 ], "wc_strengths_avg": [ 60.0, 12.349089035228468 ], "wc_weaknesses_avg": [ 175.0, 117.4116689260484 ], "wc_questions_avg": [ 78.5, 44.83581157958446 ], "wc_review_avg": [ 458.75, 175.50979317405626 ], "wc_reply_reviewers_avg": [ 210.75, 244.2144293443776 ], "wc_reply_authors_avg": [ 1811.5, 848.7928192438953 ], "reply_reviewers_avg": [ 1.5, 1.6583123951777 ], "reply_authors_avg": [ 4.5, 1.6583123951777 ], "replies_avg": [ 34, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": 
-1, "openreview": "https://openreview.net/forum?id=4NhMhElWqP", "pdf": "https://openreview.net/pdf?id=4NhMhElWqP", "email": ";huawei.com;huawei.com;huawei.com;huawei.com;huawei.com;st-andrews.ac.uk;ed.ac.uk", "author_num": 8, "aff_unique_index": "0;0;0;0;0;1;2", "aff_unique_norm": "Huawei;University of St Andrews;University of Edinburgh", "aff_unique_dep": "Huawei Technologies;;", "aff_unique_url": "https://www.huawei.com;https://www.st-andrews.ac.uk;https://www.ed.ac.uk", "aff_unique_abbr": "Huawei;St Andrews;Edinburgh", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;1;1", "aff_country_unique": "China;United Kingdom" }, { "id": "4P76wCt9N5", "title": "DAG-based Generative Regression", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Standard regression models address associations between targeted dependent variables and selected independent variables. This paper generalizes this by proposing DAG-based generative regression as a generative process in which the model learns the data generation mechanism from real data. DAG is explicitly involved in the generative process by using structural equation models to capture the data generation mechanisms among the data variables. We learn DAG by reconstructing the model to replicate the real data distribution. We have conducted experiments to measure the performance of our algorithm to show that the results outperform the state-of-the-art by a significantly large margin.", "keywords": "Generative regression modeling;DAG-learning;Generative adversarial learning;Causal discovery;Additive noise model", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Hristo Petkov;Feng Dong;Calum Robert MacLellan", "authorids": "~Hristo_Petkov1;~Feng_Dong2;~Calum_Robert_MacLellan1", "gender": "M;M;M", "homepage": "https://www.linkedin.com/in/hristo-petkov-628007215;https://www.strath.ac.uk/staff/dongfengprofessor/;", "dblp": ";;", "google_scholar": ";;", "orcid": ";;0000-0002-5262-8532", "linkedin": "hristo-petkov-628007215;;", "or_profile": "~Hristo_Petkov1;~Feng_Dong2;~Calum_Robert_MacLellan1", "aff": "University of Strathclyde;University of Strathclyde;", "aff_domain": "strath.ac.uk;strath.ac.uk;", "position": "PhD student;Full Professor;", "bibtex": "@misc{\npetkov2024dagbased,\ntitle={{DAG}-based Generative Regression},\nauthor={Hristo Petkov and Feng Dong and Calum Robert MacLellan},\nyear={2024},\nurl={https://openreview.net/forum?id=4P76wCt9N5}\n}", "github": "", "project": "", "reviewers": "ay5v;XSCR;F2tX;K8HK;sCo4", "site": "https://openreview.net/forum?id=4P76wCt9N5", "pdf_size": 897466, "rating": "3;3;3;3;3", "confidence": "3;5;3;4;4", "soundness": "3;2;2;1;2", "contribution": "2;1;2;1;2", "presentation": "2;3;2;1;2", "wc_summary": "71;45;37;87;39", "wc_strengths": "36;17;24;8;50", "wc_weaknesses": "89;67;29;755;43", "wc_questions": "134;11;41;2;38", "wc_review": "330;140;131;852;170", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "0;0;0;0;0", "rating_avg": [ 3.0, 0.0 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 2.0, 0.6324555320336759 ], "contribution_avg": [ 1.6, 0.4898979485566356 ], "presentation_avg": [ 2.0, 0.6324555320336759 ], "wc_summary_avg": [ 55.8, 19.78282083020518 ], "wc_strengths_avg": [ 27.0, 14.696938456699069 ], "wc_weaknesses_avg": [ 196.6, 279.95256741098126 ], "wc_questions_avg": [ 45.2, 46.88453902940712 ], 
"wc_review_avg": [ 324.6, 273.36393324650567 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:0L4cYs1e3LEJ:scholar.google.com/&scioq=DAG-based+Generative+Regression&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "University of Strathclyde", "aff_unique_dep": "", "aff_unique_url": "https://www.strath.ac.uk", "aff_unique_abbr": "Strathclyde", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "id": "4P8itxA1oA", "title": "Set Features for Anomaly Detection", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "This paper proposes set features for detecting anomalies in samples that consist of unusual combinations of normal elements. Most methods, discover anomalies by detecting an unusual part of a sample. For example, state-of-the-art segmentation-based approaches, first classify each element of the sample (e.g., image patch) as normal or anomalous and then classify the entire sample as anomalous if it contains anomalous elements. However, such approaches do not extend well to scenarios where the anomalies are expressed by an unusual combination of normal elements. In this paper, we overcome this limitation by proposing set features that model each sample by the distribution of its elements. We compute the anomaly score of each sample using a simple density estimation method. Our simple-to-implement approach outperforms the state-of-the-art in image-level logical anomaly detection (+5.2%) and sequence-level time series anomaly detection (+2.4%).", "keywords": "Anomaly Detection;Logical Anomaly Detection;Set Anomaly Detection;Time-Series", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/dfcb40befec813ee2d6d02482222c8b5e0948ba3.zip", "author": "Niv Cohen;Issar Tzachor;Yedid Hoshen", "authorids": "~Niv_Cohen1;~Issar_Tzachor1;~Yedid_Hoshen3", "gender": "M;;M", "homepage": "https://www.cs.huji.ac.il/w~nivc/;;https://www.cs.huji.ac.il/~ydidh/", "dblp": "259/2291;;136/0280", "google_scholar": "https://scholar.google.co.il/citations?user=ZMdC3OQAAAAJ;;https://scholar.google.co.il/citations?user=6y1-qS4AAAAJ", "orcid": ";;", "linkedin": "niv-cohen-39b49521/;;", "or_profile": "~Niv_Cohen1;~Issar_Tzachor1;~Yedid_Hoshen3", "aff": "Hebrew University of Jerusalem;;Google", "aff_domain": "huji.ac.il;;google.com", "position": "PhD student;;Researcher", "bibtex": "@misc{\ncohen2024set,\ntitle={Set Features for Anomaly Detection},\nauthor={Niv Cohen and Issar Tzachor and Yedid Hoshen},\nyear={2024},\nurl={https://openreview.net/forum?id=4P8itxA1oA}\n}", "github": "", "project": "", "reviewers": "JUmH;if8q;Eqh9;Najr", "site": "https://openreview.net/forum?id=4P8itxA1oA", "pdf_size": 3383639, "rating": "3;3;5;5", "confidence": "5;5;4;5", "soundness": "2;1;3;2", "contribution": "2;2;2;2", "presentation": "3;3;2;1", "wc_summary": "53;58;76;49", "wc_strengths": "19;47;9;43", "wc_weaknesses": "206;261;210;196", "wc_questions": "8;23;81;57", "wc_review": "286;389;376;345", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "138;277;200;68", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 4.0, 1.0 ], 
"confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 2.0, 0.7071067811865476 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 59.0, 10.319883720275147 ], "wc_strengths_avg": [ 29.5, 15.960889699512368 ], "wc_weaknesses_avg": [ 218.25, 25.202926417382564 ], "wc_questions_avg": [ 42.25, 28.560243346302215 ], "wc_review_avg": [ 349.0, 39.73034105063786 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 170.75, 77.09531438420885 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=817268210022908330&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Hebrew University of Jerusalem;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.huji.ac.il;https://www.google.com", "aff_unique_abbr": "HUJI;Google", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Jerusalem;Mountain View", "aff_country_unique_index": "0;1", "aff_country_unique": "Israel;United States" }, { "id": "4PzxLPEGRn", "title": "OCAtari: Object-Centric Atari 2600 Reinforcement Learning Environments", "track": "main", "status": "Reject", "tldr": "", "abstract": "Cognitive science and psychology suggest that object-centric representations of complex scenes are a promising step towards enabling efficient abstract reasoning from low-level perceptual features. Yet, most deep reinforcement learning approaches rely on only pixel-based representations that do not capture the compositional properties of natural scenes. For this, we need environments and datasets that allow us to work and evaluate object-centric approaches. We present OCAtari, a set of environment that provides object-centric state representations of Atari games, the most-used evaluation framework for deep RL approaches. OCAtari also allows for RAM state manipulations of the games to change and create specific or even novel situations. 
Our source code is available at\nhttps://anonymous.4open.science/r/OCAtari-52B9 .", "keywords": "Object Centric;Reinforcement Learning;Framework;Environments;Object Detection;Object Discovery", "primary_area": "datasets and benchmarks", "supplementary_material": "", "author": "Quentin Delfosse;Jannis Bl\u00fcml;Bjarne Gregori;Sebastian Sztwiertnia;Kristian Kersting", "authorids": "~Quentin_Delfosse1;~Jannis_Bl\u00fcml1;~Bjarne_Gregori1;~Sebastian_Sztwiertnia1;~Kristian_Kersting1", "gender": "M;M;M;;M", "homepage": "https://quentindelfosse.me/;;https://github.com/BJenrajb;;http://www.ml.informatik.tu-darmstadt.de/", "dblp": "286/1466.html;;;297/9076.html;40/3793", "google_scholar": "k1E0FgIAAAAJ;;https://scholar.google.de/citations?hl=de;;QY-earAAAAAJ", "orcid": ";0000-0002-9400-0946;;;0000-0002-2873-9152", "linkedin": "quentin-delfosse-70b377150/;;;;", "or_profile": "~Quentin_Delfosse1;~Jannis_Bl\u00fcml1;~Bjarne_Gregori1;~Sebastian_Sztwiertnia1;~Kristian_Kersting1", "aff": "CS Department, TU Darmstadt, TU Darmstadt;Technische Universit\u00e4t Darmstadt;;Technische Universit\u00e4t Darmstadt;TU Darmstadt", "aff_domain": "cs.tu-darmstadt.de;tu-darmstadt.de;;tu-darmstadt.de;tu-darmstadt.de", "position": "PhD student;PhD student;;PhD student;Full Professor", "bibtex": "@misc{\ndelfosse2024ocatari,\ntitle={{OCA}tari: Object-Centric Atari 2600 Reinforcement Learning Environments},\nauthor={Quentin Delfosse and Jannis Bl{\\\"u}ml and Bjarne Gregori and Sebastian Sztwiertnia and Kristian Kersting},\nyear={2024},\nurl={https://openreview.net/forum?id=4PzxLPEGRn}\n}", "github": "", "project": "", "reviewers": "Lfyp;7uXz;BhyG;wGg4", "site": "https://openreview.net/forum?id=4PzxLPEGRn", "pdf_size": 1000480, "rating": "3;5;5;5", "confidence": "4;4;4;4", "soundness": "3;3;3;3", "contribution": "2;2;2;2", "presentation": "2;3;3;3", "wc_summary": "82;138;81;66", "wc_strengths": "102;45;23;69", "wc_weaknesses": "118;174;21;305", "wc_questions": "216;120;279;39", "wc_review": "518;477;404;479", "wc_reply_reviewers": "0;89;0;190", "wc_reply_authors": "961;479;349;553", "reply_reviewers": "0;1;0;1", "reply_authors": "2;1;1;1", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 91.75, 27.444261695297982 ], "wc_strengths_avg": [ 59.75, 29.32042803234632 ], "wc_weaknesses_avg": [ 154.5, 102.69493658404001 ], "wc_questions_avg": [ 163.5, 91.5 ], "wc_review_avg": [ 469.5, 41.19769411022904 ], "wc_reply_reviewers_avg": [ 69.75, 78.35934851694468 ], "wc_reply_authors_avg": [ 585.5, 228.76352419037437 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=959748260740375104&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Technische Universit\u00e4t Darmstadt", "aff_unique_dep": "Computer Science Department", "aff_unique_url": "https://www.tu-darmstadt.de", "aff_unique_abbr": "TU Darmstadt", "aff_campus_unique_index": "1", "aff_campus_unique": ";Darmstadt", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "id": "4QaKdsh15T", "title": "An Embodied Generalist Agent in 3D World", "track": "main", "status": "Reject", "tldr": "", "abstract": "Leveraging massive knowledge and learning 
schemes from large language models (LLMs), recent machine learning models show notable successes in building generalist agents that exhibit the capability of general-purpose task solving in diverse domains, including natural language processing, computer vision, and robotics. However, a significant challenge remains as these models exhibit limited ability in understanding and interacting with the 3D world. We argue this limitation significantly hinders the current models from performing real-world tasks and further achieving general intelligence. To this end, we introduce an embodied multi-modal and multi-task generalist agent that excels in perceiving, grounding, reasoning, planning, and acting in the 3D world. Our proposed agent, referred to as LEO, is trained with shared LLM-based model architectures, objectives, and weights in two stages: (i) 3D vision-language alignment and (ii) 3D vision-language-action instruction tuning. To facilitate the training, we meticulously curate and generate an extensive dataset comprising object-level and scene-level multi-modal tasks with exceeding scale and complexity, necessitating a deep understanding of and interaction with the 3D world. Through rigorous experiments, we demonstrate LEO's remarkable proficiency across a wide spectrum of tasks, including 3D captioning, question answering, embodied reasoning, embodied navigation, and robotic manipulation. Our ablation results further provide valuable insights for the development of future embodied generalist agents.", "keywords": "embodied generalist agent;3D generalist agent;large language model;3D vision-language;vision-language-action;multi-modal instruction tuning", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/6bd0966288935d52a57036c358525c3de089d392.zip", "author": "Jiangyong Huang;Silong Yong;Xiaojian Ma;Xiongkun Linghu;Puhao Li;Yan Wang;Qing Li;Song-Chun Zhu;Baoxiong Jia;Siyuan Huang", "authorids": "~Jiangyong_Huang1;~Silong_Yong1;~Xiaojian_Ma1;~Xiongkun_Linghu1;~Puhao_Li1;~Yan_Wang30;~Qing_Li1;~Song-Chun_Zhu1;~Baoxiong_Jia1;~Siyuan_Huang2", "gender": ";M;;;M;M;M;M;M;M", "homepage": "https://huangjy-pku.github.io/;https://github.com/SilongYong;;;https://xiaoyao-li.github.io/;https://github.com/luckyWangYan;http://liqing-ustc.github.io/;https://zhusongchun.net/;https://buzz-beater.github.io/;https://siyuanhuang.com/", "dblp": "334/4572;;;;330/4121.html;;181/2689-3;10/10313;206/8738;62/885-1", "google_scholar": ";EitVAcwAAAAJ;;;https://scholar.google.at/citations?user=HTsO18AAAAAJ;;iwdFZBEAAAAJ;https://scholar.google.com.tw/citations?user=Al8dyb4AAAAJ;qIBUK6sAAAAJ;1NN7Ee8AAAAJ", "orcid": "0000-0001-9125-6893;;;0000-0002-4393-4386;0009-0003-2696-9346;;;;0000-0002-4968-3290;", "linkedin": ";;;;;;;;baoxiong-jia-2b6094122?trk=public_post-text;", "or_profile": "~Jiangyong_Huang1;~Silong_Yong1;~Xiaojian_Ma1;~Xiongkun_Linghu1;~Puhao_Li1;~Yan_Wang30;~Qing_Li1;~Song-Chun_Zhu1;~Baoxiong_Jia1;~Siyuan_Huang2", "aff": "Peking University;Carnegie Mellon University;;Beijing Institute for General Artificial Intelligence;Tsinghua University;;Beijing Institute for General Artificial Intelligence (BIGAI);Peking University;Beijing Institute for General Artificial Intelligence;Beijing Institute for General Artificial Intelligence", "aff_domain": "pku.edu.cn;andrew.cmu.edu;;bigai.ai;tsinghua.edu.cn;;bigai.ai;pku.edu.cn;bigai.ai;bigai.ai", "position": "PhD student;MS student;;Researcher;PhD student;;Researcher;Full Professor;Researcher;Researcher", "bibtex": 
"@misc{\nhuang2024an,\ntitle={An Embodied Generalist Agent in 3D World},\nauthor={Jiangyong Huang and Silong Yong and Xiaojian Ma and Xiongkun Linghu and Puhao Li and Yan Wang and Qing Li and Song-Chun Zhu and Baoxiong Jia and Siyuan Huang},\nyear={2024},\nurl={https://openreview.net/forum?id=4QaKdsh15T}\n}", "github": "", "project": "", "reviewers": "Vn42;bU9n;ADo8;foVx", "site": "https://openreview.net/forum?id=4QaKdsh15T", "pdf_size": 25507949, "rating": "3;5;5;8", "confidence": "4;3;4;4", "soundness": "2;2;2;4", "contribution": "2;3;2;4", "presentation": "2;4;2;4", "wc_summary": "97;120;103;40", "wc_strengths": "28;69;105;102", "wc_weaknesses": "395;249;646;51", "wc_questions": "174;100;96;18", "wc_review": "694;538;950;211", "wc_reply_reviewers": "690;0;0;0", "wc_reply_authors": "4894;1083;2839;580", "reply_reviewers": "3;0;0;0", "reply_authors": "9;3;6;1", "rating_avg": [ 5.25, 1.7853571071357126 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "contribution_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 1.0 ], "wc_summary_avg": [ 90.0, 30.074906483645133 ], "wc_strengths_avg": [ 76.0, 31.10466202999158 ], "wc_weaknesses_avg": [ 335.25, 217.00964840301455 ], "wc_questions_avg": [ 97.0, 55.18151864528558 ], "wc_review_avg": [ 598.25, 267.6232192841271 ], "wc_reply_reviewers_avg": [ 172.5, 298.77876430563134 ], "wc_reply_authors_avg": [ 2349.0, 1691.8364282636783 ], "reply_reviewers_avg": [ 0.75, 1.299038105676658 ], "reply_authors_avg": [ 4.75, 3.031088913245535 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.08084520834544431, "gs_citation": 143, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8869821326961875122&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;2;3;2;0;2;2", "aff_unique_norm": "Peking University;Carnegie Mellon University;Beijing Institute for General Artificial Intelligence;Tsinghua University", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.pku.edu.cn;https://www.cmu.edu;http://www.bigaiai.org/;https://www.tsinghua.edu.cn", "aff_unique_abbr": "Peking U;CMU;BIGAI;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;0;0;0", "aff_country_unique": "China;United States" }, { "id": "4QtywskEyY", "title": "Teaching wiser, Learning smarter: Multi-stage Decoupled Relational Knowledge Distillation with Adaptive Stage Selection", "track": "main", "status": "Reject", "tldr": "", "abstract": "Due to the effectiveness of contrastive-learning-based knowledge distillation methods, there has been a renewed interest on relational knowledge distillation.\nHowever, these methods primarily rely on the transfer of angle-wise information between samples, using only the normalized penultimate layer's output as the knowledge base.\nOur experiments demonstrate that properly harnessing relational information derived from intermediate layers can further improve the effectiveness of distillation.\nMeanwhile, we found that simply adding distance-wise relational information to contrastive-learning-based methods negatively impacts distillation quality, revealing an implicit contention between angle-wise and distance-wise attributes.\nTherefore, we propose a ${\\bf{M}}$ulti-stage ${\\bf{D}}$ecoupled ${\\bf{R}}$elational (MDR) knowledge distillation framework equipped with an adaptive stage selection to identify the stages that maximize the efficacy of transferring the relational 
knowledge.\nFurthermore, our framework decouples angle-wise and distance-wise information to resolve their conflicts while still preserves complete relational knowledge, thereby resulting in an elevated transferring efficiency and distillation quality.\nTo evaluate the proposed method, we conduct extensive experiments on multiple image benchmarks ($\\textit{i.e.}$ CIFAR100, ImageNet and Pascal VOC), covering various tasks ($\\textit{i.e.}$ classification, few-shot learning, transfer learning and object detection). \nOur method exhibits superior performance under diverse scenarios, surpassing the state of the art by an average improvement of 1.08\\% on CIFAR-100 across extensively utilized teacher-student network pairs.", "keywords": "relation-based knowledge distillation;multi-stage;decouple;contrastive learning", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "JiaQi Wang;Lu Lu;Mingmin Chi;Jian Chen", "authorids": "~JiaQi_Wang7;~Lu_Lu7;~Mingmin_Chi2;~Jian_Chen12", "gender": ";M;F;M", "homepage": "https://github.com/NickiJQ;https://cn.aliyun.com;https://faculty.fudan.edu.cn/chimingmin/zh_CN/zdylm/644183/list/index.htm;http://lca.ece.utexas.edu/people/jchen", "dblp": ";;03/2079.html;", "google_scholar": ";;;", "orcid": ";0009-0008-6524-802X;0000-0003-2650-4146;", "linkedin": ";;;", "or_profile": "~JiaQi_Wang7;~Lu_Lu7;~Mingmin_Chi2;~Jian_Chen12", "aff": "Fudan University;Alibaba Group;Fudan University;Alibaba Group", "aff_domain": "fudan.edu.cn;alibaba-inc.com;fudan.edu.cn;alibaba-inc.com", "position": "MS student;Expert Software Engineer;Associate Professor;Researcher", "bibtex": "@misc{\nwang2024teaching,\ntitle={Teaching wiser, Learning smarter: Multi-stage Decoupled Relational Knowledge Distillation with Adaptive Stage Selection},\nauthor={JiaQi Wang and Lu Lu and Mingmin Chi and Jian Chen},\nyear={2024},\nurl={https://openreview.net/forum?id=4QtywskEyY}\n}", "github": "", "project": "", "reviewers": "zgd9;WUNU;qoFL;G2bH", "site": "https://openreview.net/forum?id=4QtywskEyY", "pdf_size": 4791349, "rating": "5;5;6;8", "confidence": "4;4;5;4", "soundness": "3;2;2;3", "contribution": "2;2;2;3", "presentation": "2;3;3;3", "wc_summary": "31;93;61;127", "wc_strengths": "79;45;14;55", "wc_weaknesses": "98;323;77;393", "wc_questions": "1;10;41;117", "wc_review": "209;471;193;692", "wc_reply_reviewers": "0;0;0;34", "wc_reply_authors": "495;638;382;434", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 78.0, 35.79106033634656 ], "wc_strengths_avg": [ 48.25, 23.31710745354149 ], "wc_weaknesses_avg": [ 222.75, 137.69599667383218 ], "wc_questions_avg": [ 42.25, 45.63647116068463 ], "wc_review_avg": [ 391.25, 205.747873622062 ], "wc_reply_reviewers_avg": [ 8.5, 14.722431864335457 ], "wc_reply_authors_avg": [ 487.25, 95.7845890527281 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:TwaiuYAM8i4J:scholar.google.com/&scioq=Teaching+wiser,+Learning+smarter:+Multi-stage+Decoupled+Relational+Knowledge+Distillation+with+Adaptive+Stage+Selection&hl=en&as_sdt=0,44", 
"gs_version_total": 0, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Fudan University;Alibaba Group", "aff_unique_dep": ";", "aff_unique_url": "https://www.fudan.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "Fudan;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "4Qz9BT4mpM", "title": "Predicting the Performance of Foundation Models via Agreement-on-the-line", "track": "main", "status": "Reject", "tldr": "", "abstract": "Estimating out-of-distribution performance is critical to safely deploying machine learning models. Recently, Baek et al. showed that the phenomenon ``agreement-on-the-line'' can be a reliable method for predicting OOD accuracy of models in an ensemble consisting largely of CNNs trained from scratch. However, it is now increasingly common to lightly fine-tune foundation models, and it is unclear whether such fine-tuning is sufficient to produce enough diversity in models for such agreement-based methods to work properly. In this paper, we develop methods for reliably applying agreement-on-the-line-based performance estimation to fine-tuned foundation models. In particular, we first study the case of fine-tuning a single foundation model, where we extensively study how different types of randomness (linear head initialization, hyperparameter selection, data subsetting, and data shuffling) contribute to the agreement on the line of the resulting model sets; we find, somewhat surprisingly, that it is typically possible to obtain strong agreement via random initialization of the linear head alone. Next, we study how \\emph{multiple} foundation models, pretrained on different data sets but fine-tuned on the same task, may or may not produce agreement; we show, again rather surprisingly, that the diversity of such models is already sufficient and not too disparate for them to all lie on the same agreement lines. 
In total, these methods enable reliable and efficient estimation of OOD accuracy for fine-tuned foundation models, without leveraging any labeled OOD data.", "keywords": "robustness;OOD performance estimation;foundation model safety", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "", "author": "Aman Mehra;Rahul Saxena;Taeyoun Kim;Christina Baek;J Zico Kolter;Aditi Raghunathan", "authorids": "~Aman_Mehra1;~Rahul_Saxena1;~Taeyoun_Kim1;~Christina_Baek2;~J_Zico_Kolter1;~Aditi_Raghunathan1", "gender": ";M;M;;F;M", "homepage": ";https://www.linkedin.com/in/rahul-saxena-cs/;https://danielkty.github.io;https://kebaek.github.io;https://www.cs.cmu.edu/~aditirag/;http://www.zicokolter.com", "dblp": ";;122/6365;202/7238;166/1409;67/2526", "google_scholar": ";;;;Ch9iRwQAAAAJ;UXh1I6UAAAAJ", "orcid": ";;;;;", "linkedin": "am-me/;;;;;", "or_profile": "~Aman_Mehra1;~Rahul_Saxena1;~Taeyoun_Kim1;~Christina_Baek2;~Aditi_Raghunathan1;~Zico_Kolter1", "aff": "Carnegie Mellon University;;School of Computer Science, Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "andrew.cmu.edu;;cs.cmu.edu;cmu.edu;cmu.edu;cmu.edu", "position": "MS student;;MS student;PhD student;Assistant Professor;Full Professor", "bibtex": "@misc{\nmehra2024predicting,\ntitle={Predicting the Performance of Foundation Models via Agreement-on-the-line},\nauthor={Aman Mehra and Rahul Saxena and Taeyoun Kim and Christina Baek and J Zico Kolter and Aditi Raghunathan},\nyear={2024},\nurl={https://openreview.net/forum?id=4Qz9BT4mpM}\n}", "github": "", "project": "", "reviewers": "sUjj;47Ds;BrfQ;vvH6", "site": "https://openreview.net/forum?id=4Qz9BT4mpM", "pdf_size": 1808454, "rating": "5;6;6;6", "confidence": "4;3;4;3", "soundness": "3;3;3;2", "contribution": "3;2;3;3", "presentation": "3;3;3;3", "wc_summary": "166;306;100;97", "wc_strengths": "149;77;82;289", "wc_weaknesses": "409;226;207;439", "wc_questions": "95;106;32;152", "wc_review": "819;715;421;977", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "2126;1750;1818;2120", "reply_reviewers": "0;0;0;0", "reply_authors": "4;4;4;5", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 167.25, 84.721234056168 ], "wc_strengths_avg": [ 149.25, 85.54640553524152 ], "wc_weaknesses_avg": [ 320.25, 104.50687776409742 ], "wc_questions_avg": [ 96.25, 42.81573892857625 ], "wc_review_avg": [ 733.0, 202.85462775100794 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1953.5, 171.209666783158 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 4.25, 0.4330127018922193 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5847648953960085500&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "4SmhpF1nO4", "title": "Tabular Deep-SMOTE: A supervised autoencoder-based minority-oversampling technique for class-imbalanced tabular 
classification", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Class imbalance, present in many real-world tabular datasets, may cause machine-learning models to under-classify minority samples, which are often highly significant. This work proposes a new oversampling method called Tabular Deep-SMOTE (TD-SMOTE), which harnesses the class labels to improve synthetic sample generation via autoencoders. The method is based on oversampling in an alternative space shaped by a metric-learning loss. Such spaces tend to be more semantic and obtain higher class separation and density, which improves the quality of samples generated by linear interpolations over the observed minority samples. In addition, we propose a synthetic samples filtering scheme based on the decision boundary of a pre-trained tabular classifier to guarantee the quality of synthetic samples. Compared to common and leading oversampling methods, the method achieves improved classification performance in an extensive set of experiments that includes over 36 publicly available datasets.", "keywords": "supervised learning;tabular data;imbalanced datasets;classification;minority oversampling", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/2b92313bd939007a9d662a74245c38664934b7f8.zip", "author": "Daniel Lawton;Moshe Salhov;Ofir Lindenbaum;Amir Averbuch", "authorids": "~Daniel_Lawton1;~Moshe_Salhov1;~Ofir_Lindenbaum1;~Amir_Averbuch1", "gender": ";M;M;M", "homepage": ";;https://www.eng.biu.ac.il/lindeno/;http://www.cs.tau.ac.il/~amir1/", "dblp": ";;142/4140;", "google_scholar": ";oxtRoo8AAAAJ;https://scholar.google.co.il/citations?user=jXxk6gcAAAAJ;https://scholar.google.com.tw/citations?user=W5hdNOgAAAAJ", "orcid": ";;;0000-0002-3664-1489", "linkedin": ";;;", "or_profile": "~Daniel_Lawton1;~Moshe_Salhov1;~Ofir_Lindenbaum1;~Amir_Averbuch1", "aff": ";;Bar-Ilan University;Tel Aviv University", "aff_domain": ";;biu.ac.il;tau.ac.il", "position": ";;Assistant Professor;Full Professor", "bibtex": "@misc{\nlawton2024tabular,\ntitle={Tabular Deep-{SMOTE}: A supervised autoencoder-based minority-oversampling technique for class-imbalanced tabular classification},\nauthor={Daniel Lawton and Moshe Salhov and Ofir Lindenbaum and Amir Averbuch},\nyear={2024},\nurl={https://openreview.net/forum?id=4SmhpF1nO4}\n}", "github": "", "project": "", "reviewers": "KYqz;Uo1S;RWeJ;iTrN", "site": "https://openreview.net/forum?id=4SmhpF1nO4", "pdf_size": 2513016, "rating": "3;3;3;3", "confidence": "3;5;2;3", "soundness": "2;2;2;3", "contribution": "2;1;1;3", "presentation": "2;1;2;4", "wc_summary": "52;68;46;53", "wc_strengths": "19;7;10;47", "wc_weaknesses": "279;221;247;249", "wc_questions": "2;156;1;48", "wc_review": "352;452;304;397", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 3.0, 0.0 ], "confidence_avg": [ 3.25, 1.0897247358851685 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 1.75, 0.82915619758885 ], "presentation_avg": [ 2.25, 1.0897247358851685 ], "wc_summary_avg": [ 54.75, 8.104782538723663 ], "wc_strengths_avg": [ 20.75, 15.785673884886892 ], "wc_weaknesses_avg": [ 249.0, 20.54263858417414 ], "wc_questions_avg": [ 51.75, 63.1124987621311 ], "wc_review_avg": [ 376.25, 54.71916940159088 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], 
"authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Js6F-3aYw4MJ:scholar.google.com/&scioq=Tabular+Deep-SMOTE:+A+supervised+autoencoder-based+minority-oversampling+technique+for+class-imbalanced+tabular+classification&hl=en&as_sdt=0,47", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Bar-Ilan University;Tel Aviv University", "aff_unique_dep": ";", "aff_unique_url": "https://www.biu.ac.il;https://www.tau.ac.il", "aff_unique_abbr": "BIU;TAU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Israel" }, { "id": "4SrzKsJocx", "title": "Simultaneous Dimensionality Reduction: A Data Efficient Approach for Multimodal Representations Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Current experiments frequently produce high-dimensional, multimodal datasets\u2014such as those combining neural activity and animal behavior or gene expression and phenotypic profiling\u2014with the goal of extracting useful correlations between the modalities. Often, the first step in analyzing such datasets is dimensionality reduction. We explore two primary classes of approaches to dimensionality reduction: Independent Dimensionality Reduction (IDR) and Simultaneous Dimensionality Reduction (SDR). In IDR methods, of which Principal Components Analysis is a paradigmatic example, each modality is compressed independently, striving to retain as much variation within each modality as possible. In contrast, in SDR, one simultaneously compresses the modalities to maximize the covariation between the reduced descriptions while paying less attention to how much individual variation is preserved. Paradigmatic examples include Partial Least Squares and Canonical Correlations Analysis. Even though these dimensionality reduction methods are a staple of statistics, their relative accuracy and data set size requirements are poorly understood. We introduce a generative linear model to synthesize multimodal data with known variance and covariance structures to examine these questions. We assess the accuracy of the reconstruction of the covariance structures as a function of the number of samples, signal-to-noise ratio, and the number of varying and covarying signals in the data. Using numerical experiments, we demonstrate that SDR methods consistently outperform IDR methods and yield higher-quality, more succinct reduced-dimensional representations at smaller dataset sizes. Remarkably, regularized CCA can identify low-dimensional weak covarying structures even when the number of samples is much smaller than the dimensionality of the data, a challenge known to affect all dimensionality reduction methods. Our work corroborates and explains previous observations in the literature that SDR can be more effective in detecting covariation patterns in data. These findings suggest that SDR should be preferred to IDR in real-world data analysis when detecting covariation is more important than preserving variation.", "keywords": "Dimensionality reduction;Independent Dimensionality Reduction (IDR);Simultaneous Dimensionality Reduction (SDR);PCA;PLS;CCA;regularized CCA;Multimodal data analysis.", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Eslam Abdelaleem;Ahmed Roman;K. 
Michael Martini;Ilya Nemenman", "authorids": "~Eslam_Abdelaleem1;~Ahmed_Roman1;~K._Michael_Martini1;~Ilya_Nemenman1", "gender": ";M;M;Not Specified", "homepage": ";https://connects.catalyst.harvard.edu/Profiles/display/Person/210375;https://guava.physics.uiuc.edu/~martini/;https://physics.emory.edu/people/bios/nemenman-ilya.html", "dblp": ";;;83/5659.html", "google_scholar": ";vKAd0r4AAAAJ;ztH4760AAAAJ;https://scholar.google.com.tw/citations?user=enjXdH4AAAAJ", "orcid": "0009-0006-9429-3589;0000-0003-1845-9533;0000-0002-5946-1122;0000-0003-3024-4244", "linkedin": "eslamalaa/;ahmed-roman-2a8a7868/;;", "or_profile": "~Eslam_Abdelaleem1;~Ahmed_Roman1;~K._Michael_Martini1;~Ilya_Nemenman1", "aff": "Emory University;Harvard Medical School;Emory University;Emory University", "aff_domain": "emory.edu;hms.harvard.edu;emory.edu;emory.edu", "position": "PhD student;Postdoc;Postdoc;Full Professor", "bibtex": "@misc{\nabdelaleem2024simultaneous,\ntitle={Simultaneous Dimensionality Reduction: A Data Efficient Approach for Multimodal Representations Learning},\nauthor={Eslam Abdelaleem and Ahmed Roman and K. Michael Martini and Ilya Nemenman},\nyear={2024},\nurl={https://openreview.net/forum?id=4SrzKsJocx}\n}", "github": "", "project": "", "reviewers": "KsZw;7S2y;7ULi;di89;595z", "site": "https://openreview.net/forum?id=4SrzKsJocx", "pdf_size": 2253801, "rating": "3;3;3;5;5", "confidence": "4;4;4;2;4", "soundness": "2;2;3;3;3", "contribution": "2;1;1;2;2", "presentation": "2;3;3;3;3", "wc_summary": "49;78;19;73;93", "wc_strengths": "25;6;7;39;28", "wc_weaknesses": "89;262;150;127;61", "wc_questions": "395;13;9;4;177", "wc_review": "558;359;185;243;359", "wc_reply_reviewers": "0;288;48;0;45", "wc_reply_authors": "830;397;651;126;362", "reply_reviewers": "0;1;1;0;1", "reply_authors": "2;1;1;1;2", "rating_avg": [ 3.8, 0.9797958971132712 ], "confidence_avg": [ 3.6, 0.8 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "contribution_avg": [ 1.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 62.4, 25.904439773907484 ], "wc_strengths_avg": [ 21.0, 12.727922061357855 ], "wc_weaknesses_avg": [ 137.8, 69.23987290571813 ], "wc_questions_avg": [ 119.6, 152.38057619001182 ], "wc_review_avg": [ 340.8, 127.80985877466574 ], "wc_reply_reviewers_avg": [ 76.2, 107.92664175262752 ], "wc_reply_authors_avg": [ 473.2, 243.98311416981298 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6123724356957946, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13215423161228151904&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Emory University;Harvard University", "aff_unique_dep": ";Medical School", "aff_unique_url": "https://www.emory.edu;https://hms.harvard.edu", "aff_unique_abbr": "Emory;HMS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Boston", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Rayleigh Quotient Graph Neural Networks for Graph-level Anomaly Detection", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19466", "id": "4UIBysXjVq", "author_site": "Xiangyu Dong, Xingyi Zhang, Sibo WANG", "tldr": "", "abstract": "Graph-level anomaly detection has gained significant attention as it finds applications in various domains, such as cancer diagnosis and enzyme prediction. 
However, existing methods fail to capture the spectral properties of graph anomalies, resulting in unexplainable framework design and unsatisfying performance. In this paper, we re-investigate the spectral differences between anomalous and normal graphs. Our main observation shows a significant disparity in the accumulated spectral energy between these two classes. Moreover, we prove that the accumulated spectral energy of the graph signal can be represented by its Rayleigh Quotient, indicating that the Rayleigh Quotient is a driving factor behind the anomalous properties of graphs. Motivated by this, we propose Rayleigh Quotient Graph Neural Network (RQGNN), the first spectral GNN that explores the inherent spectral features of anomalous graphs for graph-level anomaly detection. Specifically, we introduce a novel framework with two components: the Rayleigh Quotient learning component (RQL) and Chebyshev Wavelet GNN with RQ-pooling (CWGNN-RQ). RQL explicitly captures the Rayleigh Quotient of graphs and CWGNN-RQ implicitly explores the spectral space of graphs. Extensive experiments on 10 real-world datasets show that RQGNN outperforms the best rival by 6.74% in Macro-F1 score and 1.44% in AUC, demonstrating the effectiveness of our framework. Our code is available at https://github.com/xydong127/RQGNN.", "keywords": "Graph-level anomaly detection;Spectral GNN;Rayleigh Quotient", "primary_area": "learning on graphs and other geometries & topologies", "supplementary_material": "", "author": "Xiangyu Dong;Xingyi Zhang;Sibo Wang", "authorids": "~Xiangyu_Dong2;~Xingyi_Zhang1;~Sibo_Wang3", "gender": ";M;M", "homepage": "https://xydong127.github.io/;http://josiah96zhang.github.io/;https://www1.se.cuhk.edu.hk/~swang/", "dblp": "84/3152-2.html;93/1107-3;131/6529-1", "google_scholar": ";j_o_XDkAAAAJ;b2gLqsgAAAAJ", "orcid": "0009-0009-6312-8160;0000-0001-5203-5916;0000-0003-1892-6971", "linkedin": ";xingyi-zhang-769338204/;sibo-wang-b6a60941/?originalSubdomain=hk", "or_profile": "~Xiangyu_Dong2;~Xingyi_Zhang1;~Sibo_Wang3", "aff": "Chinese University of Hong Kong;The Chinese University of Hong Kong;The Chinese University of Hong Kong", "aff_domain": "cuhk.hk;cuhk.edu.hk;cuhk.edu.hk", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\ndong2024rayleigh,\ntitle={Rayleigh Quotient Graph Neural Networks for Graph-level Anomaly Detection},\nauthor={Xiangyu Dong and Xingyi Zhang and Sibo Wang},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=4UIBysXjVq}\n}", "github": "", "project": "", "reviewers": "X1rz;oJCz;qWnD", "pdf_size": 622914, "rating": "6;6;6", "confidence": "4;4;4", "soundness": "2;3;2", "contribution": "2;3;2", "presentation": "2;3;3", "wc_summary": "105;54;96", "wc_strengths": "81;30;115", "wc_weaknesses": "253;194;174", "wc_questions": "41;29;145", "wc_review": "480;307;530", "wc_reply_reviewers": "295;0;0", "wc_reply_authors": "3861;1869;1718", "reply_reviewers": "3;0;0", "reply_authors": "7;4;4", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 85.0, 22.22611077089287 ], "wc_strengths_avg": [ 75.33333333333333, 34.93167935015754 ], "wc_weaknesses_avg": [ 207.0, 33.53605025441527 ], "wc_questions_avg": [ 71.66666666666667, 52.08539995899896 ], "wc_review_avg": 
[ 439.0, 95.54405615561161 ], "wc_reply_reviewers_avg": [ 98.33333333333333, 139.06433363335435 ], "wc_reply_authors_avg": [ 2482.6666666666665, 976.5764463448602 ], "reply_reviewers_avg": [ 1.0, 1.4142135623730951 ], "reply_authors_avg": [ 5.0, 1.4142135623730951 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12135700093364746008&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=4UIBysXjVq", "pdf": "https://openreview.net/pdf?id=4UIBysXjVq", "email": "cuhk.hk;cuhk.edu.hk;cuhk.edu.hk", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Chinese University of Hong Kong", "aff_unique_dep": "", "aff_unique_url": "https://www.cuhk.edu.hk", "aff_unique_abbr": "CUHK", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "4UP387Adir", "title": "Weakly Supervised Graph Contrastive Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Graph Contrastive Learning (GCL) has recently gained popularity owing to its ability to learn efficient node representations in a self-supervised manner. These representations are typically used to train a downstream classifier. In several real-world datasets, it is difficult to acquire sufficient clean labels for classification and instead we have weak or noisy labels available. There is little known about the robustness of the node representations learnt by the current GCL methods in the presence of weak labels.\nMoreover, GCL has been successfully adapted to a supervised setting where class labels are used to contrast between pairs of nodes. \nCan weak labels similarly be leveraged to learn better node embeddings? In this paper, we first empirically study the robustness of current GCL node representations to weak supervision. Then, we introduce Weakly Supervised Graph Contrastive Learning, WSNet, a novel method that incorporates signals from weak labels for the contrastive learning objective. We evaluate WSNet on five benchmark graph datasets comparing its performance with state-of-the-art GCL and noisy-label learning methods. We show that WSNet outperforms all baselines particularly in the high noise setting. 
We conclude that although current GCL methods show great promise in the weak supervision paradigm, they are still limited in their capacity to deal with label noise and utilizing signals from weak labels is an effective way to improve their performance.", "keywords": "weak supervision;graph contrastive learning;noisy label learning;weakly supervised node classification", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Pratheeksha Nair;Reihaneh Rabbany", "authorids": "~Pratheeksha_Nair2;~Reihaneh_Rabbany1", "gender": "F;F", "homepage": "https://nair-p.github.io/;http://www.reirab.com/", "dblp": "233/1231;94/9024", "google_scholar": ";https://scholar.google.ca/citations?user=Foh_c-QAAAAJ", "orcid": ";", "linkedin": "pnair96/;", "or_profile": "~Pratheeksha_Nair2;~Reihaneh_Rabbany1", "aff": "McGill University;Montreal Institute for Learning Algorithms, University of Montreal, University of Montreal", "aff_domain": "mcgill.ca;mila.umontreal.ca", "position": "PhD student;Assistant Professor", "bibtex": "@misc{\nnair2024weakly,\ntitle={Weakly Supervised Graph Contrastive Learning},\nauthor={Pratheeksha Nair and Reihaneh Rabbany},\nyear={2024},\nurl={https://openreview.net/forum?id=4UP387Adir}\n}", "github": "", "project": "", "reviewers": "YxoE;oC5W;waBU;G4h6", "site": "https://openreview.net/forum?id=4UP387Adir", "pdf_size": 790235, "rating": "5;5;6;6", "confidence": "3;3;4;3", "soundness": "2;2;2;2", "contribution": "2;2;2;2", "presentation": "3;2;2;2", "wc_summary": "75;116;70;56", "wc_strengths": "45;39;72;23", "wc_weaknesses": "107;158;242;241", "wc_questions": "3;687;3;65", "wc_review": "230;1000;387;385", "wc_reply_reviewers": "128;22;282;18", "wc_reply_authors": "818;1504;939;1262", "reply_reviewers": "1;1;1;1", "reply_authors": "2;4;2;3", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.0, 0.0 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 79.25, 22.331312097590683 ], "wc_strengths_avg": [ 44.75, 17.66882848408462 ], "wc_weaknesses_avg": [ 187.0, 57.40644563113101 ], "wc_questions_avg": [ 189.5, 288.34484562759224 ], "wc_review_avg": [ 500.5, 295.33582579836127 ], "wc_reply_reviewers_avg": [ 112.5, 107.34407296166845 ], "wc_reply_authors_avg": [ 1130.75, 269.7789604472521 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ER0Ktiwc88YJ:scholar.google.com/&scioq=Weakly+Supervised+Graph+Contrastive+Learning&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "McGill University;University of Montreal", "aff_unique_dep": ";Montreal Institute for Learning Algorithms", "aff_unique_url": "https://www.mcgill.ca;https://www.umontreal.ca", "aff_unique_abbr": "McGill;UM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Montreal", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "title": "Locality-Aware Graph Rewiring in GNNs", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19465", "id": "4Ua4hKiAJX", "author_site": "Federico Barbero, Ameya Velingker, Amin Saberi, Michael Bronstein, Francesco Di Giovanni", "tldr": "", "abstract": "Graph Neural Networks (GNNs) are popular models for 
machine learning on graphs that typically follow the message-passing paradigm, whereby the feature of a node is updated recursively upon aggregating information over its neighbors. While exchanging messages over the input graph endows GNNs with a strong inductive bias, it can also make GNNs susceptible to over-squashing, thereby preventing them from capturing long-range interactions in the given graph. To rectify this issue, graph rewiring techniques have been proposed as a means of improving information flow by altering the graph connectivity. In this work, we identify three desiderata for graph-rewiring: (i) reduce over-squashing, (ii) respect the locality of the graph, and \n(iii) preserve the sparsity of the graph. We highlight fundamental trade-offs that occur between spatial and spectral rewiring techniques; while the former often satisfy (i) and (ii) but not (iii), the latter generally satisfy (i) and (iii) at the expense of (ii). We propose a novel rewiring framework that satisfies all of (i)--(iii) through a locality-aware sequence of rewiring operations. We then discuss a specific instance of such rewiring framework and \nvalidate its effectiveness on several real-world benchmarks, showing that it either matches or significantly outperforms existing rewiring approaches.", "keywords": "Graph Neural Networks;Message Passing Neural Networks;Over-squashing;Graph Rewiring", "primary_area": "learning on graphs and other geometries & topologies", "supplementary_material": "/attachment/387291a37c07ba744d2e44457c8089aa6c05ad87.zip", "author": "Federico Barbero;Ameya Velingker;Amin Saberi;Michael M. Bronstein;Francesco Di Giovanni", "authorids": "~Federico_Barbero1;~Ameya_Velingker1;~Amin_Saberi1;~Michael_M._Bronstein1;~Francesco_Di_Giovanni1", "gender": ";M;;M;M", "homepage": "https://federicobarbero.com;http://www.ameyavelingker.com;https://www.stanford.edu/~saberi;http://www.inf.usi.ch/bronstein/;https://francescodgv.github.io/", "dblp": ";117/3666.html;28/4017;07/2668;", "google_scholar": "jpYtKMEAAAAJ;6dFFudUAAAAJ;;UU3N6-UAAAAJ;yzjjeqsAAAAJ", "orcid": ";;;;", "linkedin": ";ameya-velingker-5811b711;;mbronstein/;", "or_profile": "~Federico_Barbero1;~Ameya_Velingker1;~Amin_Saberi1;~Michael_M._Bronstein1;~Francesco_Di_Giovanni1", "aff": "University of Oxford;Google;Stanford University;University of Oxford;Valence Labs powered by recursion", "aff_domain": "ox.ac.uk;google.com;stanford.edu;ox.ac.uk;valencelabs.com", "position": "PhD student;Research Scientist;Full Professor;Full Professor;Researcher", "bibtex": "@inproceedings{\nbarbero2024localityaware,\ntitle={Locality-Aware Graph Rewiring in {GNN}s},\nauthor={Federico Barbero and Ameya Velingker and Amin Saberi and Michael M. 
Bronstein and Francesco Di Giovanni},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=4Ua4hKiAJX}\n}", "github": "", "project": "", "reviewers": "RVSP;KgxZ;EJ4V;XEwE", "pdf_size": 570762, "rating": "3;5;6;8", "confidence": "3;4;4;3", "soundness": "2;2;3;2", "contribution": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "150;52;37;63", "wc_strengths": "16;134;62;46", "wc_weaknesses": "78;762;217;109", "wc_questions": "128;2;14;194", "wc_review": "372;950;330;412", "wc_reply_reviewers": "402;604;10;13", "wc_reply_authors": "1257;1706;760;439", "reply_reviewers": "2;3;1;1", "reply_authors": "3;5;2;1", "rating_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 75.5, 43.99147644714826 ], "wc_strengths_avg": [ 64.5, 43.39066719929529 ], "wc_weaknesses_avg": [ 291.5, 276.5 ], "wc_questions_avg": [ 84.5, 80.09213444527497 ], "wc_review_avg": [ 516.0, 252.24194734421155 ], "wc_reply_reviewers_avg": [ 257.25, 255.91929880335323 ], "wc_reply_authors_avg": [ 1040.5, 482.2460471585019 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 2.75, 1.479019945774904 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14010768070887396527&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=4Ua4hKiAJX", "pdf": "https://openreview.net/pdf?id=4Ua4hKiAJX", "email": "ox.ac.uk;google.com;stanford.edu;ox.ac.uk;valencelabs.com", "author_num": 5, "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "University of Oxford;Google;Stanford University;Valence Labs", "aff_unique_dep": ";Google;;", "aff_unique_url": "https://www.ox.ac.uk;https://www.google.com;https://www.stanford.edu;", "aff_unique_abbr": "Oxford;Google;Stanford;", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Mountain View;Stanford", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "United Kingdom;United States;" }, { "title": "Coordinate-Aware Modulation for Neural Fields", "status": "Spotlight", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19464", "id": "4UiLqimGm5", "author_site": "Joo Chan Lee, Daniel Rho, Seungtae Nam, Jong Hwan Ko, Eunbyung Park", "tldr": "", "abstract": "Neural fields, mapping low-dimensional input coordinates to corresponding signals, have shown promising results in representing various signals. Numerous methodologies have been proposed, and techniques employing MLPs and grid representations have achieved substantial success. MLPs allow compact and high expressibility, yet often suffer from spectral bias and slow convergence speed. On the other hand, methods using grids are free from spectral bias and achieve fast training speed, however, at the expense of high spatial complexity. In this work, we propose a novel way for exploiting both MLPs and grid representations in neural fields. Unlike the prevalent methods that combine them sequentially (extract features from the grids first and feed them to the MLP), we inject spectral bias-free grid representations into the intermediate features in the MLP. 
More specifically, we suggest a Coordinate-Aware Modulation (CAM), which modulates the intermediate features using scale and shift parameters extracted from the grid representations. This can maintain the strengths of MLPs while mitigating any remaining potential biases, facilitating the rapid learning of high-frequency components. In addition, we empirically found that the feature normalizations, which have not been successful in neural filed literature, proved to be effective when applied in conjunction with the proposed CAM. Experimental results demonstrate that CAM enhances the performance of neural representation and improves learning stability across a range of signals. Especially in the novel view synthesis task, we achieved state-of-the-art performance with the least number of parameters and fast training speed for dynamic scenes and the best performance under 1MB memory for static scenes. CAM also outperforms the best-performing video compression methods using neural fields by a large margin. Our project page is available at https://maincold2.github.io/cam/.", "keywords": "Neural Fields;Neural Representation", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/406c3e16161fffc2b34f230764f866385cec0619.zip", "author": "Joo Chan Lee;Daniel Rho;Seungtae Nam;Jong Hwan Ko;Eunbyung Park", "authorids": "~Joo_Chan_Lee1;~Daniel_Rho1;~Seungtae_Nam1;~Jong_Hwan_Ko2;~Eunbyung_Park1", "gender": "M;M;M;;M", "homepage": ";;https://github.com/stnamjef;http://iris.skku.edu/;https://silverbottlep.github.io/", "dblp": "282/1951;311/4143;321/0019;168/6308;92/9727", "google_scholar": "WKCHKk8AAAAJ;nEC0wK4AAAAJ;8NKPmmwCmrAC;https://scholar.google.co.kr/citations?user=UN_OIs4AAAAJ;iPyuJmQAAAAJ", "orcid": ";;;0000-0003-4434-4318;", "linkedin": ";;;;eunbyung-park-286384b4/", "or_profile": "~Joo_Chan_Lee1;~Daniel_Rho1;~Seungtae_Nam1;~Jong_Hwan_Ko2;~Eunbyung_Park1", "aff": "Sungkyunkwan University;Korea Telecom Research;Sungkyunkwan University;Sungkyunkwan University;Sungkyunkwan University", "aff_domain": "skku.edu;kt.com;skku.edu;skku.edu;skku.edu", "position": "PhD student;Researcher;MS student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nlee2024coordinateaware,\ntitle={Coordinate-Aware Modulation for Neural Fields},\nauthor={Joo Chan Lee and Daniel Rho and Seungtae Nam and Jong Hwan Ko and Eunbyung Park},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=4UiLqimGm5}\n}", "github": "", "project": "", "reviewers": "kCkz;oV38;DCsj;7EJV", "pdf_size": 2840063, "rating": "6;6;8;8", "confidence": "2;3;4;1", "soundness": "3;3;3;4", "contribution": "3;3;3;4", "presentation": "3;4;3;4", "wc_summary": "71;113;156;45", "wc_strengths": "43;74;67;71", "wc_weaknesses": "20;220;313;64", "wc_questions": "13;2;109;4", "wc_review": "147;409;645;184", "wc_reply_reviewers": "33;0;72;0", "wc_reply_authors": "192;575;843;159", "reply_reviewers": "1;0;1;0", "reply_authors": "1;1;2;1", "rating_avg": [ 7.0, 1.0 ], "confidence_avg": [ 2.5, 1.118033988749895 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 96.25, 42.174488734304774 ], "wc_strengths_avg": [ 63.75, 12.234684303242156 ], "wc_weaknesses_avg": [ 154.25, 117.99655715316443 ], "wc_questions_avg": [ 32.0, 44.64862819841165 ], "wc_review_avg": [ 346.25, 199.50861510220554 ], 
"wc_reply_reviewers_avg": [ 26.25, 29.65109610115619 ], "wc_reply_authors_avg": [ 442.25, 283.31905601282807 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5986837010442502038&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=4UiLqimGm5", "pdf": "https://openreview.net/pdf?id=4UiLqimGm5", "email": "skku.edu;kt.com;skku.edu;skku.edu;skku.edu", "author_num": 5, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Sungkyunkwan University;Korea Telecom", "aff_unique_dep": ";Research", "aff_unique_url": "https://www.skku.edu;https://www.kt.com", "aff_unique_abbr": "SKKU;KT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Towards Non-Asymptotic Convergence for Diffusion-Based Generative Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19463", "id": "4VGEeER6W9", "author_site": "Gen Li, Yuting Wei, Yuxin Chen, Yuejie Chi", "tldr": "", "abstract": "Diffusion models, which convert noise into new data instances by learning to reverse a Markov diffusion process, have become a cornerstone in contemporary generative modeling. While their practical power has now been widely recognized, the theoretical underpinnings remain far from mature. In this work, we develop a suite of non-asymptotic theory towards understanding the data generation process of diffusion models in discrete time, assuming access to $\\ell_2$-accurate estimates of the (Stein) score functions. For a popular deterministic sampler (based on the probability flow ODE), we establish a convergence rate proportional to $1/T$ (with $T$ the total number of steps), improving upon past results; for another mainstream stochastic sampler (i.e., a type of the denoising diffusion probabilistic model), we derive a convergence rate proportional to $1/\\sqrt{T}$, matching the state-of-the-art theory. Imposing only minimal assumptions on the target data distribution (e.g., no smoothness assumption is imposed), our results characterize how $\\ell_2$ score estimation errors affect the quality of the data generation process. 
In contrast to prior works, our theory is developed based on an elementary yet versatile non-asymptotic approach without resorting to toolboxes for SDEs and ODEs.", "keywords": "diffusion models;score-based generative modeling;non-asymptotic theory;reverse SDE;probability flow ODE;denoising diffusion probabilistic model", "primary_area": "learning theory", "supplementary_material": "", "author": "Gen Li;Yuting Wei;Yuxin Chen;Yuejie Chi", "authorids": "~Gen_Li2;~Yuting_Wei1;~Yuxin_Chen5;~Yuejie_Chi1", "gender": "M;F;M;", "homepage": ";https://yutingwei.github.io/;https://yuxinchen2020.github.io/;", "dblp": "28/538-5.html;184/3856;11/5123-2;", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;fsbXdAYAAAAJ;RtNVud4AAAAJ;", "orcid": "0000-0002-3078-9191;;0000-0001-9256-5815;", "linkedin": ";;;", "or_profile": "~Gen_Li2;~Yuting_Wei1;~Yuxin_Chen5;~Yuejie_Chi1", "aff": "The Chinese University of Hong Kong;The Wharton School, University of Pennsylvania;University of Pennsylvania;", "aff_domain": "cuhk.edu.hk;wharton.upenn.edu;upenn.edu;", "position": "Assistant Professor;Assistant Professor;Associate Professor;", "bibtex": "@inproceedings{\nli2024towards,\ntitle={Towards Non-Asymptotic Convergence for Diffusion-Based Generative Models},\nauthor={Gen Li and Yuting Wei and Yuxin Chen and Yuejie Chi},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=4VGEeER6W9}\n}", "github": "", "project": "", "reviewers": "TgkF;nMDV;h2T8;ScYQ", "pdf_size": 308414, "rating": "6;8;8;8", "confidence": "3;2;4;5", "soundness": "4;3;3;4", "contribution": "3;3;2;4", "presentation": "3;3;2;3", "wc_summary": "133;155;116;81", "wc_strengths": "109;114;64;52", "wc_weaknesses": "56;182;137;128", "wc_questions": "123;142;60;3", "wc_review": "421;593;377;264", "wc_reply_reviewers": "24;74;73;0", "wc_reply_authors": "603;677;1134;468", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;3;1", "rating_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 3.5, 0.5 ], "contribution_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 121.25, 27.040478915877213 ], "wc_strengths_avg": [ 84.75, 27.141987768032024 ], "wc_weaknesses_avg": [ 125.75, 45.168434774740646 ], "wc_questions_avg": [ 82.0, 54.78594710324902 ], "wc_review_avg": [ 413.75, 118.27800936776033 ], "wc_reply_reviewers_avg": [ 42.75, 31.901214710415026 ], "wc_reply_authors_avg": [ 720.5, 250.21840459886238 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.2581988897471611, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10749362722430110799&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=4VGEeER6W9", "pdf": "https://openreview.net/pdf?id=4VGEeER6W9", "email": "cuhk.edu.hk;wharton.upenn.edu;upenn.edu;", "author_num": 4, "aff_unique_index": "0;1;1", "aff_unique_norm": "Chinese University of Hong Kong;University of Pennsylvania", "aff_unique_dep": ";The Wharton School", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.wharton.upenn.edu", "aff_unique_abbr": "CUHK;UPenn Wharton", "aff_campus_unique_index": "0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;1;1", "aff_country_unique": "China;United States" }, { "title": "Stable 
Neural Stochastic Differential Equations in Analyzing Irregular Time Series Data", "status": "Spotlight", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19462", "id": "4VIgNuQ1pY", "author_site": "YongKyung Oh, Dongyoung Lim, Sungil Kim", "tldr": "", "abstract": "Irregular sampling intervals and missing values in real-world time series data present challenges for conventional methods that assume consistent intervals and complete data. Neural Ordinary Differential Equations (Neural ODEs) offer an alternative approach, utilizing neural networks combined with ODE solvers to learn continuous latent representations through parameterized vector fields. Neural Stochastic Differential Equations (Neural SDEs) extend Neural ODEs by incorporating a diffusion term, although this addition is not trivial, particularly when addressing irregular intervals and missing values. Consequently, careful design of drift and diffusion functions is crucial for maintaining stability and enhancing performance, while incautious choices can result in adverse properties such as the absence of strong solutions, stochastic destabilization, or unstable Euler discretizations, significantly affecting Neural SDEs' performance. In this study, we propose three stable classes of Neural SDEs: Langevin-type SDE, Linear Noise SDE, and Geometric SDE. Then, we rigorously demonstrate their robustness in maintaining excellent performance under distribution shift, while effectively preventing overfitting. To assess the effectiveness of our approach, we conduct extensive experiments on four benchmark datasets for interpolation, forecasting, and classification tasks, and analyze the robustness of our methods with 30 public datasets under different missing rates. Our results demonstrate the efficacy of the proposed method in handling real-world irregular time series data.", "keywords": "Neural Ordinary Differential Equations;Neural Stochastic Differential Equations;Irregular time series data", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "YongKyung Oh;Dongyoung Lim;Sungil Kim", "authorids": "~YongKyung_Oh1;~Dongyoung_Lim1;~Sungil_Kim1", "gender": ";M;M", "homepage": ";https://sites.google.com/view/dlim/;http://analytics.unist.ac.kr", "dblp": ";;", "google_scholar": ";;", "orcid": ";;", "linkedin": ";;", "or_profile": "~YongKyung_Oh1;~Dongyoung_Lim1;~Sungil_Kim1", "aff": ";Ulsan National Institute of Science and Technology;Ulsan National Institute of Science and Technology", "aff_domain": ";unist.ac.kr;unist.ac.kr", "position": ";Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\noh2024stable,\ntitle={Stable Neural Stochastic Differential Equations in Analyzing Irregular Time Series Data},\nauthor={YongKyung Oh and Dongyoung Lim and Sungil Kim},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=4VIgNuQ1pY}\n}", "github": "", "project": "", "reviewers": "Q7tH;8YRs;gG9F", "pdf_size": 1326615, "rating": "6;6;8", "confidence": "3;3;4", "soundness": "3;3;3", "contribution": "3;3;3", "presentation": "3;3;3", "wc_summary": "62;88;146", "wc_strengths": "64;74;120", "wc_weaknesses": "64;163;178", "wc_questions": "168;85;84", "wc_review": "358;410;528", "wc_reply_reviewers": "15;12;48", "wc_reply_authors": "765;1175;360", "reply_reviewers": "1;1;1", "reply_authors": "1;2;1", "rating_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 
0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 98.66666666666667, 35.11251755270318 ], "wc_strengths_avg": [ 86.0, 24.385788210895843 ], "wc_weaknesses_avg": [ 135.0, 50.57667446560717 ], "wc_questions_avg": [ 112.33333333333333, 39.36439451529206 ], "wc_review_avg": [ 432.0, 71.12430433168866 ], "wc_reply_reviewers_avg": [ 25.0, 16.30950643030009 ], "wc_reply_authors_avg": [ 766.6666666666666, 332.72444388045125 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.9999999999999998, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14858899934737697192&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=4VIgNuQ1pY", "pdf": "https://openreview.net/pdf?id=4VIgNuQ1pY", "email": ";unist.ac.kr;unist.ac.kr", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Ulsan National Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.unist.ac.kr", "aff_unique_abbr": "UNIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "title": "Unveiling the Unseen: Identifiable Clusters in Trained Depthwise Convolutional Kernels", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19461", "id": "4VgBjsOC8k", "author_site": "Zahra Babaiee, Peyman Kiasari, Daniela Rus, Radu Grosu", "tldr": "", "abstract": "Recent advances in depthwise-separable convolutional neural networks (DS-CNNs) have led to novel architectures, that surpass the performance of classical CNNs, by a considerable scalability and accuracy margin. This paper reveals another striking property of DS-CNN architectures: discernible and explainable patterns emerge in their trained depthwise convolutional kernels in all layers. Through an extensive analysis of millions of trained filters, with different sizes and from various models, we employed unsupervised clustering with autoencoders, to categorize these filters. Astonishingly, the patterns converged into a few main clusters, each resembling the difference of Gaussian (DoG) functions, and their first and second-order derivatives. Notably, we classify over 95\\% and 90\\% of the filters from state-of-the-art ConvNeXtV2 and ConvNeXt models, respectively. This finding is not merely a technological curiosity; it echoes the foundational models neuroscientists have long proposed for the vision systems of mammals. Our results thus deepen our understanding of the emergent properties of trained DS-CNNs and provide a bridge between artificial and biological visual processing systems. 
More broadly, they pave the way for more interpretable and biologically-inspired neural network designs in the future.", "keywords": "Depthwise Convolutions;Explainability;Neuroscience;Computer Vision;ConvNext", "primary_area": "visualization or interpretation of learned representations", "supplementary_material": "", "author": "Zahra Babaiee;Peyman Kiasari;Daniela Rus;Radu Grosu", "authorids": "~Zahra_Babaiee1;~Peyman_Kiasari1;~Daniela_Rus1;~Radu_Grosu1", "gender": "F;;F;M", "homepage": "https://informatics.tuwien.ac.at/people/zahra-babaiee;;https://www.csail.mit.edu/person/daniela-rus;https://ti.tuwien.ac.at/cps/people/grosu", "dblp": ";;r/DanielaRus;94/5421", "google_scholar": ";;https://scholar.google.com/citations?hl=en;1g_muAgAAAAJ", "orcid": ";;;0000-0001-5715-2142", "linkedin": "zahra-babaiee-5b4ba314b;;;", "or_profile": "~Zahra_Babaiee1;~Peyman_Kiasari1;~Daniela_Rus1;~Radu_Grosu1", "aff": "TU Wien Vienna University of Technology;;Massachusetts Institute of Technology;TU Wien Vienna University of Technology", "aff_domain": "tuwien.ac.at;;mit.edu;tuwien.ac.at", "position": "PhD student;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nbabaiee2024unveiling,\ntitle={Unveiling the Unseen: Identifiable Clusters in Trained Depthwise Convolutional Kernels},\nauthor={Zahra Babaiee and Peyman Kiasari and Daniela Rus and Radu Grosu},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=4VgBjsOC8k}\n}", "github": "", "project": "", "reviewers": "hU9E;Jbbg;9rPz;GKik", "pdf_size": 23199154, "rating": "3;6;8;8", "confidence": "5;4;4;2", "soundness": "3;3;3;4", "contribution": "2;3;3;4", "presentation": "3;3;3;4", "wc_summary": "104;39;93;110", "wc_strengths": "36;137;73;219", "wc_weaknesses": "225;302;288;461", "wc_questions": "217;51;2;104", "wc_review": "582;529;456;894", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "876;755;482;865", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;1;2", "rating_avg": [ 6.25, 2.0463381929681126 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 86.5, 28.09359357576029 ], "wc_strengths_avg": [ 116.25, 69.45997048660473 ], "wc_weaknesses_avg": [ 319.0, 86.96263565463043 ], "wc_questions_avg": [ 93.5, 79.90775932285925 ], "wc_review_avg": [ 615.25, 167.03798220764043 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 744.5, 158.76791237526555 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7567450038061343, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2784907143363510760&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "openreview": "https://openreview.net/forum?id=4VgBjsOC8k", "pdf": "https://openreview.net/pdf?id=4VgBjsOC8k", "email": "tuwien.ac.at;;mit.edu;tuwien.ac.at", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Vienna University of Technology;Massachusetts Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.tuwien.ac.at;https://web.mit.edu", "aff_unique_abbr": "TU Wien;MIT", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Vienna;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Austria;United States" }, { "id": "4WCqddG4Ep", "title": "Quantifying Classification 
Performance through Combinatorial Geometry and Localized Data Analysis", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Understanding the theoretical boundaries of a learning mechanism and ascertaining its fundamental capabilities remains a persistent challenge in machine learning. While the VC-dimension has been instrumental in quantifying a model's data-fitting abilities, its independence from data distribution sometimes limits its practicality. In this study, we address the problem of establishing realistic bounds on a model\u2019s classification power by harnessing the underlying combinatorial geometry of data using novel tools. We introduce conditions that rely on \\emph{local} computations performed on small data subsets to determine the \\emph{global} performance of classifiers. Specifically, by considering a dataset $\\{(X_i,y_i)\\}_{i=1}^{n}$, where $X_i\\in\\mathbb{R}^d$ is a feature vector and $y_i$ is the corresponding label, we establish optimal bounds on the training error (in terms of number of misclassifications) of a linear classifier based on the linear separability of local data subsets, each comprising of $(d + 2)$ data points. We also prove an optimal bound on the margin of Support Vector Machines (SVMs) in terms of performance of SVMs on $(d+2)$ sized subsets. Furthermore, we extend these results to a non-linear classifier employing hypersphere boundary separation. Our experimental results underscore the significance and applicability of these theoretical bounds in real-world machine learning scenarios. This research contributes valuable insights into assessing the classification potential of both linear and non-linear models for large datasets. By emphasizing local computations on subsets of data with fixed cardinality, it provides a foundation for informed and efficient decision-making in practical machine learning applications.", "keywords": "lower bound;geometrical insights;local data;classification performance;combinatorics;linear separation", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Christopher Lee;Mudassir Shabbir;Waseem Abbas", "authorids": "~Christopher_Lee10;~Mudassir_Shabbir1;~Waseem_Abbas2", "gender": "M;;", "homepage": ";;http://www.wabbas.com/", "dblp": ";78/7323;79/11046", "google_scholar": ";https://scholar.google.com.pk/citations?user=bRKvwRYAAAAJ;U9oBOmMAAAAJ", "orcid": "0000-0003-0477-7450;;", "linkedin": ";;", "or_profile": "~Christopher_Lee10;~Mudassir_Shabbir1;~Waseem_Abbas2", "aff": "University of Texas at Dallas;ITU of Punjab Lahore, Pakistan;The University of Texas at Dallas", "aff_domain": "utdallas.edu;itu.edu.pk;utdallas.edu", "position": "PhD student;Associate Professor;Assistant Professor", "bibtex": "@misc{\nlee2024quantifying,\ntitle={Quantifying Classification Performance through Combinatorial Geometry and Localized Data Analysis},\nauthor={Christopher Lee and Mudassir Shabbir and Waseem Abbas},\nyear={2024},\nurl={https://openreview.net/forum?id=4WCqddG4Ep}\n}", "github": "", "project": "", "reviewers": "re7m;RpW5;VSAx;VFVG;t2D4", "site": "https://openreview.net/forum?id=4WCqddG4Ep", "pdf_size": 612816, "rating": "3;5;5;5;6", "confidence": "4;3;4;5;3", "soundness": "4;3;4;4;2", "contribution": "1;2;2;3;3", "presentation": "3;2;4;3;2", "wc_summary": "86;53;153;62;72", "wc_strengths": "49;15;45;109;65", "wc_weaknesses": "395;75;104;303;70", "wc_questions": "2;20;1;226;2", "wc_review": "532;163;303;700;209", "wc_reply_reviewers": "0;0;0;0;0", 
"wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "0;0;0;0;0", "rating_avg": [ 4.8, 0.9797958971132712 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 3.4, 0.8 ], "contribution_avg": [ 2.2, 0.7483314773547882 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 85.2, 35.628078814328454 ], "wc_strengths_avg": [ 56.6, 30.78701024783017 ], "wc_weaknesses_avg": [ 189.4, 134.02477382931858 ], "wc_questions_avg": [ 50.2, 88.18707388274089 ], "wc_review_avg": [ 381.4, 203.83974097314783 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3273268353539886, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:XXbM6lowOY0J:scholar.google.com/&scioq=Quantifying+Classification+Performance+through+Combinatorial+Geometry+and+Localized+Data+Analysis&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Texas at Dallas;ITU of Punjab", "aff_unique_dep": ";", "aff_unique_url": "https://www.utdallas.edu;", "aff_unique_abbr": "UT Dallas;", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Dallas;Lahore", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;Pakistan" }, { "id": "4WKDwIaF7y", "title": "Lookahead Sharpness-Aware Minimization", "track": "main", "status": "Reject", "tldr": "", "abstract": "Sharpness-Aware Minimization (SAM), which performs gradient descent on adversarially perturbed weights, can improve generalization by\nidentifying flatter minima. However, recent studies have shown that SAM may suffer from convergence instability and oscillate around saddle points, resulting in slow convergence and inferior performance.\nTo address this problem, we propose the use of a lookahead mechanism in the methods of extra-gradient and optimistic gradient.\nBy examining the nature of SAM, we simplify the extrapolation procedure, resulting in a more efficient algorithm.\nTheoretical results show that the proposed method converge to a stationary point and escape saddle points faster. 
Experiments on standard benchmark datasets also verify that the proposed method outperforms the SOTAs, and converge more effectively to flat minima.", "keywords": "Deep Learning;Sharpness-Aware Minimization", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Runsheng Yu;Youzhi Zhang;James Kwok", "authorids": "~Runsheng_Yu2;~Youzhi_Zhang2;~James_Kwok1", "gender": "Not Specified;;", "homepage": "https://www.linkedin.com/in/runsheng-yu-560696127/;https://youzhi333.github.io/index.html;", "dblp": "210/2646.html?q=runsheng%20yu;131/9490-1;", "google_scholar": ";i2j5DmwAAAAJ;", "orcid": "0000-0003-0053-1234;0000-0002-2984-734X;", "linkedin": ";;", "or_profile": "~Runsheng_Yu2;~Youzhi_Zhang2;~James_Kwok1", "aff": "Hong Kong University of Science and Technology;Centre for Artificial Intelligence and Robotics, Hong Kong Institute of Science & Innovation, Chinese Academy of Sciences;", "aff_domain": "ust.hk;cair-cas.org.hk;", "position": "PhD student;Assistant Professor;", "bibtex": "@misc{\nyu2024lookahead,\ntitle={Lookahead Sharpness-Aware Minimization},\nauthor={Runsheng Yu and Youzhi Zhang and James Kwok},\nyear={2024},\nurl={https://openreview.net/forum?id=4WKDwIaF7y}\n}", "github": "", "project": "", "reviewers": "hY3b;1P81;gBpE;rtXV", "site": "https://openreview.net/forum?id=4WKDwIaF7y", "pdf_size": 4328500, "rating": "3;5;5;6", "confidence": "4;4;4;3", "soundness": "3;3;3;3", "contribution": "1;3;2;2", "presentation": "2;3;2;3", "wc_summary": "124;59;20;101", "wc_strengths": "14;100;11;63", "wc_weaknesses": "587;5;145;73", "wc_questions": "71;284;1;2", "wc_review": "796;448;177;239", "wc_reply_reviewers": "251;74;63;15", "wc_reply_authors": "3041;894;908;682", "reply_reviewers": "1;1;1;1", "reply_authors": "5;3;2;1", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 76.0, 39.85599076675927 ], "wc_strengths_avg": [ 47.0, 36.9120576505835 ], "wc_weaknesses_avg": [ 202.5, 227.44394913912308 ], "wc_questions_avg": [ 89.5, 115.82422026502056 ], "wc_review_avg": [ 415.0, 241.8005376338109 ], "wc_reply_reviewers_avg": [ 100.75, 89.5387485952311 ], "wc_reply_authors_avg": [ 1381.25, 962.4316534175297 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.75, 1.479019945774904 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.6622661785325219, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:gTAxBl5bXQYJ:scholar.google.com/&scioq=Lookahead+Sharpness-Aware+Minimization&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Hong Kong University of Science and Technology;Hong Kong Institute of Science & Innovation, Chinese Academy of Sciences", "aff_unique_dep": ";Centre for Artificial Intelligence and Robotics", "aff_unique_url": "https://www.ust.hk;", "aff_unique_abbr": "HKUST;", "aff_campus_unique_index": "0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Learning from Sparse Offline Datasets via Conservative Density Estimation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19460", "id": "4WM0OogPTx", "author_site": "Zhepeng Cen, Zuxin Liu, Zitong Wang, Yihang Yao, Henry Lam, DING ZHAO", "tldr": "", "abstract": 
"Offline reinforcement learning (RL) offers a promising direction for learning policies from pre-collected datasets without requiring further interactions with the environment. However, existing methods struggle to handle out-of-distribution (OOD) extrapolation errors, especially in sparse reward or scarce data settings. In this paper, we propose a novel training algorithm called Conservative Density Estimation (CDE), which addresses this challenge by explicitly imposing constraints on the state-action occupancy stationary distribution. CDE overcomes the limitations of existing approaches, such as the stationary distribution correction method, by addressing the support mismatch issue in marginal importance sampling. Our method achieves state-of-the-art performance on the D4RL benchmark. Notably, CDE consistently outperforms baselines in challenging tasks with sparse rewards or insufficient data, demonstrating the advantages of our approach in addressing the extrapolation error problem in offline RL.", "keywords": "offline reinforcement learning;stationary distribution correction estimation", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Zhepeng Cen;Zuxin Liu;Zitong Wang;Yihang Yao;Henry Lam;Ding Zhao", "authorids": "~Zhepeng_Cen1;~Zuxin_Liu1;~Zitong_Wang1;~Yihang_Yao1;~Henry_Lam1;~Ding_Zhao1", "gender": "M;M;M;;;", "homepage": "https://czp16.github.io/;https://www.zuxin.me;;https://yihangyao.github.io/;http://www.columbia.edu/~khl2114/;https://safeai-lab.github.io", "dblp": "254/6182;227/3137;;305/7045.html;35/9508;", "google_scholar": "M-X3Q-UAAAAJ;5ApCTCoAAAAJ;v6UwRlwAAAAJ;EPduTdwAAAAJ;Bnj50x0AAAAJ;z7tPc9IAAAAJ", "orcid": ";0000-0001-7412-5074;;;;", "linkedin": ";zuxin-liu/;;yihang-yao-3a7658249/;;", "or_profile": "~Zhepeng_Cen1;~Zuxin_Liu1;~Zitong_Wang1;~Yihang_Yao1;~Henry_Lam1;~Ding_Zhao1", "aff": "Carnegie Mellon University;Salesforce AI Research;Columbia University;Carnegie Mellon University;Columbia University;Carnegie Mellon University", "aff_domain": "andrew.cmu.edu;salesforce.com;columbia.edu;cmu.edu;columbia.edu;cmu.edu", "position": "PhD student;Researcher;PhD student;PhD student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\ncen2024learning,\ntitle={Learning from Sparse Offline Datasets via Conservative Density Estimation},\nauthor={Zhepeng Cen and Zuxin Liu and Zitong Wang and Yihang Yao and Henry Lam and Ding Zhao},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=4WM0OogPTx}\n}", "github": "", "project": "", "reviewers": "MKs2;HRnW;6pFf;voqr", "pdf_size": 2547850, "rating": "5;6;8;8", "confidence": "4;3;3;3", "soundness": "2;3;3;3", "contribution": "2;3;4;3", "presentation": "2;3;3;3", "wc_summary": "39;68;37;236", "wc_strengths": "35;129;86;38", "wc_weaknesses": "231;168;117;137", "wc_questions": "38;2;37;2", "wc_review": "343;367;277;413", "wc_reply_reviewers": "0;0;51;0", "wc_reply_authors": "1135;568;572;311", "reply_reviewers": "0;0;1;0", "reply_authors": "2;1;2;1", "rating_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 95.0, 82.32557318354972 ], "wc_strengths_avg": [ 72.0, 38.632887544163715 ], "wc_weaknesses_avg": [ 163.25, 43.129891026989625 ], "wc_questions_avg": [ 19.75, 17.75352077758099 ], "wc_review_avg": [ 350.0, 
49.08156476723211 ], "wc_reply_reviewers_avg": [ 12.75, 22.083647796503186 ], "wc_reply_authors_avg": [ 646.5, 301.207984621922 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.7777777777777777, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12655561621720249093&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "openreview": "https://openreview.net/forum?id=4WM0OogPTx", "pdf": "https://openreview.net/pdf?id=4WM0OogPTx", "email": "andrew.cmu.edu;salesforce.com;columbia.edu;cmu.edu;columbia.edu;cmu.edu", "author_num": 6, "aff_unique_index": "0;1;2;0;2;0", "aff_unique_norm": "Carnegie Mellon University;Salesforce;Columbia University", "aff_unique_dep": ";Salesforce AI Research;", "aff_unique_url": "https://www.cmu.edu;https://www.salesforce.com;https://www.columbia.edu", "aff_unique_abbr": "CMU;Salesforce AI;Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "4WRoRL0YKR", "title": "Explainable Multi-Objective Model Selection for Time Series Forecasting", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Machine learning (ML) models exhibit miscellaneous properties, and deployment inevitably trades certain performance aspects against others. This is particularly valid for time series forecasting, where special characteristics such as seasonality impact how models perform. Since there is \u201cno free lunch\u201d, practitioners have to choose among available methods when assembling new learning systems. Benchmarks, meta-learning, and automated ML come to aid, but in many cases focus on predictive capabilities while ignoring other aspects such as complexity and resource consumption. This is especially concerning considering the popularity of deep neural networks (DNNs) for forecasting, as these models are widely conceived as computation-heavy black boxes. To alleviate these shortcomings, we propose X-PCR \u2013 a novel approach for explainable multi-objective model selection. It uses meta-learning to assess the suitability of any model in terms of (p)redictive error, (c)omplexity and (r)esource demand. By allowing users to prioritize the individual objectives in this trade-off, model recommendations become both controllable and understandable. We demonstrate the feasibility of our methodology in the task of forecasting time series with state-of-the-art DNNs. In total, we perform over 1000 experiments across 114 data sets, discuss the resulting efficiency landscape, and provide evidence of how X-PCR outperforms other selection approaches. 
On average, our approach only requires 20% of computation costs for recommending models with 85% of the best possible performance.", "keywords": "Meta-learning;Time Series Forecasting;Resource-aware ML;Explainability;Trustworthy AI", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "", "author": "Raphael Fischer;Amal Saadallah", "authorids": "~Raphael_Fischer1;~Amal_Saadallah1", "gender": "M;F", "homepage": "https://lamarr.cs.tu-dortmund.de/team/raphael-fischer;https://www-ai.cs.tu-dortmund.de/PERSONAL/saadallah.html", "dblp": "249/4056;242/4220.html", "google_scholar": "https://scholar.google.de/citations?user=zxTGb7AAAAAJ;UxU-Q-gAAAAJ", "orcid": "0000-0002-1808-5773;0000-0003-2976-7574", "linkedin": "raphael-fischer-3b1046208/;", "or_profile": "~Raphael_Fischer1;~Amal_Saadallah1", "aff": "TU Dortmund University;Technische Universit\u00e4t Dortmund", "aff_domain": "udo.edu;tu-dortmund.de", "position": "PhD student;Postdoc", "bibtex": "@misc{\nfischer2024explainable,\ntitle={Explainable Multi-Objective Model Selection for Time Series Forecasting},\nauthor={Raphael Fischer and Amal Saadallah},\nyear={2024},\nurl={https://openreview.net/forum?id=4WRoRL0YKR}\n}", "github": "", "project": "", "reviewers": "m8sj;4DT3;Sacq", "site": "https://openreview.net/forum?id=4WRoRL0YKR", "pdf_size": 753472, "rating": "3;3;5", "confidence": "4;3;3", "soundness": "1;2;2", "contribution": "1;1;2", "presentation": "2;2;2", "wc_summary": "76;96;57", "wc_strengths": "17;17;56", "wc_weaknesses": "339;111;72", "wc_questions": "85;70;72", "wc_review": "517;294;257", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "133;79;86", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 1.6666666666666667, 0.4714045207910317 ], "contribution_avg": [ 1.3333333333333333, 0.4714045207910317 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 76.33333333333333, 15.923427883328248 ], "wc_strengths_avg": [ 30.0, 18.384776310850235 ], "wc_weaknesses_avg": [ 174.0, 117.75398082442904 ], "wc_questions_avg": [ 75.66666666666667, 6.649979114420002 ], "wc_review_avg": [ 356.0, 114.84192033689904 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 99.33333333333333, 23.976840677805924 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:jSpCvMouZPoJ:scholar.google.com/&scioq=Explainable+Multi-Objective+Model+Selection+for+Time+Series+Forecasting&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Technische Universit\u00e4t Dortmund", "aff_unique_dep": "", "aff_unique_url": "https://www.tu-dortmund.de", "aff_unique_abbr": "TU Dortmund", "aff_campus_unique_index": "0", "aff_campus_unique": "Dortmund;", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "id": "4WZNdnwmhk", "title": "Parameter-Efficient Fine-Tuning via Partially Decomposable Loss Analysis and Sharing", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Large language model (LLM) has become a crucial tool for many machine learning research and applications. Due to the large parameter count of these models and the enormous amount of training data, large language models are usually strong at general tasks. 
For most applications, however, one would like a smaller, more parameter-efficient model that is specialized in a particular field. This motivates the design of fine-tuning, which tunes a pre-trained LLM for a few iterations on a dedicated dataset for specific tasks. If not handled correctly, the fine-tuning process would create another LLM that has a comparable number of parameters, significantly slowing downstream applications.\n\nOne of the most widely-known ideas for resolving this issue is the Low-Rank Adaptation (LoRA) framework, where one assumes the fine-tuning weights are low-rank, and therefore the number of parameters, together with the inference time, is drastically improved. While performing well in practice, the LoRA method is still a heuristic and lacks theoretical guarantees even though the loss function might inherit certain structures. Moreover, when fine-tuning multiple similar tasks in parallel, LoRA requires one to learn a pair of distinct low-rank matrices for each task, ignoring possible shared structure between tasks.\n\nIn this work, we design a framework that further reduces parameter count compared to LoRA and enables parameter sharing across different parallel fine-tuning tasks. When the number of parallel fine-tuning tasks grows larger, we cut the parameter count almost in half compared to LoRA. Moreover, we prove why our approach --- or, more generally, LoRA --- works for a large class of loss functions. We empirically verify the effectiveness of our method on various benchmark models and datasets, demonstrating a much improved parameter count while retaining performance similar to LoRA.", "keywords": "Fine-tuning;efficient training", "primary_area": "optimization", "supplementary_material": "", "author": "Raghavendra Addanki;Ritwik Sinha;Zhao Song;Yizhou Wang;Lichen Zhang", "authorids": "~Raghavendra_Addanki1;~Ritwik_Sinha1;~Zhao_Song3;~Yizhou_Wang3;~Lichen_Zhang2", "gender": "M;M;M;M;M", "homepage": "https://raddanki.github.io/;https://research.adobe.com/person/ritwik-sinha/;https://www.youtube.com/@zhaosong2031;https://wyzjack.github.io/;https://lczh.github.io/", "dblp": "218/5579;127/3163;76/4051-2;71/3387-6;00/6357-3", "google_scholar": "SUPaOhgAAAAJ;https://scholar.google.co.in/citations?user=4SDTMIQAAAAJ;yDZct7UAAAAJ;H4kqV1MAAAAJ;https://scholar.google.com/citations?view_op=list_works", "orcid": ";;;0000-0003-1601-9649;", "linkedin": ";;;yizhou-wang-786603155/;", "or_profile": "~Raghavendra_Addanki1;~Ritwik_Sinha1;~Zhao_Song3;~Yizhou_Wang3;~Lichen_Zhang2", "aff": "Adobe Systems;Adobe Systems;Adobe;Northeastern University;Amazon", "aff_domain": "adobe.com;adobe.com;adobe.com;northeastern.edu;amazon.com", "position": "Research Scientist;Researcher;Researcher;PhD student;Intern", "bibtex": "@misc{\naddanki2024parameterefficient,\ntitle={Parameter-Efficient Fine-Tuning via Partially Decomposable Loss Analysis and Sharing},\nauthor={Raghavendra Addanki and Ritwik Sinha and Zhao Song and Yizhou Wang and Lichen Zhang},\nyear={2024},\nurl={https://openreview.net/forum?id=4WZNdnwmhk}\n}", "github": "", "project": "", "reviewers": "rWs7;GVCH;Va7k", "site": "https://openreview.net/forum?id=4WZNdnwmhk", "pdf_size": 265640, "rating": "3;3;5", "confidence": "4;2;3", "soundness": "1;2;3", "contribution": "2;3;2", "presentation": "3;3;3", "wc_summary": "28;99;87", "wc_strengths": "29;85;79", "wc_weaknesses": "218;53;107", "wc_questions": "8;2;71", "wc_review": "283;239;344", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0",
"rating_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 2.0, 0.816496580927726 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 71.33333333333333, 31.030450993965413 ], "wc_strengths_avg": [ 64.33333333333333, 25.104227178350307 ], "wc_weaknesses_avg": [ 126.0, 68.68769904429759 ], "wc_questions_avg": [ 27.0, 31.20897306865447 ], "wc_review_avg": [ 288.6666666666667, 43.05293898859351 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 3, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:rwCji96uKrUJ:scholar.google.com/&scioq=Parameter-Efficient+Fine-Tuning+via+Partially+Decomposable+Loss+Analysis+and+Sharing&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0;1;2", "aff_unique_norm": "Adobe;Northeastern University;Amazon", "aff_unique_dep": "Adobe Systems Incorporated;;Amazon.com, Inc.", "aff_unique_url": "https://www.adobe.com;https://www.northeastern.edu;https://www.amazon.com", "aff_unique_abbr": "Adobe;NEU;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Llemma: An Open Language Model for Mathematics", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19459", "id": "4WnqRR915j", "author_site": "Zhangir Azerbayev, Hailey Schoelkopf, Keiran Paster, Marco Dos Santos, Stephen McAleer, Qiaochu Jiang, Jia Deng, Stella R Biderman, Sean Welleck", "tldr": "", "abstract": "We present Llemma, a large language model for mathematics. We continue pretraining Code Llama on the Proof-Pile-2, a mixture of scientific papers, web data containing mathematics, and mathematical code, yielding Llemma. On the MATH benchmark Llemma outperforms all known openly released models, as well as the unreleased Minerva model suite on an equi-parameter basis. Moreover, Llemma is capable of tool use and formal theorem proving without any finetuning. We openly release all artifacts, including 7 billion and 34 billion parameter models, the Proof-Pile-2, and code to replicate our experiments.", "keywords": "reasoning;language models;pretraining", "primary_area": "generative models", "supplementary_material": "/attachment/1db90b39240173c73d9cf5a455dc8fb7ce342985.zip", "author": "Zhangir Azerbayev;Hailey Schoelkopf;Keiran Paster;Marco Dos Santos;Stephen Marcus McAleer;Albert Q. 
Jiang;Jia Deng;Stella Biderman;Sean Welleck", "authorids": "~Zhangir_Azerbayev1;~Hailey_Schoelkopf1;~Keiran_Paster1;~Marco_Dos_Santos1;~Stephen_Marcus_McAleer1;~Albert_Q._Jiang1;~Jia_Deng1;~Stella_Biderman1;~Sean_Welleck1", "gender": "M;F;M;M;M;;M;F;", "homepage": ";;http://keirp.com;;https://www.andrew.cmu.edu/user/smcaleer/;;;http://www.stellabiderman.com;", "dblp": ";;;;;;07/6526-1.html;239/5641;", "google_scholar": ";XLahYIYAAAAJ;;;iEFL4-YAAAAJ;;U3Eub-EAAAAJ;bO7H0DAAAAAJ;", "orcid": ";;;;;;;0000-0001-8228-1042;", "linkedin": "zhangir-azerbayev-314ab21b8/;;;dsantosmarco/;stephen-mcaleer/;;;stellabiderman;", "or_profile": "~Zhangir_Azerbayev1;~Hailey_Schoelkopf1;~Keiran_Paster1;~Marco_Dos_Santos1;~Stephen_Marcus_McAleer1;~Albert_Q._Jiang1;~Jia_Deng1;~Stella_Biderman1;~Sean_Welleck1", "aff": ";EleutherAI;University of Toronto;Sorbonne University;Carnegie Mellon University;;Princeton University;Booz Allen Hamilton;", "aff_domain": ";eleuther.ai;toronto.edu;sorbonne-universite.fr;cmu.edu;;princeton.edu;boozallen.com;", "position": ";Researcher;PhD student;MS student;Postdoc;;Associate Professor;Industry researcher;", "bibtex": "@inproceedings{\nazerbayev2024llemma,\ntitle={Llemma: An Open Language Model for Mathematics},\nauthor={Zhangir Azerbayev and Hailey Schoelkopf and Keiran Paster and Marco Dos Santos and Stephen Marcus McAleer and Albert Q. Jiang and Jia Deng and Stella Biderman and Sean Welleck},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=4WnqRR915j}\n}", "github": "", "project": "", "reviewers": "JQiP;HYfd;Ch2c", "pdf_size": 441862, "rating": "6;6;8", "confidence": "2;4;3", "soundness": "2;3;3", "contribution": "3;3;3", "presentation": "3;3;3", "wc_summary": "57;54;26", "wc_strengths": "56;37;53", "wc_weaknesses": "160;37;55", "wc_questions": "22;17;29", "wc_review": "295;145;163", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "488;592;390", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 45.666666666666664, 13.960261060914616 ], "wc_strengths_avg": [ 48.666666666666664, 8.339997335464536 ], "wc_weaknesses_avg": [ 84.0, 54.24020648928247 ], "wc_questions_avg": [ 22.666666666666668, 4.9216076867444665 ], "wc_review_avg": [ 201.0, 66.87301398920195 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 490.0, 82.47827996913288 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 320, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8004581123322544769&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "openreview": "https://openreview.net/forum?id=4WnqRR915j", "pdf": "https://openreview.net/pdf?id=4WnqRR915j", "email": ";eleuther.ai;toronto.edu;sorbonne-universite.fr;cmu.edu;;princeton.edu;boozallen.com;", "author_num": 9, "aff_unique_index": "0;1;2;3;4;5", "aff_unique_norm": "EleutherAI;University of Toronto;Sorbonne University;Carnegie Mellon University;Princeton University;Booz Allen Hamilton", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.eleuther.ai;https://www.utoronto.ca;https://www.sorbonne.universite.fr;https://www.cmu.edu;https://www.princeton.edu;https://www.boozallen.com", 
"aff_unique_abbr": "EleutherAI;U of T;Sorbonne;CMU;Princeton;BAH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;0;0;0", "aff_country_unique": "United States;Canada;France" }, { "id": "4XCfu7fTgw", "title": "Spectral Contrastive Regression", "track": "main", "status": "Reject", "tldr": "", "abstract": "While several techniques have been proposed to enhance the generalization of\ndeep learning models for classification problems, limited research has been con-\nducted on improving generalization for regression tasks. This is primarily due\nto the continuous nature of regression labels, which makes it challenging to di-\nrectly apply classification-based techniques to regression tasks. Conversely, exist-\ning regression methods overlook feature-level generalization and primarily focus\non data augmentation using linear interpolation, which may not be an effective\napproach for synthesizing data for regression. In this paper, we introduce a novel\ngeneralization method for regression tasks based on the metric learning assump-\ntion that the distance between features and labels should be proportional. Unlike\nprevious approaches that solely consider the scale prediction of this proportion and\ndisregard its variation among samples, we argue that this proportion is not constant\nand can be defined as a mapping function. Additionally, we propose minimizing\nthe error of this function and stabilizing its fluctuating behavior by smoothing\nout its variations. The t-SNE visualization of the embedding space demonstrates\nthat our proposed loss function generates a more discriminative pattern with re-\nduced variance. To enhance Out-of-Distribution (OOD) generalization, we lever-\nage the characteristics of the spectral norm (i.e., the sub-multiplicativity of the\nspectral norm of the feature matrix can be expressed as Frobenius norm of the\noutput), and align the maximum singular value of the feature matrices across dif-\nferent domains. Experimental results on the MPI3D benchmark dataset reveal\nthat aligning the spectral norms significantly improves the unstable performance\non OOD data. We conduct experiments on eight benchmark datasets for domain\ngeneralization in regression, and our method consistently outperforms state-of-\nthe-art approaches in the majority of cases. Our code is available in an anonymous\nrepository, and it will be made publicly available upon acceptance of the paper: https://github.com/workerasd/SCR", "keywords": "Metric Learning;Out-of-Distribution Generalization;In-Distribution Generalization;Regression", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Kaiyu Guo;Zijian Wang;Brian C. 
Lovell;Mahsa Baktashmotlagh", "authorids": "~Kaiyu_Guo2;~Zijian_Wang2;~Brian_C._Lovell1;~Mahsa_Baktashmotlagh1", "gender": "M;M;F;M", "homepage": ";;;https://staff.itee.uq.edu.au/lovell/", "dblp": "262/2952;03/4540-9;119/1507;09/2347", "google_scholar": "fC6KpI0AAAAJ;OfTXHvsAAAAJ;https://scholar.google.com.au/citations?user=3kaiBBYAAAAJ;https://scholar.google.com.au/citations?user=gXiGxcMAAAAJ", "orcid": "0000-0002-4187-2839;;;0000-0001-6722-1754", "linkedin": ";;;brian-lovell-0997594/", "or_profile": "~Kaiyu_Guo2;~Zijian_Wang2;~Mahsa_Baktashmotlagh1;~Brian_Lovell1", "aff": "University of Queensland;The University of Queensland;The University of Queensland;University of Queensland", "aff_domain": "uq.edu.au;uq.edu.au;uq.edu.au;uq.edu.au", "position": "PhD student;Postdoc;Assistant Professor;Professor", "bibtex": "@misc{\nguo2024spectral,\ntitle={Spectral Contrastive Regression},\nauthor={Kaiyu Guo and Zijian Wang and Brian C. Lovell and Mahsa Baktashmotlagh},\nyear={2024},\nurl={https://openreview.net/forum?id=4XCfu7fTgw}\n}", "github": "", "project": "", "reviewers": "xKM4;xHdj;Rgn5", "site": "https://openreview.net/forum?id=4XCfu7fTgw", "pdf_size": 5375266, "rating": "5;5;5", "confidence": "4;3;4", "soundness": "3;3;3", "contribution": "2;3;2", "presentation": "2;2;3", "wc_summary": "104;65;41", "wc_strengths": "11;72;47", "wc_weaknesses": "133;117;65", "wc_questions": "34;392;243", "wc_review": "282;646;396", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "744;395;973", "reply_reviewers": "0;0;0", "reply_authors": "2;1;3", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 70.0, 25.96150997149434 ], "wc_strengths_avg": [ 43.333333333333336, 25.037749277618563 ], "wc_weaknesses_avg": [ 105.0, 29.028721409436322 ], "wc_questions_avg": [ 223.0, 146.83550887529444 ], "wc_review_avg": [ 441.3333333333333, 152.020466458376 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 704.0, 237.6566150282097 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:dSDibI2Z_vMJ:scholar.google.com/&scioq=Spectral+Contrastive+Regression&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Queensland", "aff_unique_dep": "", "aff_unique_url": "https://www.uq.edu.au", "aff_unique_abbr": "UQ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Australia" }, { "id": "4Y4hPWUppN", "title": "Key point is key in resolving the offline three-dimensional bin packing problem", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "In recent years, with the surge in deep learning and reinforcement learning, researchers have begun to explore the use of deep reinforcement learning to solve the offline three-dimensional bin packing problem. However, the valid action space in the offline three-dimensional bin packing problem is quite large, making it difficult for the model to converge as the number of boxes increases. Therefore, reducing the number of valid actions is crucial. 
In previous studies, many researchers have developed heuristic rules to reduce the number of effective actions. However, some of these heuristic rules drastically reduce the number of valid actions, potentially forgoing the optimal solution, while others do not sufficiently reduce the number of effective actions, making it still challenging for the model to converge when there are many boxes. In response to this, we propose a heuristic rule where boxes are placed only at certain specific locations, which we refer to as Key Points, while other locations are masked. This method integrates well with existing deep reinforcement learning models for solving the offline three-dimensional bin packing problem. We not only theoretically demonstrate the efficacy of this heuristic rule but also empirically show that when our method is combined with existing models, it can easily train with four times the number of boxes. The model converges ten times faster than before, and its performance also improves. Interestingly, even without retraining the model, using our method in the testing phase yields better results than the original method. We also compare our method to other heuristic rules. Experimental results show that our approach strikes a balance between convergence speed and performance.", "keywords": "deep reinforcement learning; three-dimensional offline packing;", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/a6379ff5d04f970d800bd7b1e295da271271eef3.zip", "author": "Xinyuan Tian;pengzhan qu;Hongsheng Qi", "authorids": "~Xinyuan_Tian1;~pengzhan_qu1;~Hongsheng_Qi1", "gender": ";;M", "homepage": ";;https://lenovo.com", "dblp": ";;", "google_scholar": ";;", "orcid": ";0000-0001-5910-3694;", "linkedin": ";;hong-sheng-qi-a542151/", "or_profile": "~Xinyuan_Tian1;~pengzhan_qu1;~Hongsheng_Qi1", "aff": ";Lenovo;Xi'an University of Electronic Science and Technology", "aff_domain": ";lenovo.com;xidian.edu.cn", "position": ";Principal Researcher;Lecturer", "bibtex": "@misc{\ntian2024key,\ntitle={Key point is key in resolving the offline three-dimensional bin packing problem},\nauthor={Xinyuan Tian and pengzhan qu and Hongsheng Qi},\nyear={2024},\nurl={https://openreview.net/forum?id=4Y4hPWUppN}\n}", "github": "", "project": "", "reviewers": "RRYs;TnxE;z1aJ", "site": "https://openreview.net/forum?id=4Y4hPWUppN", "pdf_size": 537850, "rating": "1;3;3", "confidence": "5;4;5", "soundness": "3;2;2", "contribution": "1;3;2", "presentation": "3;1;1", "wc_summary": "84;93;43", "wc_strengths": "37;25;34", "wc_weaknesses": "356;102;350", "wc_questions": "15;37;28", "wc_review": "492;257;455", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 2.3333333333333335, 0.9428090415820634 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.0, 0.816496580927726 ], "presentation_avg": [ 1.6666666666666667, 0.9428090415820634 ], "wc_summary_avg": [ 73.33333333333333, 21.761331658599286 ], "wc_strengths_avg": [ 32.0, 5.0990195135927845 ], "wc_weaknesses_avg": [ 269.3333333333333, 118.34788642904539 ], "wc_questions_avg": [ 26.666666666666668, 9.030811456096044 ], "wc_review_avg": [ 401.3333333333333, 103.1708399802106 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 3, 0 ], "authors#_avg": [
3, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:j_3GAMb_huUJ:scholar.google.com/&scioq=Key+point+is+key+in+resolving+the+offline+three-dimensional+bin+packing+problem&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Lenovo Group Limited;Xi'an University of Electronic Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.lenovo.com;http://www.xidian.edu.cn/", "aff_unique_abbr": "Lenovo;Xidian University", "aff_campus_unique_index": "1", "aff_campus_unique": ";Xi'an", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "NfgTransformer: Equivariant Representation Learning for Normal-form Games", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19458", "id": "4YESQqIys7", "author_site": "SIQI LIU, Luke Marris, Georgios Piliouras, Ian Gemp, Nicolas Heess", "tldr": "", "abstract": "Normal-form games (NFGs) are the fundamental model of *strategic interaction*. We study their representation using neural networks. We describe the inherent equivariance of NFGs --- any permutation of strategies describes an equivalent game --- as well as the challenges this poses for representation learning. We then propose the NfgTransformer architecture that leverages this equivariance, leading to state-of-the-art performance in a range of game-theoretic tasks including equilibrium-solving, deviation gain estimation and ranking, with a common approach to NFG representation. We show that the resulting model is interpretable and versatile, paving the way towards deep learning systems capable of game-theoretic reasoning when interacting with humans and with each other.", "keywords": "Game Theory;Deep Learning;Representation Learning;Nash Equilibrium", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/0b4fd48a1700b50096ccdbc85285c7722631b18e.pdf", "author": "Siqi Liu;Luke Marris;Georgios Piliouras;Ian Gemp;Nicolas Heess", "authorids": "~Siqi_Liu1;~Luke_Marris2;~Georgios_Piliouras1;~Ian_Gemp1;~Nicolas_Heess1", "gender": "M;;;M;", "homepage": "http://siqi.fr/;https://www.lukemarris.info/;;https://imgemp.github.io/;", "dblp": "60/9360-2.html;223/4422;62/1236;66/10996;76/9181", "google_scholar": "7U_OA0oAAAAJ;dvTeSX4AAAAJ;;5vo3MeEAAAAJ;79k7bGEAAAAJ", "orcid": "0000-0001-6381-4552;;;;", "linkedin": ";;;;", "or_profile": "~Siqi_Liu1;~Luke_Marris2;~Georgios_Piliouras1;~Ian_Gemp1;~Nicolas_Heess1", "aff": "Google;University College London;Singapore University of Technology and Design;Google DeepMind;Google DeepMind", "aff_domain": "google.com;ucl.ac.uk;sutd.edu.sg;google.com;google.com", "position": "Research Engineer;PhD student;Associate Professor;Research Scientist;Research Scientist", "bibtex": "@inproceedings{\nliu2024nfgtransformer,\ntitle={NfgTransformer: Equivariant Representation Learning for Normal-form Games},\nauthor={Siqi Liu and Luke Marris and Georgios Piliouras and Ian Gemp and Nicolas Heess},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=4YESQqIys7}\n}", "github": "", "project": "", "reviewers": "6Yok;s4as;6Avg;pter", "pdf_size": 858335, "rating": "3;5;8;8", "confidence": "4;4;4;3", "soundness": "2;2;4;2", "contribution": "1;2;4;1", "presentation": "1;3;3;1", "wc_summary": "56;62;94;29", "wc_strengths": "17;47;66;18", "wc_weaknesses": "417;205;82;1022", 
"wc_questions": "42;4;57;43", "wc_review": "532;318;299;1112", "wc_reply_reviewers": "0;61;170;1031", "wc_reply_authors": "678;1015;931;1960", "reply_reviewers": "0;1;1;5", "reply_authors": "2;3;5;7", "rating_avg": [ 6.0, 2.1213203435596424 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "contribution_avg": [ 2.0, 1.224744871391589 ], "presentation_avg": [ 2.0, 1.0 ], "wc_summary_avg": [ 60.25, 23.112496619794236 ], "wc_strengths_avg": [ 37.0, 20.627651344736268 ], "wc_weaknesses_avg": [ 431.5, 361.3699627805277 ], "wc_questions_avg": [ 36.5, 19.67866865415443 ], "wc_review_avg": [ 565.25, 328.6574014076056 ], "wc_reply_reviewers_avg": [ 315.5, 417.5586785111764 ], "wc_reply_authors_avg": [ 1146.0, 486.05709541163986 ], "reply_reviewers_avg": [ 1.75, 1.920286436967152 ], "reply_authors_avg": [ 4.25, 1.920286436967152 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5443310539518174, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16489762721706866882&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=4YESQqIys7", "pdf": "https://openreview.net/pdf?id=4YESQqIys7", "email": "google.com;ucl.ac.uk;sutd.edu.sg;google.com;google.com", "author_num": 5, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "Google;University College London;Singapore University of Technology and Design", "aff_unique_dep": "Google;;", "aff_unique_url": "https://www.google.com;https://www.ucl.ac.uk;https://www.sutd.edu.sg", "aff_unique_abbr": "Google;UCL;SUTD", "aff_campus_unique_index": "0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;1;2;1;1", "aff_country_unique": "United States;United Kingdom;Singapore" }, { "id": "4YK1e3Ehdy", "title": "Understanding Deep Neural Networks as Dynamical Systems: Insights into Training and Fine-tuning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "This paper offers an interpretation mechanism for understanding deep neural networks and their learning processes from a dynamical perspective. The aim is to uncover the relationship between the representational capacity of neural networks and the dynamical properties of their corresponding dynamical systems. To this end, we first interpret neural networks as dynamical systems by representing neural weight values as relationships among neuronal dynamics. Then, we model both neural network training and inference as the dynamical phenomena occurring within these systems. Built upon this framework, we introduce the concept of dynamical discrepancy, a macroscopic attribute that describes the dynamical states of neurons. Taking the generalization capability of neural models as a starting point, we launch a hypothesis: the dynamical discrepancy of neuromorphic-dynamical systems correlates with the representational capacity of neural models. We conduct dynamics-based conversions on neural structures such as ResNet, ViT, and LLaMA to investigate this hypothesis on MNIST, ImageNet, SQuAD, and IMDB. The experimental fact reveals that the relationship between these neural models' dynamical discrepancy and representational capacity aligns perfectly with our theoretical conjecture. 
Building upon these findings, we introduce a universal analytical approach tailored for neural models.", "keywords": "Interpretation;Dynamical system;Expressive ability", "primary_area": "visualization or interpretation of learned representations", "supplementary_material": "/attachment/d72b32b38ab46172343f42424475889c9e12d1c9.zip", "author": "Shufan Shen;Zhengqi Pei;Shuhui Wang;Qingming Huang", "authorids": "~Shufan_Shen1;~Zhengqi_Pei1;~Shuhui_Wang1;~Qingming_Huang1", "gender": "M;M;M;", "homepage": ";;https://vipl.ict.ac.cn/people/shwang/;https://qmhuang-ucas.github.io/", "dblp": "277/0707;223/2296;37/2537;68/4388", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;Qs5zacQAAAAJ;h-JxBSYAAAAJ;https://scholar.google.com.hk/citations?user=J1vMnRgAAAAJ", "orcid": ";;0000-0002-5931-0527;", "linkedin": ";;;", "or_profile": "~Shufan_Shen1;~Zhengqi_Pei1;~Shuhui_Wang1;~Qingming_Huang2", "aff": "Institute of Computing Technology, Chinese Academy of Sciences;University of Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;University of Chinese Academy of Sciences", "aff_domain": "ict.ac.cn;ucas.ac.cn;ict.ac.cn;ucas.ac.cn", "position": "PhD student;MS student;Full Professor;Full Professor", "bibtex": "@misc{\nshen2024understanding,\ntitle={Understanding Deep Neural Networks as Dynamical Systems: Insights into Training and Fine-tuning},\nauthor={Shufan Shen and Zhengqi Pei and Shuhui Wang and Qingming Huang},\nyear={2024},\nurl={https://openreview.net/forum?id=4YK1e3Ehdy}\n}", "github": "", "project": "", "reviewers": "DroU;K7Rz;YDs9;diZv;DpVF", "site": "https://openreview.net/forum?id=4YK1e3Ehdy", "pdf_size": 5740253, "rating": "1;1;3;3;5", "confidence": "2;5;3;4;3", "soundness": "2;1;2;2;2", "contribution": "2;1;2;1;2", "presentation": "1;1;1;1;2", "wc_summary": "32;67;53;97;65", "wc_strengths": "17;12;24;35;127", "wc_weaknesses": "202;281;236;165;79", "wc_questions": "12;7;20;185;50", "wc_review": "263;367;333;482;321", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "0;0;0;0;0", "rating_avg": [ 2.6, 1.4966629547095767 ], "confidence_avg": [ 3.4, 1.019803902718557 ], "soundness_avg": [ 1.8, 0.4000000000000001 ], "contribution_avg": [ 1.6, 0.4898979485566356 ], "presentation_avg": [ 1.2, 0.4 ], "wc_summary_avg": [ 62.8, 21.15088650624366 ], "wc_strengths_avg": [ 43.0, 42.703629822299646 ], "wc_weaknesses_avg": [ 192.6, 68.488247166941 ], "wc_questions_avg": [ 54.8, 66.78742396589345 ], "wc_review_avg": [ 353.2, 72.60964123310347 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.1572427255082878, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:zFkJi5lbB04J:scholar.google.com/&scioq=Understanding+Deep+Neural+Networks+as+Dynamical+Systems:+Insights+into+Training+and+Fine-tuning&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Chinese Academy of Sciences;University of Chinese Academy of Sciences", "aff_unique_dep": "Institute of Computing Technology;", "aff_unique_url": "http://www.ict.ac.cn;http://www.ucas.ac.cn", "aff_unique_abbr": "CAS;UCAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "4YgfwJBJeQ", "title": "StructChart: Perception, Structuring, 
Reasoning for Visual Chart Understanding", "track": "main", "status": "Reject", "tldr": "", "abstract": "Charts are common in literature across different scientific fields, conveying rich information easily accessible to readers. Current chart-related tasks focus on either chart perception, which refers to extracting information from the visual charts, or performing reasoning given the extracted data, e.g. in a tabular form. In this paper, we aim to establish a unified and label-efficient learning paradigm for joint perception and reasoning tasks, which can be generally applicable to different downstream tasks, beyond the question-answering task as specifically studied in peer works. Specifically, StructChart first reformulates the chart information from the popular tabular form (specifically linearized CSV) to the proposed Structured Triplet Representations (STR), which is more friendly for reducing the task gap between chart perception and reasoning due to the employed structured information extraction for charts. We then propose a Structuring Chart-oriented Representation Metric (SCRM) to quantitatively evaluate the performance for the chart perception task. To enrich the dataset for training, we further explore the possibility of leveraging the Large Language Model (LLM), enhancing the chart diversity in terms of both chart visual style and its statistical information. Extensive experiments are conducted on various chart-related tasks, demonstrating the effectiveness and promising potential for a unified chart perception-reasoning paradigm to push the frontier of chart understanding.", "keywords": "Chart Perception;Chart Understanding;Simulation-to-Real", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/907ab34604f6a8b336c1b817ad5c2e0719042727.zip", "author": "Renqiu Xia;Bo Zhang;Haoyang Peng;Ning Liao;Peng Ye;Botian Shi;Junchi Yan;Yu Qiao", "authorids": "~Renqiu_Xia2;~Bo_Zhang17;~Haoyang_Peng1;~Ning_Liao1;~Peng_Ye4;~Botian_Shi1;~Junchi_Yan2;~Yu_Qiao1", "gender": ";M;M;M;M;M;;", "homepage": ";https://bobrown.github.io/boZhang.github.io/;https://myaccount.google.com/?hl=en;https://scholar.google.com/citations?user=6aARLhMAAAAJ&hl=zh-CN;;;;", "dblp": ";36/2259-69;;44/1117;53/930-6;245/8742;;", "google_scholar": ";https://scholar.google.com/citations?hl=en;;6aARLhMAAAAJ;UEZZP5QAAAAJ;K0PpvLkAAAAJ;;", "orcid": ";0000-0001-8052-782X;;0000-0002-3764-2555;0000-0002-8486-7562;0000-0003-3677-7252;;", "linkedin": ";;;;;friskit/;;", "or_profile": "~Renqiu_Xia2;~Bo_Zhang17;~Haoyang_Peng1;~Ning_Liao1;~Peng_Ye4;~Botian_Shi1;~Junchi_Yan2;~Yu_Qiao1", "aff": ";Shanghai Artificial Intelligence Laboratory;Fudan University;Shanghai Jiaotong University;Fudan University;Shanghai AI Lab;;", "aff_domain": ";pjlab.org.cn;fudan.edu.cn;sjtu.edu.cn;fudan.edu.cn;pjlab.org.cn;;", "position": ";Researcher;MS student;PhD student;PhD student;Researcher;;", "bibtex": "@misc{\nxia2024structchart,\ntitle={StructChart: Perception, Structuring, Reasoning for Visual Chart Understanding},\nauthor={Renqiu Xia and Bo Zhang and Haoyang Peng and Ning Liao and Peng Ye and Botian Shi and Junchi Yan and Yu Qiao},\nyear={2024},\nurl={https://openreview.net/forum?id=4YgfwJBJeQ}\n}", "github": "", "project": "", "reviewers": "mxZb;Rkaq;ojHH", "site": "https://openreview.net/forum?id=4YgfwJBJeQ", "pdf_size": 2362471, "rating": "5;6;6", "confidence": "3;5;4", "soundness": "2;3;3", "contribution": "2;4;3", "presentation": "3;3;3", "wc_summary":
"121;37;83", "wc_strengths": "56;37;125", "wc_weaknesses": "390;54;50", "wc_questions": "5;4;12", "wc_review": "572;132;270", "wc_reply_reviewers": "0;22;0", "wc_reply_authors": "2068;1188;561", "reply_reviewers": "0;1;0", "reply_authors": "6;5;3", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 80.33333333333333, 34.34465832637669 ], "wc_strengths_avg": [ 72.66666666666667, 37.80946383586463 ], "wc_weaknesses_avg": [ 164.66666666666666, 159.3430959352247 ], "wc_questions_avg": [ 7.0, 3.559026084010437 ], "wc_review_avg": [ 324.6666666666667, 183.74136412057274 ], "wc_reply_reviewers_avg": [ 7.333333333333333, 10.370899457402697 ], "wc_reply_authors_avg": [ 1272.3333333333333, 618.1134379887095 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 4.666666666666667, 1.247219128924647 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1328798167899051335&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1;2;1;3", "aff_unique_norm": "Shanghai Artificial Intelligence Laboratory;Fudan University;Shanghai Jiao Tong University;Shanghai AI Lab", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.shailab.org/;https://www.fudan.edu.cn;https://www.sjtu.edu.cn;https://www.shanghaiailab.com", "aff_unique_abbr": "Shanghai AI Lab;Fudan;SJTU;SAIL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "4ZcPYmFsJm", "title": "Geom-Erasing: Geometry-Driven Removal of Implicit Concept in Diffusion Models", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Fine-tuning diffusion models through personalized datasets is an acknowledged method for improving generation quality across downstream tasks, which, however, often inadvertently generates unintended concepts such as watermarks and QR codes, attributed to the limitations in image sources and collecting methods within specific downstream tasks. Existing solutions suffer from eliminating these unintentionally learned implicit concepts, primarily due to the dependency on the model\u2019s ability to recognize concepts that it actually cannot discern. In this work, we introduce GEOM-ERASING, a novel approach that successfully removes the implicit concepts with either an additional accessible classifier or detector model to encode geometric information of these concepts into the text domain. Moreover, we construct three distinct datasets, each imbued with specific implicit concepts (i.e., watermarks, QR codes, and text) for training and evaluation. Experimental results demonstrate that GEOM-ERASING not only identifies but also proficiently eradicates specific implicit concepts, revealing a significant improvement over the existing methods. 
The integration of geometric information marks a substantial progression in the precise removal of implicit concepts in diffusion models.", "keywords": "concept erasure;diffusion model;generative model", "primary_area": "generative models", "supplementary_material": "/attachment/d10fd101d2bb287e5905abd411e84fd95090b0f1.pdf", "author": "Zhili LIU;Kai Chen;Yifan Zhang;Jianhua Han;Lanqing HONG;Hang Xu;Zhenguo Li;Dit-Yan Yeung;James Kwok", "authorids": "~Zhili_LIU1;~Kai_Chen11;~Yifan_Zhang1;~Jianhua_Han1;~Lanqing_HONG1;~Hang_Xu1;~Zhenguo_Li1;~Dit-Yan_Yeung2;~James_Kwok1", "gender": "M;M;M;M;F;M;M;M;", "homepage": "https://capricious-liu.github.io/;https://kaichen1998.github.io/;https://sites.google.com/view/yifan-zhang/%E9%A6%96%E9%A1%B5;;https://racheltechie.github.io/;;http://www.ee.columbia.edu/~zgli/;https://cse.hkust.edu.hk/faculty/dyyeung/;", "dblp": "03/10297;c/KaiChen24;57/4707-4;29/6207;226/4258;;23/6479;41/5668;", "google_scholar": "FdR09jsAAAAJ;3qBfyLIAAAAJ;https://scholar.google.com.hk/citations?user=zuYIUJEAAAAJ;OEPMQEMAAAAJ;https://scholar.google.com.sg/citations?user=2p7x6OUAAAAJ;https://scholar.google.com.hk/citations?user=J_8TX6sAAAAJ;XboZC1AAAAAJ;nEsOOx8AAAAJ;", "orcid": ";;;;;0000-0003-3645-8972;;0000-0003-3716-8125;", "linkedin": "%E6%99%BA%E7%AB%8B-%E5%88%98-49153b167/;;;;;;;;", "or_profile": "~Zhili_LIU1;~Kai_Chen11;~Yifan_Zhang1;~Jianhua_Han1;~Lanqing_HONG1;~Hang_Xu1;~Zhenguo_Li1;~Dit-Yan_Yeung2;~James_Kwok1", "aff": "Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;National University of Singapore;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Huawei Noah\u2018s Ark Lab;Huawei Noah's Ark Lab;Hong Kong University of Science and Technology;", "aff_domain": "hkust.edu;ust.hk;nus.edu;huawei.com;huawei.com;huawei.com;huawei.com;ust.hk;", "position": "PhD student;PhD student;PhD student;Researcher;Researcher;Researcher;Principal Researcher;Chair Professor;", "bibtex": "@misc{\nliu2024geomerasing,\ntitle={Geom-Erasing: Geometry-Driven Removal of Implicit Concept in Diffusion Models},\nauthor={Zhili LIU and Kai Chen and Yifan Zhang and Jianhua Han and Lanqing HONG and Hang Xu and Zhenguo Li and Dit-Yan Yeung and James Kwok},\nyear={2024},\nurl={https://openreview.net/forum?id=4ZcPYmFsJm}\n}", "github": "", "project": "", "reviewers": "ci9W;3V6v;GREc;dnxP", "site": "https://openreview.net/forum?id=4ZcPYmFsJm", "pdf_size": 1568186, "rating": "3;5;5;5", "confidence": "2;3;3;4", "soundness": "3;2;3;2", "contribution": "1;2;2;2", "presentation": "3;3;4;2", "wc_summary": "44;99;60;100", "wc_strengths": "32;56;143;100", "wc_weaknesses": "60;297;112;362", "wc_questions": "32;6;46;4", "wc_review": "168;458;361;566", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 75.75, 24.416951079117148 ], "wc_strengths_avg": [ 82.75, 42.48161366991607 ], "wc_weaknesses_avg": [ 207.75, 125.25648685796676 ], "wc_questions_avg": [ 22.0, 17.72004514666935 ], "wc_review_avg": [ 388.25, 146.38369957068306 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 
17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=474392227487030829&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;1;2;2;2;2;0", "aff_unique_norm": "Hong Kong University of Science and Technology;National University of Singapore;Huawei", "aff_unique_dep": ";;Huawei Technologies", "aff_unique_url": "https://www.ust.hk;https://www.nus.edu.sg;https://www.huawei.com", "aff_unique_abbr": "HKUST;NUS;Huawei", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;1;0;0;0;0;0", "aff_country_unique": "China;Singapore" }, { "title": "Adaptive Instrument Design for Indirect Experiments", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19457", "id": "4Zz5UELkIt", "author_site": "Yash Chandak, Shiv Shankar, Vasilis Syrgkanis, Emma Brunskill", "tldr": "", "abstract": "Indirect experiments provide a valuable framework for estimating treatment effects in situations where conducting randomized control trials (RCTs) is impractical or unethical. Unlike RCTs, indirect experiments estimate treatment effects by leveraging (conditional) instrumental variables, enabling estimation through encouragement and recommendation rather than strict treatment assignment. However, the sample efficiency of such estimators depends not only on the inherent variability in outcomes but also on the varying compliance levels of users with the instrumental variables and the choice of estimator being used, especially when dealing with numerous instrumental variables. While adaptive experiment design has a rich literature for \\textit{direct} experiments, in this paper we take the initial steps towards enhancing sample efficiency for \\textit{indirect} experiments by adaptively designing a data collection policy over instrumental variables. Our main contribution is a practical computational procedure that utilizes influence functions to search for an optimal data collection policy, minimizing the mean-squared error of the desired (non-linear) estimator. 
Through experiments conducted in various domains inspired by real-world applications, we showcase how our method can significantly improve the sample efficiency of indirect experiments.", "keywords": "instrument variable;experiment design;indirect experiments;adaptive design", "primary_area": "causal reasoning", "supplementary_material": "/attachment/1797d39d1ff28743b25f3ea281884f02614a8159.zip", "author": "Yash Chandak;Shiv Shankar;Vasilis Syrgkanis;Emma Brunskill", "authorids": "~Yash_Chandak1;~Shiv_Shankar2;~Vasilis_Syrgkanis1;~Emma_Brunskill2", "gender": ";;;", "homepage": "https://yashchandak.github.io/;;https://www.vsyrgkanis.com;", "dblp": "168/8450;203/9123;;", "google_scholar": "AsgUcSEAAAAJ;;G1WMpcUAAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Yash_Chandak1;~Shiv_Shankar2;~Vasilis_Syrgkanis1;~Emma_Brunskill2", "aff": "Computer Science Department, Stanford University;IIT Bombay;Stanford University;", "aff_domain": "cs.stanford.edu;iitb.ac.in;stanford.edu;", "position": "Postdoc;Researcher;Assistant Professor;", "bibtex": "@inproceedings{\nchandak2024adaptive,\ntitle={Adaptive Instrument Design for Indirect Experiments},\nauthor={Yash Chandak and Shiv Shankar and Vasilis Syrgkanis and Emma Brunskill},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=4Zz5UELkIt}\n}", "github": "", "project": "", "reviewers": "QTvC;7XxM;ST2k;SUta", "pdf_size": 749001, "rating": "6;6;6;8", "confidence": "3;4;3;3", "soundness": "3;3;3;3", "contribution": "3;3;3;4", "presentation": "3;3;2;4", "wc_summary": "71;64;159;284", "wc_strengths": "41;68;56;100", "wc_weaknesses": "93;281;99;114", "wc_questions": "524;2;52;107", "wc_review": "729;415;366;605", "wc_reply_reviewers": "328;28;0;72", "wc_reply_authors": "3025;643;656;417", "reply_reviewers": "2;1;0;1", "reply_authors": "6;2;2;2", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 144.5, 88.81582066276256 ], "wc_strengths_avg": [ 66.25, 21.706853756359994 ], "wc_weaknesses_avg": [ 146.75, 77.88573361020617 ], "wc_questions_avg": [ 171.25, 207.01856800779973 ], "wc_review_avg": [ 528.75, 146.0656958358122 ], "wc_reply_reviewers_avg": [ 107.0, 130.14991356124673 ], "wc_reply_authors_avg": [ 1185.25, 1066.4226120539643 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.0, 1.7320508075688772 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16284860440980537462&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=4Zz5UELkIt", "pdf": "https://openreview.net/pdf?id=4Zz5UELkIt", "email": "cs.stanford.edu;iitb.ac.in;stanford.edu;", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Stanford University;Indian Institute of Technology Bombay", "aff_unique_dep": "Computer Science Department;", "aff_unique_url": "https://www.stanford.edu;https://www.iitb.ac.in", "aff_unique_abbr": "Stanford;IITB", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Stanford;Mumbai", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;India" }, { "id": "4aJg9e4nvF", "title": "What do vision transformers learn? 
A visual exploration", "track": "main", "status": "Reject", "tldr": "", "abstract": "Vision transformers (ViTs) are quickly becoming the de-facto architecture for computer vision, yet we understand very little about why they work and what they learn. While existing studies visually analyze the mechanisms of convolutional neural networks, an analogous exploration of ViTs remains challenging. In this paper, we first address the obstacles to performing visualizations on ViTs. Assisted by these solutions, we observe that neurons in ViTs trained with language model supervision (e.g., CLIP) are activated by semantic concepts rather than visual features. We also explore the underlying differences between ViTs and CNNs, and we find that transformers detect image background features, just like their convolutional counterparts, but their predictions depend far less on high-frequency information. On the other hand, both architecture types behave similarly in the way features progress from abstract patterns in early layers to concrete objects in late layers. In addition, we show that ViTs maintain spatial information in all layers except the final layer. In contrast to previous works, we show that the last layer most likely discards the spatial information and behaves as a learned global pooling operation. Finally, we conduct large-scale visualizations on a wide range of ViT variants, including DeiT, CoaT, ConViT, PiT, Swin, and Twin, to validate the effectiveness of our method.", "keywords": "Interpretability;Visualization;Vision Transformers;Computer Vision;Deep Learning", "primary_area": "visualization or interpretation of learned representations", "supplementary_material": "", "author": "Hamid Kazemi;Amin Ghiasi;Eitan Borgnia;Steven Reich;Manli Shu;Micah Goldblum;Andrew Gordon Wilson;Tom Goldstein", "authorids": "~Hamid_Kazemi1;~Amin_Ghiasi1;~Eitan_Borgnia1;~Steven_Reich1;~Manli_Shu1;~Micah_Goldblum1;~Andrew_Gordon_Wilson1;~Tom_Goldstein1", "gender": "M;M;M;M;F;;Not Specified;M", "homepage": ";http://cs.umd.edu/~amin;https://eitanborgnia.com;https://www.cs.umd.edu/people/sreich47;https://azshue.github.io/;;https://cims.nyu.edu/~andrewgw;https://www.cs.umd.edu/~tomg/", "dblp": ";239/8313;;;263/3503;241/7231;65/10453;25/8184", "google_scholar": "7hNdaGQAAAAJ;tNQWOxUAAAAJ;;https://scholar.google.com/citations?view_op=list_works;https://scholar.google.com/citations?hl=en;pGDKzuUAAAAJ;https://scholar.google.com.tw/citations?user=twWX2LIAAAAJ;KmSuVtgAAAAJ", "orcid": ";;;;;;;", "linkedin": "hamid-kazemi-608a8085/;;;;manli-shu-a804a8164/;;;", "or_profile": "~Hamid_Kazemi1;~Amin_Ghiasi1;~Eitan_Borgnia1;~Steven_Reich1;~Manli_Shu1;~Micah_Goldblum1;~Andrew_Gordon_Wilson1;~Tom_Goldstein1", "aff": "University of Maryland, College Park;Apple;University of Chicago;;Department of Computer Science, University of Maryland, College Park;New York University;New York University;University of Maryland, College Park", "aff_domain": "umd.edu;apple.com;uchicago.edu;;cs.umd.edu;nyu.edu;nyu.edu;umd.edu", "position": "PhD student;Researcher;PhD student;;PhD student;Postdoc;Associate Professor;Full Professor", "bibtex": "@misc{\nkazemi2024what,\ntitle={What do vision transformers learn? 
A visual exploration},\nauthor={Hamid Kazemi and Amin Ghiasi and Eitan Borgnia and Steven Reich and Manli Shu and Micah Goldblum and Andrew Gordon Wilson and Tom Goldstein},\nyear={2024},\nurl={https://openreview.net/forum?id=4aJg9e4nvF}\n}", "github": "", "project": "", "reviewers": "ESvq;TawR;f2Nw;Y5ru", "site": "https://openreview.net/forum?id=4aJg9e4nvF", "pdf_size": 32891953, "rating": "3;5;5;6", "confidence": "4;4;4;3", "soundness": "2;3;3;3", "contribution": "1;2;2;3", "presentation": "2;2;3;3", "wc_summary": "71;92;60;120", "wc_strengths": "28;46;159;68", "wc_weaknesses": "198;208;79;74", "wc_questions": "18;27;206;23", "wc_review": "315;373;504;285", "wc_reply_reviewers": "0;103;0;100", "wc_reply_authors": "626;698;542;294", "reply_reviewers": "0;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 85.75, 22.873292285982796 ], "wc_strengths_avg": [ 75.25, 50.38538974742579 ], "wc_weaknesses_avg": [ 139.75, 63.3733974156349 ], "wc_questions_avg": [ 68.5, 79.44966960283725 ], "wc_review_avg": [ 369.25, 83.98325726000391 ], "wc_reply_reviewers_avg": [ 50.75, 50.76108253376793 ], "wc_reply_authors_avg": [ 540.0, 152.38110119040354 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.6622661785325219, "gs_citation": 69, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6842742910784796915&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2;3;4;4;0", "aff_unique_norm": "University of Maryland;Apple;University of Chicago;University of Maryland, College Park;New York University", "aff_unique_dep": ";Apple Inc.;;Department of Computer Science;", "aff_unique_url": "https://www/umd.edu;https://www.apple.com;https://www.uchicago.edu;https://www/umd.edu;https://www.nyu.edu", "aff_unique_abbr": "UMD;Apple;UChicago;UMD;NYU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Tackling the Data Heterogeneity in Asynchronous Federated Learning with Cached Update Calibration", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19456", "id": "4aywmeb97I", "author_site": "Yujia Wang, Yuanpu Cao, Jingcheng Wu, Ruoyu Chen, Jinghui Chen", "tldr": "", "abstract": "Asynchronous federated learning, which enables local clients to send their model update asynchronously to the server without waiting for others, has recently emerged for its improved efficiency and scalability over traditional synchronized federated learning. In this paper, we study how the asynchronous delay affects the convergence of asynchronous federated learning under non-i.i.d. distributed data across clients. Through the theoretical convergence analysis of one representative asynchronous federated learning algorithm under standard nonconvex stochastic settings, we show that the asynchronous delay can largely slow down the convergence, especially with high data heterogeneity. To further improve the convergence of asynchronous federated learning under heterogeneous data distributions, we propose a novel asynchronous federated learning method with a cached update calibration. 
Specifically, we let the server cache the latest update for each client and reuse these variables for calibrating the global update at each round. We theoretically prove the convergence acceleration for our proposed method under nonconvex stochastic settings. Extensive experiments on several vision and language tasks demonstrate our superior performances compared to other asynchronous federated learning baselines.", "keywords": "Federated learning;Nonconvex optimization", "primary_area": "optimization", "supplementary_material": "", "author": "Yujia Wang;Yuanpu Cao;Jingcheng Wu;Ruoyu Chen;Jinghui Chen", "authorids": "~Yujia_Wang3;~Yuanpu_Cao1;~Jingcheng_Wu1;~Ruoyu_Chen4;~Jinghui_Chen1", "gender": ";M;M;M;M", "homepage": "https://yujiaw98.github.io/;;https://www.lti.cs.cmu.edu/people/222227896/jingcheng-wu;https://github.com/cryttx;https://jinghuichen.github.io/", "dblp": ";243/0230;;;67/5633", "google_scholar": "0DwROiMAAAAJ;F5S2bO8AAAAJ;;;mKia7Y4AAAAJ", "orcid": ";;;;", "linkedin": ";yuanpu-cao-a392751b2/;;;", "or_profile": "~Yujia_Wang3;~Yuanpu_Cao1;~Jingcheng_Wu1;~Ruoyu_Chen4;~Jinghui_Chen1", "aff": "Pennsylvania State University;Pennsylvania State University;;;Pennsylvania State University", "aff_domain": "psu.edu;psu.edu;;;psu.edu", "position": "PhD student;PhD student;;;Assistant Professor", "bibtex": "@inproceedings{\nwang2024tackling,\ntitle={Tackling the Data Heterogeneity in Asynchronous Federated Learning with Cached Update Calibration},\nauthor={Yujia Wang and Yuanpu Cao and Jingcheng Wu and Ruoyu Chen and Jinghui Chen},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=4aywmeb97I}\n}", "github": "", "project": "", "reviewers": "xHG4;yUBQ;3At2", "pdf_size": 777743, "rating": "6;8;8", "confidence": "3;4;5", "soundness": "3;3;3", "contribution": "3;3;3", "presentation": "2;4;3", "wc_summary": "174;66;111", "wc_strengths": "93;46;82", "wc_weaknesses": "99;128;87", "wc_questions": "145;243;28", "wc_review": "511;483;308", "wc_reply_reviewers": "34;65;12", "wc_reply_authors": "858;1455;519", "reply_reviewers": "1;1;1", "reply_authors": "3;4;2", "rating_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 117.0, 44.294469180700204 ], "wc_strengths_avg": [ 73.66666666666667, 20.07209228976613 ], "wc_weaknesses_avg": [ 104.66666666666667, 17.21110752456745 ], "wc_questions_avg": [ 138.66666666666666, 87.88755442167123 ], "wc_review_avg": [ 434.0, 89.82575725629407 ], "wc_reply_reviewers_avg": [ 37.0, 21.740898478827106 ], "wc_reply_authors_avg": [ 944.0, 386.9289340434494 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.0, 0.816496580927726 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14121697309955840422&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=4aywmeb97I", "pdf": "https://openreview.net/pdf?id=4aywmeb97I", "email": "psu.edu;psu.edu;;;psu.edu", "author_num": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "Pennsylvania State University", "aff_unique_dep": "", "aff_unique_url": "https://www.psu.edu", "aff_unique_abbr": "PSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", 
"aff_country_unique": "United States" }, { "title": "EMO: EARTH MOVER DISTANCE OPTIMIZATION FOR AUTO-REGRESSIVE LANGUAGE MODELING", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19455", "id": "4bLXfRd0CX", "author_site": "Siyu Ren, Zhiyong Wu, Kenny Zhu", "tldr": "", "abstract": "Neural language models are probabilistic models of human text. They are predominantly trained using maximum likelihood estimation (MLE), which is equivalent to minimizing the forward cross-entropy between the empirical data distribution and the model distribution. However, various degeneration phenomena are still widely observed when decoding from the distributions learned by such models. We establish that the forward cross-entropy is suboptimal as a distance metric for aligning human and model distribution due to its (1) recall-prioritization (2) negative diversity ignorance and (3) train-test mismatch. In this paper, we propose Earth Mover Distance Optimization (EMO) for auto-regressive language modeling. EMO capitalizes on the inherent properties of earth mover distance to address the aforementioned challenges. Due to the high complexity of direct computation, we further introduce a feasible upper bound for EMO to ease end-to-end training. Upon extensive evaluation of language models trained using EMO and MLE. We find that EMO demonstrates a consistently better language modeling performance than MLE across domains. Moreover, EMO demonstrates noteworthy enhancements in downstream performance with minimal fine-tuning on merely 25,000 sentences. This highlights the tremendous potential of EMO as a lightweight calibration method for enhancing large-scale pre-trained language models.", "keywords": "language modeling;earth mover distance;language generation;language understanding", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Siyu Ren;Zhiyong Wu;Kenny Q. Zhu", "authorids": "~Siyu_Ren1;~Zhiyong_Wu3;~Kenny_Q._Zhu1", "gender": "M;;M", "homepage": "https://drsy.github.io/;;http://www.cs.sjtu.edu.cn/~kzhu/", "dblp": ";;z/KennyQiliZhu", "google_scholar": "jkJDyrkAAAAJ;;https://scholar.google.com.tw/citations?user=ZIRJ6lIAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Siyu_Ren1;~Zhiyong_Wu3;~Kenny_Q._Zhu1", "aff": "Shanghai Jiaotong University;;University of Texas at Arlington", "aff_domain": "sjtu.edu.cn;;uta.edu", "position": "PhD student;;Full Professor", "bibtex": "@inproceedings{\nren2024emo,\ntitle={{EMO}: {EARTH} {MOVER} {DISTANCE} {OPTIMIZATION} {FOR} {AUTO}-{REGRESSIVE} {LANGUAGE} {MODELING}},\nauthor={Siyu Ren and Zhiyong Wu and Kenny Q. 
Zhu},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=4bLXfRd0CX}\n}", "github": "", "project": "", "reviewers": "XE26;JZqx;TjsD;2RWM", "pdf_size": 640517, "rating": "3;6;6;8", "confidence": "4;4;4;4", "soundness": "1;4;3;4", "contribution": "2;2;3;3", "presentation": "3;4;3;4", "wc_summary": "68;44;56;102", "wc_strengths": "11;80;36;22", "wc_weaknesses": "235;109;147;96", "wc_questions": "1;86;33;25", "wc_review": "315;319;272;245", "wc_reply_reviewers": "765;161;201;0", "wc_reply_authors": "1674;1045;1156;442", "reply_reviewers": "3;3;2;0", "reply_authors": "3;4;3;1", "rating_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 1.224744871391589 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 67.5, 21.650635094610966 ], "wc_strengths_avg": [ 37.25, 26.223796445213647 ], "wc_weaknesses_avg": [ 146.75, 54.28800512083678 ], "wc_questions_avg": [ 36.25, 31.04331651096577 ], "wc_review_avg": [ 287.75, 30.80077109424373 ], "wc_reply_reviewers_avg": [ 281.75, 288.97004602553534 ], "wc_reply_authors_avg": [ 1079.25, 437.8580677571215 ], "reply_reviewers_avg": [ 2.0, 1.224744871391589 ], "reply_authors_avg": [ 2.75, 1.0897247358851685 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12705620678870906094&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=4bLXfRd0CX", "pdf": "https://openreview.net/pdf?id=4bLXfRd0CX", "email": "sjtu.edu.cn;;uta.edu", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Shanghai Jiao Tong University;University of Texas at Arlington", "aff_unique_dep": ";", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.uta.edu", "aff_unique_abbr": "SJTU;UTA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Arlington", "aff_country_unique_index": "0;1", "aff_country_unique": "China;United States" }, { "title": "Going Beyond Neural Network Feature Similarity: The Network Feature Complexity and Its Interpretation Using Category Theory", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19454", "id": "4bSQ3lsfEV", "author_site": "Yiting Chen, Zhanpeng Zhou, Junchi Yan", "tldr": "", "abstract": "The behavior of neural networks still remains opaque, and a recently widely noted phenomenon is that networks often achieve similar performance when initialized with different random parameters. This phenomenon has attracted significant attention in measuring the similarity between features learned by distinct networks. However, feature similarity could be vague in describing the same feature since equivalent features hardly exist. In this paper, we expand the concept of equivalent feature and provide the definition of what we call *functionally equivalent features*. These features produce equivalent output under certain transformations. \nUsing this definition, we aim to derive a more intrinsic metric for the so-called *feature complexity* regarding the redundancy of features learned by a neural network at each layer. We offer a formal interpretation of our approach through the lens of category theory, a well-developed area in mathematics. To quantify the feature complexity, we further propose an efficient algorithm named Iterative Feature Merging. 
Our experimental results validate our ideas and theories from various perspectives. We empirically demonstrate that the functionally equivalence widely exists among different features learned by the same neural network and we could reduce the number of parameters of the network without affecting the performance. We have also drawn several interesting empirical findings, including: \n1) the larger the network, the more redundant features it learns; 2) in particular, we show how to prune the networks based on our finding using direct equivalent feature merging, without fine-tuning which is often needed in peer network pruning methods; 3) same structured networks with higher feature complexity achieve better performance; 4) through the layers of a neural network, the feature complexity first increase then decrease; 5) for the image classification task, a group of functionally equivalent features may correspond to a specific semantic meaning. Source code will be made publicly available.", "keywords": "Category Theory; Neural Networks Feature Complexity", "primary_area": "visualization or interpretation of learned representations", "supplementary_material": "", "author": "Yiting Chen;Zhanpeng Zhou;Junchi Yan", "authorids": "~Yiting_Chen1;~Zhanpeng_Zhou1;~Junchi_Yan2", "gender": "M;M;M", "homepage": "https://ytchen981.github.io/;https://zzp1012.github.io/;http://thinklab.sjtu.edu.cn/", "dblp": "135/6971;;60/7949.html", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;idxXY3UAAAAJ;ga230VoAAAAJ", "orcid": ";;0000-0001-9639-7679", "linkedin": ";;", "or_profile": "~Yiting_Chen1;~Zhanpeng_Zhou1;~Junchi_Yan1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nchen2024going,\ntitle={Going Beyond Neural Network Feature Similarity: The Network Feature Complexity and Its Interpretation Using Category Theory},\nauthor={Yiting Chen and Zhanpeng Zhou and Junchi Yan},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=4bSQ3lsfEV}\n}", "github": "", "project": "", "reviewers": "R8Xc;QeEM;oxNv;Po5f", "pdf_size": 1034460, "rating": "3;6;6;8", "confidence": "4;4;4;4", "soundness": "2;3;3;3", "contribution": "1;2;3;3", "presentation": "2;2;3;3", "wc_summary": "92;63;228;87", "wc_strengths": "51;50;54;92", "wc_weaknesses": "219;124;263;163", "wc_questions": "112;127;17;23", "wc_review": "474;364;562;365", "wc_reply_reviewers": "193;24;0;0", "wc_reply_authors": "1528;842;591;505", "reply_reviewers": "1;1;0;0", "reply_authors": "3;2;2;1", "rating_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 117.5, 64.73214039408862 ], "wc_strengths_avg": [ 61.75, 17.52676524633111 ], "wc_weaknesses_avg": [ 192.25, 52.99705180479382 ], "wc_questions_avg": [ 69.75, 50.07681599303214 ], "wc_review_avg": [ 441.25, 82.8171932632349 ], "wc_reply_reviewers_avg": [ 54.25, 80.70432144563264 ], "wc_reply_authors_avg": [ 866.5, 401.48630113616576 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=16979939216988961962&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=4bSQ3lsfEV", "pdf": "https://openreview.net/pdf?id=4bSQ3lsfEV", "email": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "4bUeP3qrNu", "title": "A Systematic Comparison of Syllogistic Reasoning in Humans and Language Models", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "A central component of rational behavior is logical inference: the process of determining which conclusions follow from a set of premises. Psychologists have documented several ways in which humans' inferences deviate from the rules of logic. Do language models, which are trained on text generated by humans, replicate these biases, or are they able to overcome them? Focusing on the case of syllogisms -inferences from two simple premises, which have been studied extensively in psychology - we show that larger models are more logical than smaller ones, and also more logical than humans. At the same time, even the largest models make systematic errors, some of which mirror human reasoning biases such as ordering effects and logical fallacies. Overall, we find that language models mimic the human biases included in their training data, but are able to overcome them in some cases.", "keywords": "Language models;Logic;Reasoning;Syllogisms;Cognitive Science", "primary_area": "applications to neuroscience & cognitive science", "supplementary_material": "", "author": "Tiwalayo Eisape;Michael Henry Tessler;Fei Sha;Ishita Dasgupta;Sjoerd van Steenkiste;Tal Linzen", "authorids": "~Tiwalayo_Eisape1;~Michael_Henry_Tessler1;~Fei_Sha3;~Ishita_Dasgupta1;~Sjoerd_van_Steenkiste1;~Tal_Linzen1", "gender": ";M;;M;M;M", "homepage": "https://eisape.github.io/;https://www.mit.edu/~tessler/;;http://www.sjoerdvansteenkiste.com/;http://tallinzen.net;http://feisha.org", "dblp": ";;169/6218;183/9326;169/3438;13/3601", "google_scholar": ";DQjm2rAAAAAJ;;i-AStBYAAAAJ;5mJDXjoAAAAJ;HDHOS0QAAAAJ", "orcid": ";;;;;", "linkedin": "eisape/;;idasgupta6/;;;", "or_profile": "~Tiwalayo_Eisape1;~Michael_Henry_Tessler1;~Ishita_Dasgupta1;~Sjoerd_van_Steenkiste1;~Tal_Linzen1;~Fei_Sha2", "aff": "Massachusetts Institute of Technology;Google DeepMind;Google DeepMind;Google;Google;Google", "aff_domain": "mit.edu;deepmind.com;deepmind.com;google.com;google.com;google.com", "position": "PhD student;Researcher;Researcher;Researcher;Researcher;research scientist", "bibtex": "@misc{\neisape2024a,\ntitle={A Systematic Comparison of Syllogistic Reasoning in Humans and Language Models},\nauthor={Tiwalayo Eisape and Michael Henry Tessler and Fei Sha and Ishita Dasgupta and Sjoerd van Steenkiste and Tal Linzen},\nyear={2024},\nurl={https://openreview.net/forum?id=4bUeP3qrNu}\n}", "github": "", "project": "", "reviewers": "Q1gw;acDo;Wr2g;hrcw", "site": "https://openreview.net/forum?id=4bUeP3qrNu", "pdf_size": 3784401, "rating": "3;3;5;6", "confidence": "4;4;3;3", "soundness": "1;2;4;3", "contribution": "3;2;2;3", "presentation": "4;3;3;3", "wc_summary": "42;70;53;119", "wc_strengths": "23;58;64;66", "wc_weaknesses": "179;178;89;180", "wc_questions": "5;65;2;93", "wc_review": "249;371;208;458", 
"wc_reply_reviewers": "187;0;0;0", "wc_reply_authors": "500;942;326;804", "reply_reviewers": "1;0;0;0", "reply_authors": "1;2;1;1", "rating_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 1.118033988749895 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 71.0, 29.45335295004628 ], "wc_strengths_avg": [ 52.75, 17.426631917843448 ], "wc_weaknesses_avg": [ 156.5, 38.977557645393844 ], "wc_questions_avg": [ 41.25, 39.04084399702445 ], "wc_review_avg": [ 321.5, 99.02146232004453 ], "wc_reply_reviewers_avg": [ 46.75, 80.97337525384502 ], "wc_reply_authors_avg": [ 643.0, 243.03291958086666 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.9622504486493761, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1488268212595618188&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;1;1;1;1", "aff_unique_norm": "Massachusetts Institute of Technology;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": "https://web.mit.edu;https://deepmind.com", "aff_unique_abbr": "MIT;DeepMind", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;1;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "4bat0pSQBq", "title": "FLOOD SIMULATION WITH PHYSICS-INFORMED MESSAGE PASSING", "track": "main", "status": "Reject", "tldr": "", "abstract": "Flood modeling is an important tool for supporting preventive and emergency\nmeasures to mitigate flood risks. Recently, there has been an increasing interest\nin exploring machine learning-based models as an alternative to traditional hydrodynamic models for flood simulation to address challenges such as scalability and accuracy. However, current ML approaches are ineffective at modeling early stages of flooding events, limiting their ability to simulate the entire evolution of the flood. Another key challenge is how to incorporate physics domain-knowledge into these data-driven models. In this paper, we address these challenges by introducing a physics-inspired graph neural network for flood simulation. Given a (geographical) region and precipitation data, our model predicts water depths in an autoregressive fashion. We propose a message-passing framework inspired by the conservation of momentum and mass expressed in the shallow-water equations, which describe the physical process of a flooding event. 
Empirical results on a dataset covering 9 regions and 7 historical precipitation events demonstrate that our model outperforms the best baseline, and is able to capture the propagation of water flow better, especially at the very early stage of the flooding event.", "keywords": "Physics-informed GNN;flood simulation;PDEs", "primary_area": "applications to physical sciences (physics, chemistry, biology, etc.)", "supplementary_material": "", "author": "Arnold Kazadi;James Doss-Gollin;Arlei Lopes da Silva", "authorids": "~Arnold_Kazadi1;jdossgollin@rice.edu;~Arlei_Lopes_da_Silva1", "gender": ";;M", "homepage": "https://kanz76.github.io/;;https://cs.rice.edu/~al110/index.html", "dblp": ";;19/2546", "google_scholar": "8K_HYF8AAAAJ;;atGtis4AAAAJ", "orcid": "0000-0002-9690-5212;;0000-0003-1792-0076", "linkedin": ";;", "or_profile": "~Arnold_Kazadi1;jdossgollin@rice.edu;~Arlei_Lopes_da_Silva1", "aff": "Rice University;;Rice University", "aff_domain": "rice.edu;;rice.edu", "position": "PhD student;;Assistant Professor", "bibtex": "@misc{\nkazadi2024flood,\ntitle={{FLOOD} {SIMULATION} {WITH} {PHYSICS}-{INFORMED} {MESSAGE} {PASSING}},\nauthor={Arnold Kazadi and James Doss-Gollin and Arlei Lopes da Silva},\nyear={2024},\nurl={https://openreview.net/forum?id=4bat0pSQBq}\n}", "github": "", "project": "", "reviewers": "rnbK;j8ny;g1kJ;iyD6", "site": "https://openreview.net/forum?id=4bat0pSQBq", "pdf_size": 6725532, "rating": "3;5;5;5", "confidence": "3;4;4;4", "soundness": "3;2;2;3", "contribution": "2;3;2;2", "presentation": "3;3;4;3", "wc_summary": "137;99;193;110", "wc_strengths": "69;68;72;125", "wc_weaknesses": "237;129;385;591", "wc_questions": "90;145;209;236", "wc_review": "533;441;859;1062", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "428;688;794;776", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 134.75, 36.36189626518397 ], "wc_strengths_avg": [ 83.5, 24.005207768315607 ], "wc_weaknesses_avg": [ 335.5, 173.25919888998678 ], "wc_questions_avg": [ 170.0, 56.79348554191757 ], "wc_review_avg": [ 723.75, 249.51891210888203 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 671.5, 146.19422013198744 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:82NQ0Rn2MSwJ:scholar.google.com/&scioq=FLOOD+SIMULATION+WITH+PHYSICS-INFORMED+MESSAGE+PASSING&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Rice University", "aff_unique_dep": "", "aff_unique_url": "https://www.rice.edu", "aff_unique_abbr": "Rice", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "4cUuvzIxOW", "title": "Unveiling the Potential of Probabilistic Embeddings in Self-Supervised Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "In recent years, self-supervised learning has played a pivotal role in advancing machine learning by allowing models to acquire meaningful representations from unlabeled data. 
An intriguing research avenue involves developing self-supervised models within an information-theoretic framework, but many studies often deviate from the stochasticity assumptions made when deriving their objectives. To gain deeper insights into this issue, we propose to explicitly model the representation with stochastic embeddings and assess their effects on performance, information compression and potential for out-of-distribution detection. From an information-theoretic perspective, we seek to unravel the relationships between stochastic embeddings and the regularization mechanisms of Barlow Twins and VICReg and investigate the impact of probabilistic modeling on the information bottleneck, shedding light on a trade-off between compression and preservation of relevant information in both representation and loss space. Emphasizing the importance of distinguishing between these two spaces, we demonstrate how constraining one can affect the other, potentially leading to performance degradation. Moreover, our findings suggest that introducing an additional bottleneck in the loss space can significantly enhance the ability to detect out-of-distribution examples, only leveraging either representation features or the variance of their underlying distribution.", "keywords": "self-supervised learning;information bottleneck;probabilistic embeddings;computer vision", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Denis Janiak;Jakub Binkowski;Piotr Bielak;Tomasz Jan Kajdanowicz", "authorids": "~Denis_Janiak1;~Jakub_Binkowski1;~Piotr_Bielak1;~Tomasz_Jan_Kajdanowicz1", "gender": "M;M;;M", "homepage": ";;https://piotrbielak.com/;http://www.kajdanowicz.com", "dblp": "306/8791;306/9925;;74/608", "google_scholar": "ICTlqh0AAAAJ;yFzn0owAAAAJ;https://scholar.google.ca/citations?user=Z0lkjn0AAAAJ;GOoaHHEAAAAJ", "orcid": ";0000-0001-7386-5150;;0000-0002-8417-1012", "linkedin": "denis-janiak-4a7b7a159/;;;kajdanowicz", "or_profile": "~Denis_Janiak1;~Jakub_Binkowski1;~Piotr_Bielak1;~Tomasz_Jan_Kajdanowicz1", "aff": "Wroclaw University of Science and Technology;Technical University of Wroclaw;Wroclaw University of Science and Technology;Wroclaw University of Science and Technology", "aff_domain": "pwr.edu.pl;pwr.edu.pl;pwr.edu.pl;pwr.edu.pl", "position": "PhD student;PhD student;PhD student;Associate Professor", "bibtex": "@misc{\njaniak2024unveiling,\ntitle={Unveiling the Potential of Probabilistic Embeddings in Self-Supervised Learning},\nauthor={Denis Janiak and Jakub Binkowski and Piotr Bielak and Tomasz Jan Kajdanowicz},\nyear={2024},\nurl={https://openreview.net/forum?id=4cUuvzIxOW}\n}", "github": "", "project": "", "reviewers": "", "site": "https://openreview.net/forum?id=4cUuvzIxOW", "pdf_size": 0, "rating": "", "confidence": "", "soundness": "", "contribution": "", "presentation": "", "wc_summary": "", "wc_strengths": "", "wc_weaknesses": "", "wc_questions": "", "wc_review": "", "wc_reply_reviewers": "", "wc_reply_authors": "", "reply_reviewers": "", "reply_authors": "", "rating_avg": [ 0, 0 ], "confidence_avg": [ 0, 0 ], "soundness_avg": [ 0, 0 ], "contribution_avg": [ 0, 0 ], "presentation_avg": [ 0, 0 ], "wc_summary_avg": [ 0, 0 ], "wc_strengths_avg": [ 0, 0 ], "wc_weaknesses_avg": [ 0, 0 ], "wc_questions_avg": [ 0, 0 ], "wc_review_avg": [ 0, 0 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 0, 
0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:-OjeBM2rFT0J:scholar.google.com/&scioq=Unveiling+the+Potential+of+Probabilistic+Embeddings+in+Self-Supervised+Learning&hl=en&as_sdt=0,5", "gs_version_total": 3, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Wroclaw University of Science and Technology;Wroclaw University of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.pwr.edu.pl;https://www.pwr.edu.pl", "aff_unique_abbr": "WUST;WUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Poland" }, { "id": "4d32Ufnoxb", "title": "Enhancing Graph Tasks with a Dual-Block Graph Transformer: A Synergistic Approach to Local and Global Attention", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "In this work, we address the limitations of traditional Transformers in graph tasks. While some approaches predominantly leverage local attention mechanisms akin to Graph Neural Networks (GNNs), often neglecting the global attention capabilities inherent in the Transformer model. Conversely, other methods overly focus on the global attention aspect of the Transformer, ignoring the importance of local attention mechanisms in the context of graph structure. To this end, we propose a novel Message Passing Transformer with strategic modifications to the original Transformer, significantly enhancing its performance on graph tasks by improving the handling of local attention. Building on this, we further propose a novel Dual-Block Graph Transformer that synergistically integrates local and global attention mechanisms. This architecture comprises two distinct blocks inside each head: the Message Passing Block, designed to emulate local attention, and a second block that encapsulates the global attention mechanism. This dual-block design inside each head enables our model to capture both fine-grained local and high-level global interactions in graph tasks, leading to a more comprehensive and robust graph representation. We empirically validate our model on node classification tasks, particularly on heterophilic graphs, and graph classification tasks. The results demonstrate that our Dual-Block Graph Transformer significantly outperforms both GNN and Graph Transformer models. 
Remarkably, this superior performance is achieved without the necessity for complex positional encoding strategies, underscoring the efficacy of our approach.", "keywords": "Graph Transformer;Transformer;Graph Learning;Semi-supervised", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Shihan Guan;Lei Song;Xin Chen;Yueying Li;Qinghua Si;Laurent Albera;R\u00e9gine LE BOUQUIN JEANN\u00c8S;Youyong Kong;Huazhong Shu", "authorids": "~Shihan_Guan1;~Lei_Song1;~Xin_Chen33;~Yueying_Li2;~Qinghua_Si1;~Laurent_Albera1;~R\u00e9gine_LE_BOUQUIN_JEANN\u00c8S1;~Youyong_Kong1;~Huazhong_Shu1", "gender": "M;M;M;F;M;;F;M;M", "homepage": "https://orcid.org/0000-0003-3563-5308;;;;https://github.com/smqh2165;https://perso.univ-rennes1.fr/laurent.albera/;https://esir.univ-rennes.fr/node/58;https://cse.seu.edu.cn/2023/1024/c23024a469537/page.htm;https://cse.seu.edu.cn/", "dblp": ";;;https://dblp.org/rec/journals/kbs/LiLZZ21;;;;154/7641;", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;;;;;;;Fsc-Sa4AAAAJ", "orcid": "0000-0003-3563-5308;0000-0003-2044-3311;0000-0001-5772-9917;0000-0002-5327-8511;;;0000-0002-4050-2895;;", "linkedin": ";;;;;;;;", "or_profile": "~Shihan_Guan1;~Lei_Song1;~Xin_Chen33;~Yueying_Li2;~Qinghua_Si1;~Laurent_Albera1;~R\u00e9gine_LE_BOUQUIN_JEANN\u00c8S1;~Youyong_Kong1;~Huazhong_Shu1", "aff": "Southeast University;Southeast University;Southeast University;Southeast University;Southeast University;University of Rennes;Universit\u00e9 de Rennes, France;Southeast University;Southeast University", "aff_domain": "seu.edu.cn;seu.edu.cn;seu.edu.cn;seu.edu.cn;seu.edu.cn;univ-rennes.fr;univ-rennes.fr;seu.edu.cn;seu.edu.cn", "position": "PhD student;PhD student;PhD student;PhD student;MS student;Full Professor;Full Professor;Associate Professor;Full Professor", "bibtex": "@misc{\nguan2024enhancing,\ntitle={Enhancing Graph Tasks with a Dual-Block Graph Transformer: A Synergistic Approach to Local and Global Attention},\nauthor={Shihan Guan and Lei Song and Xin Chen and Yueying Li and Qinghua Si and Laurent Albera and R{\\'e}gine LE BOUQUIN JEANN{\\`E}S and Youyong Kong and Huazhong Shu},\nyear={2024},\nurl={https://openreview.net/forum?id=4d32Ufnoxb}\n}", "github": "", "project": "", "reviewers": "PHGs;NDKD;5j2q;wt2P;GViT", "site": "https://openreview.net/forum?id=4d32Ufnoxb", "pdf_size": 1296110, "rating": "3;3;3;5;5", "confidence": "5;5;4;4;4", "soundness": "3;2;2;2;3", "contribution": "2;1;2;2;2", "presentation": "3;2;3;3;3", "wc_summary": "97;105;48;61;70", "wc_strengths": "74;34;27;25;23", "wc_weaknesses": "315;94;87;86;95", "wc_questions": "3;208;7;45;57", "wc_review": "489;441;169;217;245", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "0;0;0;0;0", "rating_avg": [ 3.8, 0.9797958971132712 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "contribution_avg": [ 1.8, 0.4000000000000001 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 76.2, 21.572204337990126 ], "wc_strengths_avg": [ 36.6, 19.06410239166796 ], "wc_weaknesses_avg": [ 135.4, 89.87235392488616 ], "wc_questions_avg": [ 64.0, 74.98799903984637 ], "wc_review_avg": [ 312.2, 128.00999960940553 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 9, 0 ], 
"corr_rating_confidence": -0.6666666666666665, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:bgMoNg1HgoAJ:scholar.google.com/&scioq=Enhancing+Graph+Tasks+with+a+Dual-Block+Graph+Transformer:+A+Synergistic+Approach+to+Local+and+Global+Attention&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0;1;2;0;0", "aff_unique_norm": "Southeast University;University of Rennes;Universit\u00e9 de Rennes", "aff_unique_dep": ";;", "aff_unique_url": "https://www.seu.edu.cn/;https://www.univ-rennes1.fr;https://www.univ-rennes1.fr", "aff_unique_abbr": "SEU;UR1;UR1", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1;1;0;0", "aff_country_unique": "China;France" }, { "id": "4dw16l4iqC", "title": "Adapting ConvNets for New Cameras without Retraining", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "In the vast majority of research, it is assumed images will be perspective or can be rectified to a perspective projection. However, in many applications it is beneficial to use non conventional cameras, such as fisheye cameras, that have a larger field of view (FOV). The issue arises that these large FOV images can't be rectified to a perspective projection without significant cropping of the original image. To address this issue we propose Rectify Convolutions (RectConv); a new approach for adapting pre-trained convolutional networks to operate with new non-perspective images, without any retraining. Replacing the convolutional layers of the network with RectConv layers allows the network to see both rectified patches and the entire FOV. We demonstrate RectConv adapting multiple pre-trained networks to perform segmentation and detection on fisheye imagery from two publicly available datasets. Our method shows improved results over both direct application of the network and naive pre-rectification of imagery. Our approach requires no additional data or training, and we develop a software tool that transforms existing pre-trained networks to operate on new camera geometries. We believe this work is a significant step toward adapting the vast resources available for perspective images to operate across a broad range of camera geometries. Code available upon acceptance.", "keywords": "Convolutional Networks;Pretrained;Wide FOV;Fisheye;Segmentation;Rectification", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Ryan Griffiths;Donald G. Dansereau", "authorids": "~Ryan_Griffiths1;~Donald_G._Dansereau1", "gender": "M;M", "homepage": "https://ryanbgriffiths.github.io;http://roboticimaging.org", "dblp": ";27/5079G", "google_scholar": "OfqJfm8AAAAJ;MVfxch0AAAAJ", "orcid": ";0000-0003-2540-1639", "linkedin": ";donald-dansereau/", "or_profile": "~Ryan_Griffiths1;~Donald_Gilbert_Dansereau1", "aff": "University of Sydney;University of Sydney", "aff_domain": "usyd.edu.au;sydney.edu.au", "position": "PhD student;Senior Lecturer", "bibtex": "@misc{\ngriffiths2024adapting,\ntitle={Adapting ConvNets for New Cameras without Retraining},\nauthor={Ryan Griffiths and Donald G. 
Dansereau},\nyear={2024},\nurl={https://openreview.net/forum?id=4dw16l4iqC}\n}", "github": "", "project": "", "reviewers": "v97K;Vp9L;v5En;Qoap", "site": "https://openreview.net/forum?id=4dw16l4iqC", "pdf_size": 25776189, "rating": "1;3;5;5", "confidence": "4;5;5;4", "soundness": "2;3;3;2", "contribution": "2;1;2;3", "presentation": "2;3;3;3", "wc_summary": "104;112;63;162", "wc_strengths": "16;28;66;70", "wc_weaknesses": "174;267;334;182", "wc_questions": "23;71;23;77", "wc_review": "317;478;486;491", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 3.5, 1.6583123951777 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 110.25, 35.187888541371734 ], "wc_strengths_avg": [ 45.0, 23.430749027719962 ], "wc_weaknesses_avg": [ 239.25, 65.73193668225515 ], "wc_questions_avg": [ 48.5, 25.588083163847973 ], "wc_review_avg": [ 443.0, 72.89375830618147 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:f4fcP470nUwJ:scholar.google.com/&scioq=Adapting+ConvNets+for+New+Cameras+without+Retraining&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "University of Sydney", "aff_unique_dep": "", "aff_unique_url": "https://www.sydney.edu.au", "aff_unique_abbr": "USYD", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Australia" }, { "id": "4e0ItHjNo9", "title": "Rethinking Counterfactual Fairness: On Which Individuals to Enforce, and How?", "track": "main", "status": "Reject", "tldr": "", "abstract": "Fairness in human and algorithmic decision-making is crucial in areas such as criminal justice, education, and social welfare. Recently, counterfactual fairness has drawn increasing research interest, suggesting that decision-making for individuals should remain the same when intervening with different values on the protected attributes. Nevertheless, the question of \"which attributes and individuals should be protected\" is rarely discussed in the existing counterfactual fairness literature. For example, when considering leg disability as a protected attribute, the algorithms should not treat individuals with leg disabilities differently in college admissions, but one may naturally take into this factor for the purpose of selecting runner athletes. In other words, when and how to enforce fairness is expected to depend on the causal relation between the protected attribute and the outcome of interest. Formally, this paper proposes principal counterfactual fairness using the concept of principal stratification from the causal inference literature, focusing on whether an algorithm is counterfactually fair for individuals whose protected attribute has no individual causal effect on the outcome of interest. To examine whether an algorithm satisfies principal counterfactual fairness, we derive the statistical bounds, and propose a post-processing approach to achieving principal counterfactual fairness with minimal individual decision changes. 
Experiments are conducted using synthetic and real-world datasets to verify the effectiveness of our methods.", "keywords": "counterfactual fairness;fairness;causal effect;principal stratification", "primary_area": "causal reasoning", "supplementary_material": "", "author": "Haoxuan Li;Chunyuan Zheng;Zhichao Jiang;Zhuangyan Fang;Yue Liu;Zhi Geng;Kun Zhang", "authorids": "~Haoxuan_Li6;~Chunyuan_Zheng1;~Zhichao_Jiang2;~Zhuangyan_Fang1;~Yue_Liu9;~Zhi_Geng1;~Kun_Zhang1", "gender": "M;M;M;M;;M;M", "homepage": "https://haoxuanli-pku.github.io/;;https://zhichaoj-git.github.io;https://www.semanticscholar.org/author/Zhuangyan-Fang/1382582145;;https://stxy.btbu.edu.cn/szdw/bssds/34339356074b408c8650309f05f24558.htm;http://www.andrew.cmu.edu/user/kunz1/", "dblp": "145/4965-1.html;;;https://dblp.uni-trier.de/pid/251/3212;;;96/3115-1", "google_scholar": "gtDqiucAAAAJ;https://scholar.google.com/citations?hl=en;;https://scholar.google.com/citations?hl=zh-CN;;;RGoypN4AAAAJ", "orcid": "0000-0003-3620-3769;0000-0002-0306-7310;;;;;", "linkedin": ";;;;;;", "or_profile": "~Haoxuan_Li6;~Chunyuan_Zheng1;~Zhichao_Jiang2;~Zhuangyan_Fang1;~Yue_Liu9;~Zhi_Geng1;~Kun_Zhang1", "aff": "Peking University;Peking University;SUN YAT-SEN UNIVERSITY;Xiaomi ;;School of mathematical Science, Peking University, Peking University;Carnegie Mellon University", "aff_domain": "pku.edu.cn;stu.pku.edu.cn;sysu.edu.cn;xiaomi.com;;math.pku.edu.cn;cmu.edu", "position": "PhD student;PhD student;Full Professor;Researcher;;Full Professor;Associate Professor", "bibtex": "@misc{\nli2024rethinking,\ntitle={Rethinking Counterfactual Fairness: On Which Individuals to Enforce, and How?},\nauthor={Haoxuan Li and Chunyuan Zheng and Zhichao Jiang and Zhuangyan Fang and Yue Liu and Zhi Geng and Kun Zhang},\nyear={2024},\nurl={https://openreview.net/forum?id=4e0ItHjNo9}\n}", "github": "", "project": "", "reviewers": "wXfm;CWdU;R7Tu;tVGj", "site": "https://openreview.net/forum?id=4e0ItHjNo9", "pdf_size": 334128, "rating": "3;3;5;6", "confidence": "5;4;3;4", "soundness": "2;2;2;2", "contribution": "1;2;2;3", "presentation": "2;3;2;2", "wc_summary": "163;75;159;251", "wc_strengths": "111;16;31;136", "wc_weaknesses": "309;236;187;1130", "wc_questions": "143;25;92;166", "wc_review": "726;352;469;1683", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.0, 0.0 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 162.0, 62.24949798994366 ], "wc_strengths_avg": [ 73.5, 51.051444641655344 ], "wc_weaknesses_avg": [ 465.5, 386.0974617891187 ], "wc_questions_avg": [ 106.5, 54.14101956926929 ], "wc_review_avg": [ 807.5, 523.2602125138122 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5443310539518174, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:MN5AFgzzfGoJ:scholar.google.com/&scioq=Rethinking+Counterfactual+Fairness:+On+Which+Individuals+to+Enforce,+and+How%3F&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;1;2;0;3", "aff_unique_norm": "Peking University;Sun Yat-sen University;Xiaomi Corporation;Carnegie Mellon University", "aff_unique_dep": ";;;", "aff_unique_url": 
"http://www.pku.edu.cn;http://www.sysu.edu.cn;https://www.xiaomi.com;https://www.cmu.edu", "aff_unique_abbr": "Peking U;SYSU;Xiaomi;CMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Peking", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "China;United States" }, { "title": "Language Model Detectors Are Easily Optimized Against", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19453", "id": "4eJDMjYZZG", "author_site": "Charlotte Nicks, Eric Mitchell, Rafael Rafailov, Archit Sharma, Christopher Manning, Chelsea Finn, Stefano Ermon", "tldr": "", "abstract": "The fluency and general applicability of large language models (LLMs) has motivated significant interest in detecting whether a piece of text was written by a language model. While both academic and commercial detectors have been deployed in some settings, particularly education, other research has highlighted the fragility of these systems. In this paper, we demonstrate a data-efficient attack that fine-tunes language models to confuse existing detectors, leveraging recent developments in reinforcement learning of language models. We use the `human-ness' score (often just a log probability) of various open-source and commercial detectors as a reward function for reinforcement learning, subject to a KL-divergence constraint that the resulting model does not differ significantly from the original. For a 7B parameter Llama-2 model, fine-tuning for under a day reduces the AUROC of the OpenAI RoBERTa-Large detector from 0.84 to 0.63, while perplexity on OpenWebText increases from 8.7 to only 9.0; with a larger perplexity budget, we can drive AUROC to 0.30 (worse than random). Similar to traditional adversarial attacks, we find that this increase in 'detector evasion' generalizes to other detectors not used during training. In light of our empirical results, we advise against continued reliance on LLM-generated text detectors. 
Models, datasets, and selected experiment code will be released at https://github.com/charlottttee/llm-detector-evasion.", "keywords": "detector;language model;learning from preferences", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "", "author": "Charlotte Nicks;Eric Mitchell;Rafael Rafailov;Archit Sharma;Christopher D Manning;Chelsea Finn;Stefano Ermon", "authorids": "cnicks13@stanford.edu;~Eric_Mitchell1;~Rafael_Rafailov1;~Archit_Sharma1;~Christopher_D_Manning1;~Chelsea_Finn1;~Stefano_Ermon1", "gender": ";M;M;M;M;F;M", "homepage": ";https://ericmitchell.ai;https://rmrafailov.github.io/;;https://nlp.stanford.edu/~manning/;https://ai.stanford.edu/~cbfinn/;http://cs.stanford.edu/~ermon/", "dblp": ";238/0419;272/5358;220/3163.html;m/ChristopherDManning;131/1783;47/8135", "google_scholar": ";q77J4fgAAAAJ;TwABcRgAAAAJ;_0IIzxgAAAAJ;1zmDOdwAAAAJ;vfPE6hgAAAAJ;", "orcid": ";0000-0002-7487-1744;;;0000-0001-6155-649X;;", "linkedin": ";;;;christopher-manning-011575/;;", "or_profile": "cnicks13@stanford.edu;~Eric_Mitchell1;~Rafael_Rafailov1;~Archit_Sharma1;~Christopher_D_Manning1;~Chelsea_Finn1;~Stefano_Ermon1", "aff": ";Stanford University;Stanford University;Stanford University;Computer Science Department, Stanford University;Google;Stanford University", "aff_domain": ";stanford.edu;stanford.edu;stanford.edu;cs.stanford.edu;google.com;stanford.edu", "position": ";PhD student;PhD student;Graduate Student;Full Professor;Research Scientist;Associate Professor", "bibtex": "@inproceedings{\nnicks2024language,\ntitle={Language Model Detectors Are Easily Optimized Against},\nauthor={Charlotte Nicks and Eric Mitchell and Rafael Rafailov and Archit Sharma and Christopher D Manning and Chelsea Finn and Stefano Ermon},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=4eJDMjYZZG}\n}", "github": "", "project": "", "reviewers": "iNUy;oZ9u;bGzn", "pdf_size": 893329, "rating": "6;6;6", "confidence": "4;3;4", "soundness": "3;3;2", "contribution": "3;3;2", "presentation": "2;3;2", "wc_summary": "82;127;86", "wc_strengths": "48;87;107", "wc_weaknesses": "62;72;107", "wc_questions": "29;64;1", "wc_review": "221;350;301", "wc_reply_reviewers": "6;0;0", "wc_reply_authors": "757;993;621", "reply_reviewers": "1;0;0", "reply_authors": "1;2;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 98.33333333333333, 20.336065390226192 ], "wc_strengths_avg": [ 80.66666666666667, 24.499433100017278 ], "wc_weaknesses_avg": [ 80.33333333333333, 19.293061504650375 ], "wc_questions_avg": [ 31.333333333333332, 25.77250904010361 ], "wc_review_avg": [ 290.6666666666667, 53.168495266359464 ], "wc_reply_reviewers_avg": [ 2.0, 2.8284271247461903 ], "wc_reply_authors_avg": [ 790.3333333333334, 153.68654968980064 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10995996193096347572&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 2, "openreview": "https://openreview.net/forum?id=4eJDMjYZZG", "pdf": 
"https://openreview.net/pdf?id=4eJDMjYZZG", "email": ";stanford.edu;stanford.edu;stanford.edu;cs.stanford.edu;google.com;stanford.edu", "author_num": 7, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Stanford University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.stanford.edu;https://www.google.com", "aff_unique_abbr": "Stanford;Google", "aff_campus_unique_index": "0;0;0;0;1;0", "aff_campus_unique": "Stanford;Mountain View", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "4fH5ELoWFu", "title": "Towards Certified Probabilistic Robustness with High Accuracy", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Adversarial examples pose a security threat to many critical systems built on neural networks (such as face recognition systems and self-driving cars). While many methods have been proposed to build robust models, how to build certifiably robust yet accurate neural network models remains an open problem. For example, adversarial training improves empirical robustness, but they do not provide certification of the model's robustness. Conversely, certified training provides certified robustness but at the cost of a significant accuracy drop. In this work, we propose a novel approach that aims to achieve both high accuracy and certified probabilistic robustness. Our method has two parts which together achieve our goal, \\emph{i.e.}, a probabilistic robust training method with an additional goal of minimizing variance in divergence in a given vicinity and a runtime inference method for certified probabilistic robustness of the predictions. Compared to alternative methods such as randomized smoothing and certified training, our approach avoids introducing strong noise during training, is effective against a variety of perturbations, and most importantly, achieves certified probabilistic robustness without sacrificing accuracy. 
Our experiments on multiple models trained on different datasets demonstrate that our approach significantly outperforms existing approaches in terms of both certification rate and accuracy.", "keywords": "robustness;adversarial training;probabilistic Robustness", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "", "author": "Ruihan Zhang;Peixin Zhang;Jun Sun", "authorids": "~Ruihan_Zhang4;~Peixin_Zhang1;~Jun_Sun12", "gender": "F;M;M", "homepage": ";http://pxzhang.cn;https://sunjun.site", "dblp": ";;", "google_scholar": ";nS25GTQAAAAJ;https://scholar.google.com.sg/citations?user=DVsEyn0AAAAJ", "orcid": ";;", "linkedin": "zhangruihan/;;", "or_profile": "~Ruihan_Zhang4;~Peixin_Zhang1;~Jun_Sun12", "aff": "Singapore Management University;Singapore Management University;Singapore Management University", "aff_domain": "smu.edu.sg;smu.edu.sg;smu.edu.sg", "position": "PhD student;Postdoc;Full Professor", "bibtex": "@misc{\nzhang2024towards,\ntitle={Towards Certified Probabilistic Robustness with High Accuracy},\nauthor={Ruihan Zhang and Peixin Zhang and Jun Sun},\nyear={2024},\nurl={https://openreview.net/forum?id=4fH5ELoWFu}\n}", "github": "", "project": "", "reviewers": "d8Hi;iJ6m;Dkh4;kTWZ", "site": "https://openreview.net/forum?id=4fH5ELoWFu", "pdf_size": 869728, "rating": "3;3;5;5", "confidence": "3;3;4;5", "soundness": "1;2;2;1", "contribution": "1;2;2;3", "presentation": "2;1;2;2", "wc_summary": "44;74;18;65", "wc_strengths": "41;68;47;74", "wc_weaknesses": "192;194;974;203", "wc_questions": "484;273;105;216", "wc_review": "761;609;1144;558", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 1.5, 0.5 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_avg": [ 50.25, 21.568205766822608 ], "wc_strengths_avg": [ 57.5, 13.82931668593933 ], "wc_weaknesses_avg": [ 390.75, 336.7650330720219 ], "wc_questions_avg": [ 269.5, 137.79060200173305 ], "wc_review_avg": [ 768.0, 229.56807269304676 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.9045340337332909, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:zxhVKVfQM8YJ:scholar.google.com/&scioq=Towards+Certified+Probabilistic+Robustness+with+High+Accuracy&hl=en&as_sdt=0,44", "gs_version_total": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Singapore Management University", "aff_unique_dep": "", "aff_unique_url": "https://www.smu.edu.sg", "aff_unique_abbr": "SMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Singapore" }, { "id": "4fVuBf5HE9", "title": "Towards Analyzing Self-attention via Linear Neural Network", "track": "main", "status": "Reject", "tldr": "", "abstract": "Self-attention is a key component of the transformer architecture which has driven much of recent advances in AI. Theoretical analysis of self-attention has received significant attention and remains a work in progress. 
In this paper, we analyze gradient flow training of a simplified transformer model consisting of a single linear self-attention layer (thus it lacks softmax, MLP, and layer-normalization) with a single head on a histogram-like problem: the input is a sequence of characters from an alphabet and the output is the vector of counts of each letter in the input sequence. Our analysis goes via a reduction to 2-layer linear neural networks in which the input layer matrix is a diagonal matrix. We provide a complete analysis of gradient flow on these networks. Our reduction to linear neural networks involves one assumption which we empirically verify. Our analysis extends to various extensions of the histogram problem.", "keywords": "transformers;linear neural networks;gradient flow analysis", "primary_area": "learning theory", "supplementary_material": "", "author": "Pritam Chandra;Tanmay Kumar Sinha;Kabir Ahuja;Ankit Garg;Navin Goyal", "authorids": "~Pritam_Chandra1;~Tanmay_Kumar_Sinha1;~Kabir_Ahuja1;~Ankit_Garg1;~Navin_Goyal1", "gender": "M;M;M;;", "homepage": ";;https://kabirahuja2431.github.io/;https://ankit-garg-6.github.io/;", "dblp": ";;https://dblp.uni-trier.de/pid/265/5632;;20/6275", "google_scholar": ";;xQ4sUrYAAAAJ;25Ha82sAAAAJ;", "orcid": ";;;;", "linkedin": "pritamchandra;tanmay-sinha-b747171b3/;kabirahuja2431/;;", "or_profile": "~Pritam_Chandra1;~Tanmay_Kumar_Sinha1;~Kabir_Ahuja1;~Ankit_Garg1;~Navin_Goyal1", "aff": "Microsoft Research;Microsoft Research;Microsoft;Microsoft;Microsoft", "aff_domain": "research.microsoft.com;research.microsoft.com;microsoft.com;microsoft.com;microsoft.com", "position": "Research Fellow;Research Fellow;Research Fellow;Researcher;Researcher", "bibtex": "@misc{\nchandra2024towards,\ntitle={Towards Analyzing Self-attention via Linear Neural Network},\nauthor={Pritam Chandra and Tanmay Kumar Sinha and Kabir Ahuja and Ankit Garg and Navin Goyal},\nyear={2024},\nurl={https://openreview.net/forum?id=4fVuBf5HE9}\n}", "github": "", "project": "", "reviewers": "nN7u;7ZKc;4ez9", "site": "https://openreview.net/forum?id=4fVuBf5HE9", "pdf_size": 541289, "rating": "3;5;5", "confidence": "3;4;4", "soundness": "2;3;2", "contribution": "2;2;1", "presentation": "2;3;2", "wc_summary": "113;59;59", "wc_strengths": "54;29;29", "wc_weaknesses": "225;70;198", "wc_questions": "7;64;4", "wc_review": "399;222;290", "wc_reply_reviewers": "27;0;0", "wc_reply_authors": "21;28;0", "reply_reviewers": "1;0;0", "reply_authors": "1;1;0", "rating_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 1.6666666666666667, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 77.0, 25.45584412271571 ], "wc_strengths_avg": [ 37.333333333333336, 11.785113019775793 ], "wc_weaknesses_avg": [ 164.33333333333334, 67.60834925428138 ], "wc_questions_avg": [ 25.0, 27.60434748368452 ], "wc_review_avg": [ 303.6666666666667, 72.9032844862897 ], "wc_reply_reviewers_avg": [ 9.0, 12.727922061357855 ], "wc_reply_authors_avg": [ 16.333333333333332, 11.897712198383164 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 0.6666666666666666, 0.4714045207910317 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.9999999999999998, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:_0JlvKdQYy4J:scholar.google.com/&scioq=Towards+Analyzing+Self-attention+via+Linear+Neural+Network&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Microsoft", "aff_unique_dep": "Microsoft Research", "aff_unique_url": "https://www.microsoft.com/en-us/research", "aff_unique_abbr": "MSR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "4fbFKO4a2W", "title": "Guided Sketch-Based Program Induction by Search Gradients", "track": "main", "status": "Reject", "tldr": "", "abstract": "Many tasks can be easily solved using machine learning techniques. However, some tasks cannot readily be solved using statistical models, requiring a symbolic approach instead. Program induction is one of the ways that such tasks can be solved by means of capturing an interpretable and generalizable algorithm through training. However, contemporary approaches to program induction are not sophisticated enough to readily be applied to various types of tasks as they tend to be formulated as a single, all-encompassing model, usually parameterized by neural networks. In an attempt to make program induction a viable solution for many scenarios, we propose a framework for learning parameterized programs via search gradients using evolution strategies. This formulation departs from traditional program induction as it allows for the programmer to impart task-specific code to the program 'sketch', while also enjoying the benefits of accelerated learning through end-to-end gradient-based optimization.", "keywords": "program induction;program synthesis;optimization;discrete optimization;evolution strategies", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Ahmad Ayaz Amin", "authorids": "~Ahmad_Ayaz_Amin1", "gender": "", "homepage": "https://ayaz-amin.github.io/", "dblp": "", "google_scholar": "", "orcid": "", "linkedin": "ahmad-ayaz-amin-68b320177/", "or_profile": "~Ahmad_Ayaz_Amin1", "aff": "Toronto Metropolitan University", "aff_domain": "torontomu.ca", "position": "Undergrad student", "bibtex": "@misc{\namin2024guided,\ntitle={Guided Sketch-Based Program Induction by Search Gradients},\nauthor={Ahmad Ayaz Amin},\nyear={2024},\nurl={https://openreview.net/forum?id=4fbFKO4a2W}\n}", "github": "", "project": "", "reviewers": "EcME;MY2o;y6cE;a6L3", "site": "https://openreview.net/forum?id=4fbFKO4a2W", "pdf_size": 290408, "rating": "1;3;3;3", "confidence": "5;3;4;4", "soundness": "2;2;1;3", "contribution": "1;2;2;2", "presentation": "2;2;1;2", "wc_summary": "65;34;95;27", "wc_strengths": "37;12;28;42", "wc_weaknesses": "133;113;523;112", "wc_questions": "1;2;14;38", "wc_review": "236;161;660;219", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 2.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.0, 0.7071067811865476 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_avg": [ 55.25, 27.040478915877213 ], "wc_strengths_avg": [ 29.75, 11.409973707244026 ], "wc_weaknesses_avg": [ 220.25, 174.99339273241148 ], "wc_questions_avg": [ 13.75, 14.905955185763842 ], "wc_review_avg": [ 319.0, 198.8303296783466 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], 
"reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:sgOg5N870DAJ:scholar.google.com/&scioq=Guided+Sketch-Based+Program+Induction+by+Search+Gradients&hl=en&as_sdt=0,6", "gs_version_total": 2, "aff_unique_index": "0", "aff_unique_norm": "Toronto Metropolitan University", "aff_unique_dep": "", "aff_unique_url": "https://www.tmu.ca/", "aff_unique_abbr": "TMU", "aff_country_unique_index": "0", "aff_country_unique": "Canada" }, { "title": "The Hedgehog & the Porcupine: Expressive Linear Attentions with Softmax Mimicry", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19452", "id": "4g02l2N2Nx", "author_site": "Michael Zhang, Kush Bhatia, Hermann Kumbong, Christopher Re", "tldr": "", "abstract": "Linear attentions have shown promise for improving Transformer efficiency, reducing attention's quadratic complexity to linear in sequence length. This holds exciting promise for (1) training linear Transformers from scratch, (2) `inetuned-conversion of task-specific Transformers into linear versions that recover task performance, and (3) pretrained-conversion of Transformers, such as language models, into linear versions readily finetunable on downstream tasks. However, linear attentions often underperform compared to standard softmax attention. To close this performance gap, we study the behaviors of softmax and linear attentions in various train-from-scratch and finetuned-conversion settings. We find prior linear attentions lack key properties of softmax attention tied to good performance: low-entropy (or spiky) weights and dot-product monotonicity. We further observe surprisingly simple feature maps that retain these properties match softmax performance, but are inefficient to compute in linear attention. We thus propose Hedgehog, a learnable linear attention that retains the spiky and monotonic properties of softmax attention while maintaining linear complexity. Hedgehog uses simple, trainable MLPs to produce attention weights mimicking softmax attention. Experiments show Hedgehog recovers over 99\\% of standard Transformer performance in train-from-scratch and finetuned-conversion settings, outperforming prior linear attentions by up to 6 perplexity points on WikiText-103 when training causal GPT models from scratch, and up to 8.7 GLUE score points when converting finetuned bidirectional BERT models. Hedgehog also enables pretrained-conversion. Converting a pretrained GPT-2 into a linear attention variant achieves state-of-the-art 16.7 perplexity on WikiText-103 for 125M subquadratic decoder models. We finally turn a pretrained Llama-2 7B into a viable linear attention Llama. 
With low-rank adaptation, Hedgehog-Llama-2 7B achieves 28.1 higher ROUGE-1 points over the base standard attention model, where prior linear attentions lead to 16.5 point drops.", "keywords": "linear attention;transformers", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Michael Zhang;Kush Bhatia;Hermann Kumbong;Christopher Re", "authorids": "~Michael_Zhang4;~Kush_Bhatia3;~Hermann_Kumbong1;~Christopher_Re1", "gender": "M;;M;", "homepage": "https://michaelzhang.xyz/;;https://kumbong.github.io/;", "dblp": ";;359/5994;", "google_scholar": "DG_asaIAAAAJ;;NnL2qHgAAAAJ;", "orcid": ";;;", "linkedin": ";;hermannkumbong/;", "or_profile": "~Michael_Zhang4;~Kush_Bhatia3;~Hermann_Kumbong1;~Christopher_Re1", "aff": "Stanford University;;Stanford University;", "aff_domain": "stanford.edu;;stanford.edu;", "position": "PhD student;;MS student;", "bibtex": "@inproceedings{\nzhang2024the,\ntitle={The Hedgehog \\& the Porcupine: Expressive Linear Attentions with Softmax Mimicry},\nauthor={Michael Zhang and Kush Bhatia and Hermann Kumbong and Christopher Re},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=4g02l2N2Nx}\n}", "github": "", "project": "", "reviewers": "ZtxG;PsQx;y6Q1", "pdf_size": 4785029, "rating": "5;6;8", "confidence": "4;4;4", "soundness": "3;3;3", "contribution": "3;3;3", "presentation": "2;2;4", "wc_summary": "59;79;184", "wc_strengths": "43;10;154", "wc_weaknesses": "44;51;111", "wc_questions": "66;14;43", "wc_review": "212;154;492", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1077;496;549", "reply_reviewers": "0;0;0", "reply_authors": "2;1;1", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.9428090415820634 ], "wc_summary_avg": [ 107.33333333333333, 54.822947344661756 ], "wc_strengths_avg": [ 69.0, 61.59545437773797 ], "wc_weaknesses_avg": [ 68.66666666666667, 30.070288030250428 ], "wc_questions_avg": [ 41.0, 21.275964529643932 ], "wc_review_avg": [ 286.0, 147.5759691368031 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 707.3333333333334, 262.2877978269079 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 48, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15609051419806727889&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=4g02l2N2Nx", "pdf": "https://openreview.net/pdf?id=4g02l2N2Nx", "email": "stanford.edu;;stanford.edu;", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "4g67WoYwMV", "title": "PromptFix: Few-shot Backdoor Removal via Adversarial Prompt Tuning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Pre-trained language models (PLMs) have attracted tons of attention over the past few years with their unparalleled performances. 
Meanwhile, the soaring cost to train PLMs and their amazing generalizability have contributed to few-shot fine-tuning and prompting as the most popular training paradigms for natural language processing (NLP) models. However, existing studies have shown that these NLP models can be backdoored such that model behavior is manipulated when the trigger tokens are presented. In this paper, we propose PromptFix, a novel backdoor mitigation strategy for NLP models via adversarial prompt-tuning in few-shot settings. Unlike existing NLP backdoor removal methods, which rely on accurate trigger inversion and subsequent model fine-tuning, PromptFix keeps the model parameters intact and only utilizes two extra sets of soft tokens which approximate the trigger and counteract it respectively. The use of soft tokens and adversarial optimization eliminates the need to enumerate possible backdoor configurations and enables an adaptive balance between trigger finding and preservation of performance. Experiments with various backdoor attacks validate the effectiveness of the proposed method. The performances when domain shift is present further shows PromptFix's applicability to pretrained models on unknown data which is common in prompt tuning scenarios.", "keywords": "backdoor mitigation;backdoor removal;adversarial training;prompt tuning", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Tianrong Zhang;Zhaohan Xi;Ting Wang;Prasenjit Mitra;Jinghui Chen", "authorids": "~Tianrong_Zhang1;~Zhaohan_Xi1;~Ting_Wang1;~Prasenjit_Mitra1;~Jinghui_Chen1", "gender": "M;M;M;M;M", "homepage": "https://zhangtianrong.github.io/profile/#en;https://zhaohan-xi.github.io;https://alps-lab.github.io/;http://www.personal.psu.edu/pum10/;https://jinghuichen.github.io/", "dblp": ";224/9296;12/2633-6.html;19/3308;67/5633", "google_scholar": ";wQgnjMIAAAAJ;cwcBTegAAAAJ;8PbgiPkAAAAJ;mKia7Y4AAAAJ", "orcid": ";;;;", "linkedin": ";;;prasenjit-mitra-962471/;", "or_profile": "~Tianrong_Zhang1;~Zhaohan_Xi1;~Ting_Wang1;~Prasenjit_Mitra1;~Jinghui_Chen1", "aff": "Pennsylvania State University;State University of New York at Binghamton;State University of New York at Stony Brook;Pennsylvania State University;Pennsylvania State University", "aff_domain": "psu.edu;binghamton.edu;cs.stonybrook.edu;psu.edu;psu.edu", "position": "PhD student;Assistant Professor;Associate Professor;Full Professor;Assistant Professor", "bibtex": "@misc{\nzhang2024promptfix,\ntitle={PromptFix: Few-shot Backdoor Removal via Adversarial Prompt Tuning},\nauthor={Tianrong Zhang and Zhaohan Xi and Ting Wang and Prasenjit Mitra and Jinghui Chen},\nyear={2024},\nurl={https://openreview.net/forum?id=4g67WoYwMV}\n}", "github": "", "project": "", "reviewers": "H931;7Ta8;Qy5u", "site": "https://openreview.net/forum?id=4g67WoYwMV", "pdf_size": 467125, "rating": "3;5;5", "confidence": "3;4;4", "soundness": "2;2;2", "contribution": "2;3;3", "presentation": "2;2;2", "wc_summary": "89;59;124", "wc_strengths": "142;30;94", "wc_weaknesses": "235;87;118", "wc_questions": "187;97;42", "wc_review": "653;273;378", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.0, 0.0 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 90.66666666666667, 26.562295750848715 ], 
"wc_strengths_avg": [ 88.66666666666667, 45.87906809089401 ], "wc_weaknesses_avg": [ 146.66666666666666, 63.73033465748909 ], "wc_questions_avg": [ 108.66666666666667, 59.768070256803696 ], "wc_review_avg": [ 434.6666666666667, 160.2255354873942 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 3, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.9999999999999998, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6652982913500088575&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "Pennsylvania State University;State University of New York at Binghamton;State University of New York at Stony Brook", "aff_unique_dep": ";;", "aff_unique_url": "https://www.psu.edu;https://www.binghamton.edu;https://www.stonybrook.edu", "aff_unique_abbr": "PSU;SUNY Binghamton;SUNY Stony Brook", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Binghamton;Stony Brook", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Diffusion-TS: Interpretable Diffusion for General Time Series Generation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19451", "id": "4h1apFjO99", "author_site": "Xinyu Yuan, Yan Qiao", "tldr": "", "abstract": "Denoising diffusion probabilistic models (DDPMs) are becoming the leading paradigm for generative models. It has recently shown breakthroughs in audio synthesis, time series imputation and forecasting. In this paper, we propose Diffusion-TS, a novel diffusion-based framework that generates multivariate time series samples of high quality by using an encoder-decoder transformer with disentangled temporal representations, in which the decomposition technique guides Diffusion-TS to capture the semantic meaning of time series while transformers mine detailed sequential information from the noisy model input. Different from existing diffusion-based approaches, we train the model to directly reconstruct the sample instead of the noise in each diffusion step, combining a Fourier-based loss term. Diffusion-TS is expected to generate time series satisfying both interpretablity and realness. In addition, it is shown that the proposed Diffusion-TS can be easily extended to conditional generation tasks, such as forecasting and imputation, without any model changes. This also motivates us to further explore the performance of Diffusion-TS under irregular settings. 
Finally, through qualitative and quantitative experiments, results show that Diffusion-TS achieves the state-of-the-art results on various realistic analyses of time series.", "keywords": "Diffusion models;Synthetic Time series;Imputation;Forecasting", "primary_area": "generative models", "supplementary_material": "/attachment/fbd19945fb6b17fb5f703c7b893e7ce83afa9b69.zip", "author": "Xinyu Yuan;Yan Qiao", "authorids": "~Xinyu_Yuan3;~Yan_Qiao1", "gender": "M;F", "homepage": "https://y-debug-sys.github.io/;http://faculty.hfut.edu.cn/qiaoyan/zh_CN/index.htm", "dblp": ";65/7820", "google_scholar": "https://scholar.google.co.in/citations?hl=en;3NPBIgcAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Xinyu_Yuan3;~Yan_Qiao1", "aff": "Hefei University of Technology;Hefei University of Technology", "aff_domain": "hfut.edu.cn;hfut.edu.cn", "position": "MS student;Associate Professor", "bibtex": "@inproceedings{\nyuan2024diffusionts,\ntitle={Diffusion-{TS}: Interpretable Diffusion for General Time Series Generation},\nauthor={Xinyu Yuan and Yan Qiao},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=4h1apFjO99}\n}", "github": "", "project": "", "reviewers": "Prro;iyrP;bNeL", "pdf_size": 7259356, "rating": "5;6;8", "confidence": "4;3;4", "soundness": "2;2;3", "contribution": "2;2;3", "presentation": "2;3;3", "wc_summary": "79;72;130", "wc_strengths": "27;137;29", "wc_weaknesses": "216;135;59", "wc_questions": "124;204;206", "wc_review": "446;548;424", "wc_reply_reviewers": "263;30;0", "wc_reply_authors": "1355;965;515", "reply_reviewers": "3;1;0", "reply_authors": "4;3;1", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 93.66666666666667, 25.84999462712173 ], "wc_strengths_avg": [ 64.33333333333333, 51.38957957493804 ], "wc_weaknesses_avg": [ 136.66666666666666, 64.10581530216706 ], "wc_questions_avg": [ 178.0, 38.19249489974001 ], "wc_review_avg": [ 472.6666666666667, 54.020572213021055 ], "wc_reply_reviewers_avg": [ 97.66666666666667, 117.54809890234532 ], "wc_reply_authors_avg": [ 945.0, 343.2200460346103 ], "reply_reviewers_avg": [ 1.3333333333333333, 1.247219128924647 ], "reply_authors_avg": [ 2.6666666666666665, 1.247219128924647 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.18898223650461363, "gs_citation": 71, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1266541908830140535&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=4h1apFjO99", "pdf": "https://openreview.net/pdf?id=4h1apFjO99", "email": "hfut.edu.cn;hfut.edu.cn", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Hefei University of Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.hfut.edu.cn/", "aff_unique_abbr": "HUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "4hqe5fgmfj", "title": "Regret Rates for $\\epsilon$-Greedy Strategies for Nonparametric Bandits with Delayed Rewards", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Incorporating delayed feedback is often crucial in applying multi-armed bandit algorithms in real-world sequential 
decision making problems. In this paper, we present finite time regret upper bounds for $\\epsilon$-greedy type randomized allocation strategies in a nonparametric contextual bandits framework with delayed rewards. The strategies presented differ in how the exploration rate changes as a function of delays. We consider unbounded random delays and use the Nadaraya-Watson estimator for estimating the mean reward functions. We also propose practical data-driven strategies that adaptively choose between the two proposed strategies.", "keywords": "Contextual Bandits;Delayed Rewards;Nonparametric Estimation;Nadaraya-Watson Estimator;Regret", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/c863652ce5a44f6ee404938e74b7197c34dafeb7.pdf", "author": "Sakshi Arya;Yuhong Yang", "authorids": "~Sakshi_Arya1;~Yuhong_Yang1", "gender": "F;M", "homepage": "https://sakshiarya.github.io/about/;http://users.stat.umn.edu/~yangx374/", "dblp": ";52/5811", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-7828-6569;0000-0003-3618-3083", "linkedin": "sakshi-arya-2704a3173/;", "or_profile": "~Sakshi_Arya1;~Yuhong_Yang1", "aff": "Case Western Reserve University;", "aff_domain": "case.edu;", "position": "Assistant Professor;", "bibtex": "@misc{\narya2024regret,\ntitle={Regret Rates for \\${\\textbackslash}epsilon\\$-Greedy Strategies for Nonparametric Bandits with Delayed Rewards},\nauthor={Sakshi Arya and Yuhong Yang},\nyear={2024},\nurl={https://openreview.net/forum?id=4hqe5fgmfj}\n}", "github": "", "project": "", "reviewers": "S8Nn;Zt6w;85Kj;pTZn", "site": "https://openreview.net/forum?id=4hqe5fgmfj", "pdf_size": 822021, "rating": "3;5;5;5", "confidence": "4;3;3;4", "soundness": "3;3;3;4", "contribution": "2;2;2;3", "presentation": "1;2;3;3", "wc_summary": "67;163;64;101", "wc_strengths": "26;57;96;59", "wc_weaknesses": "252;65;88;183", "wc_questions": "258;71;17;8", "wc_review": "603;356;265;351", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 98.75, 39.83952183447989 ], "wc_strengths_avg": [ 59.5, 24.804233509624925 ], "wc_weaknesses_avg": [ 147.0, 75.04332082204252 ], "wc_questions_avg": [ 88.5, 100.78318312099495 ], "wc_review_avg": [ 393.75, 126.10982317012422 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:a-cW-8vMgOcJ:scholar.google.com/&scioq=Regret+Rates+for+%24%5Cepsilon%24-Greedy+Strategies+for+Nonparametric+Bandits+with+Delayed+Rewards&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Case Western Reserve University", "aff_unique_dep": "", "aff_unique_url": "https://www.case.edu", "aff_unique_abbr": "CWRU", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "4hrK7d60My", "title": "Memorization for Good: Encryption with Autoregressive Language Models", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Over-parameterized neural language models 
(LMs) can memorize and recite long sequences of training data. While such memorization is normally associated with undesired properties such as overfitting and information leaking, our work casts memorization as an unexplored capability of LMs. We propose the first symmetric encryption algorithm with autoregressive language models (SELM). We show that autoregressive LMs can encode arbitrary data into a compact real-valued vector (i.e., encryption) and then losslessly decode the vector to the original message (i.e., decryption) via random subspace optimization and greedy decoding. While SELM is not amenable to conventional cryptanalysis, we investigate its security through a novel empirical variant of the classic IND-CPA (indistinguishability under chosen-plaintext attack) game.", "keywords": "language models;symmetric encryption;cryptography;application", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/52276f3f7303f39867a50633ec51e106986d7550.zip", "author": "Samuel Stevens;Yu Su", "authorids": "~Samuel_Stevens1;~Yu_Su2", "gender": "M;M", "homepage": "https://samuelstevens.me;http://ysu1989.github.io", "dblp": "279/6356;38/1070-1", "google_scholar": "uR-A0LAAAAAJ;rIh5OqoAAAAJ", "orcid": "0009-0000-9493-7766;", "linkedin": ";", "or_profile": "~Samuel_Stevens1;~Yu_Su2", "aff": "Ohio State University, Columbus;Microsoft", "aff_domain": "osu.edu;microsoft.com", "position": "PhD student;Senior Researcher", "bibtex": "@misc{\nstevens2024memorization,\ntitle={Memorization for Good: Encryption with Autoregressive Language Models},\nauthor={Samuel Stevens and Yu Su},\nyear={2024},\nurl={https://openreview.net/forum?id=4hrK7d60My}\n}", "github": "", "project": "", "reviewers": "dUZZ;vHhk;wc1D;XtDw", "site": "https://openreview.net/forum?id=4hrK7d60My", "pdf_size": 1078887, "rating": "3;3;3;6", "confidence": "5;4;3;3", "soundness": "2;2;2;4", "contribution": "2;3;2;3", "presentation": "2;1;2;4", "wc_summary": "153;28;90;53", "wc_strengths": "7;23;5;85", "wc_weaknesses": "183;140;19;142", "wc_questions": "134;60;25;60", "wc_review": "477;251;139;340", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "271;162;103;240", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 1.0897247358851685 ], "wc_summary_avg": [ 81.0, 47.05847426340977 ], "wc_strengths_avg": [ 30.0, 32.51153641401772 ], "wc_weaknesses_avg": [ 121.0, 61.33922073192649 ], "wc_questions_avg": [ 69.75, 39.75157229594824 ], "wc_review_avg": [ 301.75, 123.73232196964543 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 194.0, 65.85969936159745 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11166588927408510344&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Ohio State University;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.osu.edu;https://www.microsoft.com", "aff_unique_abbr": "OSU;Microsoft", "aff_campus_unique_index": "0", "aff_campus_unique": "Columbus;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": 
"4i4fgCOBDE", "title": "Networked Inequality: Preferential Attachment Bias in Graph Neural Network Link Prediction", "track": "main", "status": "Reject", "tldr": "", "abstract": "Graph neural network (GNN) link prediction is increasingly deployed in citation, collaboration, and online social networks to recommend academic literature, collaborators, and friends. While prior research has investigated the dyadic fairness of GNN link prediction, the within-group fairness and ``rich get richer'' dynamics of link prediction remain underexplored. However, these aspects have significant consequences for degree and power imbalances in networks. In this paper, we shed light on how degree bias in networks affects Graph Convolutional Network (GCN) link prediction. In particular, we theoretically uncover that GCNs with a symmetric normalized graph filter have a within-group preferential attachment bias. We validate our theoretical analysis on real-world citation, collaboration, and online social networks. We further bridge GCN's preferential attachment bias with unfairness in link prediction and propose a new within-group fairness metric. This metric quantifies disparities in link prediction scores between social groups, towards combating the amplification of degree and power disparities. Finally, we propose a simple training-time strategy to alleviate within-group unfairness, and we show that it is effective on citation, online social, and credit networks.", "keywords": "graph learning;fairness;link prediction", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/55e28c0d8107ea58843db8331acc1284d2c1e5a8.zip", "author": "Arjun Subramonian;Levent Sagun;Yizhou Sun", "authorids": "~Arjun_Subramonian1;~Levent_Sagun1;~Yizhou_Sun1", "gender": "Agender;Non-Binary;F", "homepage": "http://arjunsubramonian.github.io/;http://cims.nyu.edu/~sagun/;http://web.cs.ucla.edu/~yzsun/", "dblp": "282/0168.html;155/9866;37/3868", "google_scholar": "MrdlDhoAAAAJ;-iPZaBcAAAAJ;https://scholar.google.com.tw/citations?user=TQgOjK0AAAAJ", "orcid": "0000-0002-0415-3800;0000-0001-5403-4124;", "linkedin": "arjuns22/;;", "or_profile": "~Arjun_Subramonian1;~Levent_Sagun1;~Yizhou_Sun1", "aff": "University of California, Los Angeles;Meta;University of California, Los Angeles", "aff_domain": "ucla.edu;meta.com;ucla.edu", "position": "PhD student;Research scientist;Associate Professor", "bibtex": "@misc{\nsubramonian2024networked,\ntitle={Networked Inequality: Preferential Attachment Bias in Graph Neural Network Link Prediction},\nauthor={Arjun Subramonian and Levent Sagun and Yizhou Sun},\nyear={2024},\nurl={https://openreview.net/forum?id=4i4fgCOBDE}\n}", "github": "", "project": "", "reviewers": "LXYw;bJuY;qGCf;LDLM", "site": "https://openreview.net/forum?id=4i4fgCOBDE", "pdf_size": 3213881, "rating": "5;5;6;8", "confidence": "4;4;3;3", "soundness": "3;3;3;3", "contribution": "2;3;3;3", "presentation": "3;4;4;3", "wc_summary": "69;200;58;226", "wc_strengths": "73;98;60;95", "wc_weaknesses": "148;112;131;215", "wc_questions": "87;301;2;2", "wc_review": "377;711;251;538", "wc_reply_reviewers": "136;43;18;6", "wc_reply_authors": "898;450;452;361", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 138.25, 75.41344376170605 ], "wc_strengths_avg": [ 81.5, 
15.724185193516387 ], "wc_weaknesses_avg": [ 151.5, 38.81043674065006 ], "wc_questions_avg": [ 98.0, 122.23133804389118 ], "wc_review_avg": [ 469.25, 172.70838862081945 ], "wc_reply_reviewers_avg": [ 50.75, 50.99693618248061 ], "wc_reply_authors_avg": [ 540.25, 209.79081843588867 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8164965809277259, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15580880506040045795&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of California, Los Angeles;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.ucla.edu;https://meta.com", "aff_unique_abbr": "UCLA;Meta", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "4i8QiVIlAi", "title": "Semantic-Guided Consistency and Discrimination for Siamese Representation Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Recently, self-supervised representation learning with Siamese structure (Siamese representation learning) has shown promising results. Current methods commonly adopt instance discrimination to learn invariant global representations at the image-level from randomly cropped views, which risks introducing object-irrelevant nuisances of background information in the image-level representations, i.e., random cropping induces nuisances of background. Further works aiming to solve the problem simply match the visual patterns across views independently, failing to look into the foreground and background regions. Intuitively, the nuisances of background could be alleviated by separating foreground and background in random crops. Therefore, we present a new self-supervised learning framework, semantic-guided consistency and discrimination (SCD) that learns to separate the foreground and background semantics in random crops while learning image-level representations. Specifically, we extract foreground and background semantics by aggregating the global feature map encoding the image content, using the learned feature-level saliency maps (indicating the foreground pixels on feature maps) as weights. Then we construct triplets from the foreground and background semantics of the two augmented views and distinguish foreground from background with triplet loss. Our SCD strategy can easily be applied to existing Siamese representation learning frameworks, including contrastive learning (e.g., MoCo-v2) and non-contrastive learning (e.g., BYOL) paradigm. 
By applying our SCD to both paradigms, we show that our method can achieve consistent improvements on classification and dense prediction tasks.", "keywords": "Contrastive learning;Siamese representation learning", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Zheng Gao;Ioannis Patras", "authorids": "~Zheng_Gao2;~Ioannis_Patras2", "gender": "M;M", "homepage": "https://zaczgao.github.io/;http://www.eecs.qmul.ac.uk/~ioannisp/", "dblp": "03/4645-3;18/1556", "google_scholar": "SXJFLpAAAAAJ;https://scholar.google.com.tw/citations?user=OBYLxRkAAAAJ", "orcid": "0000-0001-5541-8840;0000-0003-3913-4738", "linkedin": ";ioannis-patras-1053767/", "or_profile": "~Zheng_Gao2;~Ioannis_Patras2", "aff": "Queen Mary, University of London;Queen Mary, University of London", "aff_domain": "qmul.ac.uk;qmul.ac.uk", "position": "PhD student;Full Professor", "bibtex": "@misc{\ngao2024semanticguided,\ntitle={Semantic-Guided Consistency and Discrimination for Siamese Representation Learning},\nauthor={Zheng Gao and Ioannis Patras},\nyear={2024},\nurl={https://openreview.net/forum?id=4i8QiVIlAi}\n}", "github": "", "project": "", "reviewers": "YhuM;131y;CkNt;Exdt;Ue4Q", "site": "https://openreview.net/forum?id=4i8QiVIlAi", "pdf_size": 840901, "rating": "3;3;3;5;6", "confidence": "5;5;4;4;4", "soundness": "2;2;1;3;3", "contribution": "2;1;1;2;3", "presentation": "3;2;2;3;4", "wc_summary": "38;65;55;80;47", "wc_strengths": "17;45;23;19;10", "wc_weaknesses": "84;426;329;94;145", "wc_questions": "2;1;23;129;10", "wc_review": "141;537;430;322;212", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "0;0;0;0;0", "rating_avg": [ 4.0, 1.2649110640673518 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "soundness_avg": [ 2.2, 0.7483314773547882 ], "contribution_avg": [ 1.8, 0.7483314773547883 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 57.0, 14.546477236774544 ], "wc_strengths_avg": [ 22.8, 11.872657663724665 ], "wc_weaknesses_avg": [ 215.6, 137.27140998765913 ], "wc_questions_avg": [ 33.0, 48.641546028061235 ], "wc_review_avg": [ 328.4, 143.20977620260427 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.6454972243679027, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:RklV1PE58mUJ:scholar.google.com/&scioq=Semantic-Guided+Consistency+and+Discrimination+for+Siamese+Representation+Learning&hl=en&as_sdt=0,10", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Queen Mary, University of London", "aff_unique_dep": "", "aff_unique_url": "https://www.qmul.ac.uk", "aff_unique_abbr": "QMUL", "aff_campus_unique_index": "0;0", "aff_campus_unique": "London", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "Scalable Neural Network Kernels", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19450", "id": "4iPw1klFWa", "author_site": "Arijit Sehanobish, Krzysztof Choromanski, YUNFAN ZHAO, Kumar Dubey, Valerii Likhosherstov", "tldr": "", "abstract": "We introduce the concept of scalable neural network kernels (SNNKs), the replacements of regular feedforward layers (FFLs), capable of approximating the latter, but with favorable computational properties. 
SNNKs effectively disentangle the inputs from the parameters of the neural network in the FFL, only to connect them in the final computation via the dot-product kernel. \nThey are also strictly more expressive, as allowing to model complicated relationships beyond the functions of the dot-products of parameter-input vectors. We also introduce the neural network bundling process that applies SNNKs to compactify deep neural network architectures, resulting in additional compression gains. In its extreme version, it leads to the fully bundled network whose optimal parameters can be expressed via explicit formulae for several loss functions (e.g. mean squared error), opening a possibility to bypass backpropagation. As a by-product of our analysis, we introduce the mechanism of the universal random features (or URFs), applied to instantiate several SNNK variants, and interesting on its own in the context of scalable kernel methods. We provide rigorous theoretical analysis of all these concepts as well as an extensive empirical evaluation, ranging from point-wise kernel estimation to Transformers' fine-tuning with novel adapter layers inspired by SNNKs. Our mechanism provides up to 5x reduction in the number of trainable parameters, while maintaining competitive accuracy.", "keywords": "scalable kernel methods;random features;deep neural networks", "primary_area": "metric learning, kernel learning, and sparse coding", "supplementary_material": "", "author": "Arijit Sehanobish;Krzysztof Marcin Choromanski;YUNFAN ZHAO;Kumar Avinava Dubey;Valerii Likhosherstov", "authorids": "~Arijit_Sehanobish1;~Krzysztof_Marcin_Choromanski1;~YUNFAN_ZHAO1;~Kumar_Avinava_Dubey1;~Valerii_Likhosherstov2", "gender": "M;;M;;M", "homepage": "https://github.com/arijitthegame/;;https://yzhao3685.github.io/;https://valerytyumen.github.io/;https://sites.google.com/site/kumaravinavadubey/", "dblp": "249/5322;78/11411;304/4614;232/4391.html;10/7789", "google_scholar": "MEby6-QAAAAJ;;wy_rqnwAAAAJ;iiVVfxUAAAAJ;tBbUAfsAAAAJ", "orcid": "0000-0003-2769-2003;;;;", "linkedin": "arijit-sehanobish-b76627112/;;;;", "or_profile": "~Arijit_Sehanobish1;~Krzysztof_Marcin_Choromanski1;~YUNFAN_ZHAO1;~Valerii_Likhosherstov2;~Kumar_A_Dubey1", "aff": "Kensho Technologies;Google Brain Robotics & Columbia University;Harvard University;Waymo;Google Research", "aff_domain": "kensho.com;columbia.edu;g.harvard.edu;waymo.com;google.com", "position": "Applied Scientist;research scientist & adjunct assistant professor;Postdoc;Researcher;Research Scientist", "bibtex": "@inproceedings{\nsehanobish2024scalable,\ntitle={Scalable Neural Network Kernels},\nauthor={Arijit Sehanobish and Krzysztof Marcin Choromanski and YUNFAN ZHAO and Kumar Avinava Dubey and Valerii Likhosherstov},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=4iPw1klFWa}\n}", "github": "", "project": "", "reviewers": "hP8B;YEib;uu7e;zChr", "pdf_size": 2818029, "rating": "5;5;8;8", "confidence": "4;3;3;3", "soundness": "3;2;4;4", "contribution": "3;2;4;4", "presentation": "3;3;3;3", "wc_summary": "33;219;99;221", "wc_strengths": "12;175;112;255", "wc_weaknesses": "28;197;126;104", "wc_questions": "131;10;21;179", "wc_review": "204;601;358;759", "wc_reply_reviewers": "49;27;0;0", "wc_reply_authors": "851;1599;816;1375", "reply_reviewers": "1;1;0;0", "reply_authors": "2;5;2;3", "rating_avg": [ 6.5, 1.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], 
"contribution_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 143.0, 80.46117075956576 ], "wc_strengths_avg": [ 138.5, 88.89460051094217 ], "wc_weaknesses_avg": [ 113.75, 60.26763227471277 ], "wc_questions_avg": [ 85.25, 71.890107108002 ], "wc_review_avg": [ 480.5, 214.2084265382667 ], "wc_reply_reviewers_avg": [ 19.0, 20.530465167647808 ], "wc_reply_authors_avg": [ 1160.25, 336.438237273946 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.0, 1.224744871391589 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5773502691896258, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14725734791680925783&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "openreview": "https://openreview.net/forum?id=4iPw1klFWa", "pdf": "https://openreview.net/pdf?id=4iPw1klFWa", "email": "kensho.com;columbia.edu;g.harvard.edu;waymo.com;google.com", "author_num": 5, "aff_unique_index": "0;1;2;3;1", "aff_unique_norm": "Kensho Technologies;Google;Harvard University;Waymo", "aff_unique_dep": ";Google Brain Robotics;;", "aff_unique_url": "https://www.kensho.com;https://ai.google;https://www.harvard.edu;https://www.waymo.com", "aff_unique_abbr": ";Google;Harvard;Waymo", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "4iQuByhNie", "title": "ContextNER: Contextual Phrase Generation at Scale", "track": "main", "status": "Reject", "tldr": "", "abstract": "Named Entity Recognition (NER) has seen significant progress in recent years, with numerous state-of-the-art (SOTA) models achieving high performance. \nHowever, very few studies have focused on the generation of entities' context.\nIn this paper, we introduce ContextNER, a task that aims to generate the relevant context for entities in a sentence, where the context is a phrase describing the entity but not necessarily present in the sentence. \nTo facilitate research in this task, we also present the EDGAR10-Q dataset, which consists of annual and quarterly reports from the top 1500 publicly traded companies. \nThe dataset is the largest of its kind, containing 1M sentences, 2.8M entities, and an average of 35 tokens per sentence, making it a challenging dataset. \nWe propose a baseline approach that combines a phrase generation algorithm with inferencing using a 220M language model, achieving a ROUGE-L score of 27% on the test split.\nAdditionally, we perform a one-shot inference with ChatGPT, which obtains a 30% ROUGE-L, highlighting the difficulty of the dataset. \nWe also evaluate models such as T5 and BART, which achieve a maximum ROUGE-L of 49% after supervised finetuning on EDGAR10-Q. \nWe also find that T5-large, when pre-finetuned on EDGAR10-Q, achieve SOTA results on downstream finance tasks such as Headline, FPB, and FiQA SA, outperforming vanilla version by 10.81 points.\nTo our surprise, this 66x smaller pre-finetuned model also surpasses the finance-specific LLM BloombergGPT-50B by 15 points. 
\nWe hope that our dataset and generated artifacts will encourage further research in this direction, leading to the development of more sophisticated language models for financial text analysis", "keywords": "Novel task;Relevant context of entities;NER", "primary_area": "datasets and benchmarks", "supplementary_material": "/attachment/d5e1e45eb895a73a8684f5d8ac14bc25d41f4eb5.zip", "author": "Himanshu Gupta;Shreyas Verma;Santosh Mashetty;Swaroop Mishra", "authorids": "~Himanshu_Gupta5;~Shreyas_Verma1;~Santosh_Mashetty1;~Swaroop_Mishra1", "gender": "M;M;M;M", "homepage": "https://him1411.github.io;;;https://swarooprm.github.io/", "dblp": ";;;249/2784", "google_scholar": "ydjuhxsAAAAJ;j6dIihMAAAAJ;eQN-aNAAAAAJ;-7LK2SwAAAAJ", "orcid": ";;;", "linkedin": "himanshugupta14/;shreyas-verma/;santoshmashetty/;", "or_profile": "~Himanshu_Gupta5;~Shreyas_Verma1;~Santosh_Mashetty1;~Swaroop_Mishra1", "aff": "Amazon;Simplr AI;Arizona State University;Google", "aff_domain": "amazon.com;simplr.ai;asu.edu;google.com", "position": "Researcher;Researcher;PhD student;Researcher", "bibtex": "@misc{\ngupta2024contextner,\ntitle={Context{NER}: Contextual Phrase Generation at Scale},\nauthor={Himanshu Gupta and Shreyas Verma and Santosh Mashetty and Swaroop Mishra},\nyear={2024},\nurl={https://openreview.net/forum?id=4iQuByhNie}\n}", "github": "", "project": "", "reviewers": "btb9;uqH6;kwQd", "site": "https://openreview.net/forum?id=4iQuByhNie", "pdf_size": 648322, "rating": "3;3;6", "confidence": "2;3;4", "soundness": "2;2;3", "contribution": "2;2;2", "presentation": "2;3;3", "wc_summary": "117;67;88", "wc_strengths": "13;17;177", "wc_weaknesses": "279;172;214", "wc_questions": "36;31;2", "wc_review": "445;287;481", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "530;754;451", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 4.0, 1.4142135623730951 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 90.66666666666667, 20.499322482029065 ], "wc_strengths_avg": [ 69.0, 76.38498979948002 ], "wc_weaknesses_avg": [ 221.66666666666666, 44.01767321832852 ], "wc_questions_avg": [ 23.0, 14.98888477061141 ], "wc_review_avg": [ 404.3333333333333, 84.25886039791634 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 578.3333333333334, 128.3337662330361 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8660254037844387, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10881753058535838737&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Amazon;Simplr AI;Arizona State University;Google", "aff_unique_dep": "Amazon.com, Inc.;;;Google", "aff_unique_url": "https://www.amazon.com;https://www.simplr.ai;https://www.asu.edu;https://www.google.com", "aff_unique_abbr": "Amazon;Simplr AI;ASU;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "4j4reehkjZ", "title": "JEN-1: Text-Guided Universal Music Generation with Omnidirectional Diffusion Models", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Music generation has attracted growing interest with the advancement of deep generative models. 
\nHowever, generating music conditioned on textual descriptions, known as text-to-music, remains challenging due to the complexity of musical structures and high sampling rate requirements.\nDespite the task's significance, prevailing generative models exhibit limitations in music quality, computational efficiency, and generalization ability. \nThis paper introduces JEN-1, a universal high-fidelity model for text-to-music generation. \nJEN-1 is a diffusion model incorporating both autoregressive and non-autoregressive training in an end-to-end manner, enabling up to 48kHz high-fidelity stereo music generation. \nThrough multi-task in-context learning, JEN-1 performs various generation tasks including text-guided music generation, music inpainting, and continuation. \nEvaluations demonstrate JEN-1's superior performance over state-of-the-art methods in text-music alignment and music quality while maintaining computational efficiency.\nOur anonymous demo pages are available at https://anonymous.4open.science/w/Jen1-Demo-Page-21D4", "keywords": "multimodal learning;text-to-music;music generation;non-autoregressive", "primary_area": "generative models", "supplementary_material": "", "author": "Peike Li;Boyu Chen;Yao Yao;Yikai Wang;Allen Wang;Alex Wang", "authorids": "~Peike_Li1;~Boyu_Chen3;~Yao_Yao5;~Yikai_Wang2;~Allen_Wang1;~Alex_Wang3", "gender": ";;M;M;M;M", "homepage": ";;https://yaoyao1995.github.io/;https://yikaiw.github.io/;;", "dblp": "251/5626;;07/4410-6;85/9555-1;;", "google_scholar": "dOzTcvwAAAAJ;o5wjqPEAAAAJ;hNO0NdEAAAAJ;MnW5aegAAAAJ;;", "orcid": ";;0000-0001-9887-4301;;;", "linkedin": "peikeli/;boyu-chen-356004236/;yao-yao-6119661b5/;;allen-wang-js/;dr-alex-wang-5a734b44", "or_profile": "~Peike_Li1;~Boyu_Chen3;~Yao_Yao5;~Yikai_Wang2;~Allen_Wang1;~Alex_Wang3", "aff": "Futureverse AI;futureverse;Tsinghua University;Tsinghua University;;Futureverse", "aff_domain": "futureverse.com;futureverse.com;tsinghua.edu.cn;tsinghua.edu.cn;;futureverse.com", "position": "Principal Researcher;Researcher;PhD student;Postdoc;;Principal Researcher", "bibtex": "@misc{\nli2024jen,\ntitle={{JEN}-1: Text-Guided Universal Music Generation with Omnidirectional Diffusion Models},\nauthor={Peike Li and Boyu Chen and Yao Yao and Yikai Wang and Allen Wang and Alex Wang},\nyear={2024},\nurl={https://openreview.net/forum?id=4j4reehkjZ}\n}", "github": "", "project": "", "reviewers": "Yv1M;TQzw;i8hT;seXG", "site": "https://openreview.net/forum?id=4j4reehkjZ", "pdf_size": 399984, "rating": "3;3;3;6", "confidence": "4;5;4;3", "soundness": "2;2;2;2", "contribution": "2;1;2;3", "presentation": "2;1;2;3", "wc_summary": "69;43;186;94", "wc_strengths": "64;54;71;37", "wc_weaknesses": "251;148;635;92", "wc_questions": "67;38;477;13", "wc_review": "451;283;1369;236", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.0, 0.0 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 98.0, 53.91196527673611 ], "wc_strengths_avg": [ 56.5, 12.776932339180638 ], "wc_weaknesses_avg": [ 281.5, 211.91094827780844 ], "wc_questions_avg": [ 148.75, 190.47621242559399 ], "wc_review_avg": [ 584.75, 459.7871110633703 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 
6, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 43, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16426015309601482257&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1;2;2;1", "aff_unique_norm": "Futureverse AI;Futureverse;Tsinghua University", "aff_unique_dep": ";;", "aff_unique_url": ";;https://www.tsinghua.edu.cn", "aff_unique_abbr": "Futureverse AI;;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;2;2", "aff_country_unique": "United States;;China" }, { "id": "4jBL79L5QS", "title": "Beyond Shortest-Paths: A Benchmark for Reinforcement Learning on Traffic Engineering", "track": "main", "status": "Reject", "tldr": "", "abstract": "Selecting efficient routes for data packets is an essential task in computer networking. Given the dynamic of today\u2019s network traffic, the optimal route varies greatly with the current network state. Despite the wealth of existing techniques, Traffic Engineering in networks with changing conditions is still a largely unsolved problem. Recent work aims at replacing Traffic Engineering heuristics with Reinforcement Learning, but does not provide a reference framework for training and evaluating under realistic network conditions in a reproducible manner. We fill this gap by casting distributed Traffic Engineering as a Swarm Markov Decision Process, and introducing a training and evaluation framework powered by a faithful network simulation engine that implements it. We show the effectiveness and versatility of our framework on a variety of scenarios, including ones where the agents outperform popular shortest-path routing algorithms.", "keywords": "Traffic Engineering;Routing Optimization;Multi-Agent Reinforcement Learning;Benchmark;Framework;Computer Networks", "primary_area": "datasets and benchmarks", "supplementary_material": "", "author": "Andreas Boltres;Niklas Freymuth;Patrick Jahnke;Gerhard Neumann", "authorids": "~Andreas_Boltres1;~Niklas_Freymuth1;~Patrick_Jahnke1;~Gerhard_Neumann2", "gender": ";M;M;M", "homepage": "https://boltr.es;;https://www.linkedin.com/in/patrick-jahnke-dr-ing-332007103/;https://alr.anthropomatik.kit.edu/", "dblp": ";255/7209;;60/4878", "google_scholar": "gltZtVsAAAAJ;FK1DbrcAAAAJ;LswjmU8AAAAJ;https://scholar.google.com.tw/citations?user=GL360kMAAAAJ", "orcid": ";;;", "linkedin": "andreas-boltres-81a989215;;;", "or_profile": "~Andreas_Boltres1;~Niklas_Freymuth1;~Patrick_Jahnke1;~Gerhard_Neumann1", "aff": "Karlsruher Institut f\u00fcr Technologie;Amazon;;Karlsruhe Institute of Technology", "aff_domain": "kit.edu;amazon.com;;kit.edu", "position": "PhD student;Intern;;Full Professor", "bibtex": "@misc{\nboltres2024beyond,\ntitle={Beyond Shortest-Paths: A Benchmark for Reinforcement Learning on Traffic Engineering},\nauthor={Andreas Boltres and Niklas Freymuth and Patrick Jahnke and Gerhard Neumann},\nyear={2024},\nurl={https://openreview.net/forum?id=4jBL79L5QS}\n}", "github": "", "project": "", "reviewers": "XHPP;3jBT;HzR2;Jksb;dSxa", "site": "https://openreview.net/forum?id=4jBL79L5QS", "pdf_size": 5085972, "rating": "3;3;3;3;6", "confidence": "4;4;5;4;3", "soundness": "2;3;2;2;3", "contribution": "1;2;2;2;3", "presentation": "2;2;3;2;3", "wc_summary": "79;53;131;110;74", "wc_strengths": "79;15;133;43;89", "wc_weaknesses": "308;65;543;84;54", "wc_questions": "343;50;174;55;1", "wc_review": "809;183;981;292;218", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "895;642;1084;678;127", "reply_reviewers": 
"0;0;0;0;0", "reply_authors": "2;1;2;1;1", "rating_avg": [ 3.6, 1.2 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "contribution_avg": [ 2.0, 0.6324555320336759 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 89.4, 27.659356463952665 ], "wc_strengths_avg": [ 71.8, 40.37028610252843 ], "wc_weaknesses_avg": [ 210.8, 190.64459079659198 ], "wc_questions_avg": [ 124.6, 123.14641691904804 ], "wc_review_avg": [ 496.6, 331.6809310165418 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 685.2, 321.4227123275516 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7905694150420948, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:_jPggHSMK20J:scholar.google.com/&scioq=Beyond+Shortest-Paths:+A+Benchmark+for+Reinforcement+Learning+on+Traffic+Engineering&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "Karlsruher Institut f\u00fcr Technologie;Amazon;Karlsruhe Institute of Technology", "aff_unique_dep": ";Amazon.com, Inc.;", "aff_unique_url": "https://www.kit.edu;https://www.amazon.com;https://www.kit.edu", "aff_unique_abbr": "KIT;Amazon;KIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Germany;United States" }, { "id": "4kJfWZChJI", "title": "Generalization or Specificity? Spectral Meta Estimation and Ensemble (SMEE) with Domain-specific Experts", "track": "main", "status": "Reject", "tldr": "", "abstract": "Existing domain generalization (DG) methodologies strive to construct a unified model trained on diverse source domains, with the goal of achieving robust performance on any unseen test domain. However, in practice, not all source domains contribute equally to effective knowledge transfer for a specific test domain. Consequently, the reliability of single-model generalization often falls short of classic empirical risk minimization (ERM). This paper departs from the conventional approaches and advocates for a paradigm that prioritizes specificity over broad generalization. We propose the Spectral Meta Estimation and Ensemble (SMEE) approach, which capitalizes on domain-specific expert models and leverages unsupervised ensemble learning to construct a weighted ensemble for test samples. Our comprehensive investigation reveals three key insights: (1) The proposed meta performance estimation strategy for model selection within the sources plays a pivotal role in accommodating stochasticity; (2) The proposed spectral unsupervised ensemble method for transferability estimation excels in constructing robust learners for multi-class classification tasks, while being entirely hyperparameter-free; and (3) Multi-expert test-time transferability estimation and ensemble proves to be a promising alternative to the prevailing single-model DG paradigm. Experiments conducted on the DomainBed benchmark substantiate the superiority of our approach, consistently surpassing state-of-the-art DG techniques. 
Importantly, our approach offers a noteworthy performance enhancement while maintaining remarkable computational efficiency, executing in mere milliseconds per test sample during inference.", "keywords": "Domain Generalization;Ensemble Learning;Spectral Analysis;Test-time Adaptation;Transfer Learning", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "/attachment/2b4a23f22275f78a699f340afea98e7f9431b220.zip", "author": "Siyang Li;Ziwei Wang;Dongrui Wu", "authorids": "~Siyang_Li5;~Ziwei_Wang5;~Dongrui_Wu1", "gender": "M;F;M", "homepage": ";https://scholar.google.com/citations?user=fjlXqvQAAAAJ&hl=en;https://sites.google.com/site/drwuhust/home", "dblp": ";;", "google_scholar": "5GFZxIkAAAAJ;fjlXqvQAAAAJ;UYGzCPEAAAAJ", "orcid": ";0000-0003-1324-2298;0000-0002-7153-9703", "linkedin": ";;", "or_profile": "~Siyang_Li5;~Ziwei_Wang5;~Dongrui_Wu1", "aff": "Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology", "aff_domain": "hust.edu.cn;hust.edu.cn;hust.edu.cn", "position": "PhD student;PhD student;Full Professor", "bibtex": "@misc{\nli2024generalization,\ntitle={Generalization or Specificity? Spectral Meta Estimation and Ensemble ({SMEE}) with Domain-specific Experts},\nauthor={Siyang Li and Ziwei Wang and Dongrui Wu},\nyear={2024},\nurl={https://openreview.net/forum?id=4kJfWZChJI}\n}", "github": "", "project": "", "reviewers": "52us;X4Bt;ExFD;6gJi", "site": "https://openreview.net/forum?id=4kJfWZChJI", "pdf_size": 2213079, "rating": "1;5;6;8", "confidence": "5;3;4;4", "soundness": "2;3;3;4", "contribution": "1;2;3;4", "presentation": "3;3;3;4", "wc_summary": "104;53;81;76", "wc_strengths": "33;41;187;79", "wc_weaknesses": "281;62;194;52", "wc_questions": "164;2;126;4", "wc_review": "582;158;588;211", "wc_reply_reviewers": "1386;0;0;0", "wc_reply_authors": "3201;1186;1251;885", "reply_reviewers": "3;0;0;0", "reply_authors": "5;2;2;2", "rating_avg": [ 5.0, 2.5495097567963922 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 78.5, 18.117670931993437 ], "wc_strengths_avg": [ 85.0, 61.40032573203501 ], "wc_weaknesses_avg": [ 147.25, 95.41324593577141 ], "wc_questions_avg": [ 74.0, 72.26340706055866 ], "wc_review_avg": [ 384.75, 201.13599255230278 ], "wc_reply_reviewers_avg": [ 346.5, 600.1556048226159 ], "wc_reply_authors_avg": [ 1630.75, 917.0388146092836 ], "reply_reviewers_avg": [ 0.75, 1.299038105676658 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5547001962252291, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:rjgNhRqq1t4J:scholar.google.com/&scioq=Generalization+or+Specificity%3F+Spectral+Meta+Estimation+and+Ensemble+(SMEE)+with+Domain-specific+Experts&hl=en&as_sdt=0,10", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Huazhong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.hust.edu.cn", "aff_unique_abbr": "HUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Pessimistic Nonlinear Least-Squares Value Iteration for Offline Reinforcement Learning", "status": "Poster", "track": "main", "site": 
"https://iclr.cc/virtual/2024/poster/19449", "id": "4kLVvIh8cp", "author_site": "Qiwei Di, Heyang Zhao, Jiafan He, Quanquan Gu", "tldr": "", "abstract": "Offline reinforcement learning (RL), where the agent aims to learn the optimal policy based on the data collected by a behavior policy, has attracted increasing attention in recent years. While offline RL with linear function approximation has been extensively studied with optimal results achieved under certain assumptions, many works shift their interest to offline RL with non-linear function approximation.\nHowever, limited works on offline RL with non-linear function approximation have instance-dependent regret guarantees.\n In this paper, we propose an oracle-efficient algorithm, dubbed Pessimistic Nonlinear Least-Square Value Iteration (PNLSVI), for offline RL with non-linear function approximation. Our algorithmic design comprises three innovative components: (1) a variance-based weighted regression scheme that can be applied to a wide range of function classes, (2) a subroutine for variance estimation, and (3) a planning phase that utilizes a pessimistic value iteration approach. Our algorithm enjoys a regret bound that has a tight dependency on the function class complexity and achieves minimax optimal instance-dependent regret when specialized to linear function approximation. Our work extends the previous instance-dependent results within simpler function classes, such as linear and differentiable function to a more general framework. To the best of our knowledge, this is the first statistically optimal algorithm for nonlinear offline RL.", "keywords": "Offline reinforcement learning;instance-dependent;least-squares value iteration", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Qiwei Di;Heyang Zhao;Jiafan He;Quanquan Gu", "authorids": "~Qiwei_Di1;~Heyang_Zhao1;~Jiafan_He1;~Quanquan_Gu1", "gender": "M;M;M;M", "homepage": "https://qiwei-di1234.github.io/;https://web.cs.ucla.edu/~hyzhao/;https://sites.google.com/g.ucla.edu/jiafan-he-homepage;http://web.cs.ucla.edu/~qgu/", "dblp": "354/3878;;214/5785;50/4597", "google_scholar": "SewL0pkAAAAJ;zHQ1ap0AAAAJ;F3AXNBwAAAAJ;GU9HgNAAAAAJ", "orcid": ";;;", "linkedin": "qiwei-di-00776a253/;;;", "or_profile": "~Qiwei_Di1;~Heyang_Zhao1;~Jiafan_He1;~Quanquan_Gu1", "aff": "University of California, Los Angeles;Computer Science Department, University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles", "aff_domain": "ucla.edu;cs.ucla.edu;ucla.edu;cs.ucla.edu", "position": "PhD student;PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\ndi2024pessimistic,\ntitle={Pessimistic Nonlinear Least-Squares Value Iteration for Offline Reinforcement Learning},\nauthor={Qiwei Di and Heyang Zhao and Jiafan He and Quanquan Gu},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=4kLVvIh8cp}\n}", "github": "", "project": "", "reviewers": "38or;Fsc5;KfKk;FxDp", "pdf_size": 522660, "rating": "5;6;6;8", "confidence": "3;3;4;3", "soundness": "2;3;3;4", "contribution": "2;3;2;3", "presentation": "3;4;2;3", "wc_summary": "41;51;98;174", "wc_strengths": "51;38;102;57", "wc_weaknesses": "125;81;108;144", "wc_questions": "63;55;5;55", "wc_review": "280;225;313;430", "wc_reply_reviewers": "0;9;0;0", "wc_reply_authors": "594;580;446;419", "reply_reviewers": "0;1;0;0", "reply_authors": "3;2;1;1", "rating_avg": [ 6.25, 1.0897247358851685 
], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 91.0, 52.53094326204318 ], "wc_strengths_avg": [ 62.0, 24.093567606313517 ], "wc_weaknesses_avg": [ 114.5, 23.157072353818823 ], "wc_questions_avg": [ 44.5, 23.038012067016545 ], "wc_review_avg": [ 312.0, 75.0299940023988 ], "wc_reply_reviewers_avg": [ 2.25, 3.897114317029974 ], "wc_reply_authors_avg": [ 509.75, 77.99479149276571 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.13245323570650439, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13706149271000326202&as_sdt=5,24&sciodt=0,24&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=4kLVvIh8cp", "pdf": "https://openreview.net/pdf?id=4kLVvIh8cp", "email": "ucla.edu;cs.ucla.edu;ucla.edu;cs.ucla.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of California, Los Angeles", "aff_unique_dep": "", "aff_unique_url": "https://www.ucla.edu", "aff_unique_abbr": "UCLA", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "4l4Gfc1B6E", "title": "Offline Robustness of Distributional Actor-Critic Ensemble Reinforcement Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Offline reinforcement learning (RL) focuses on learning policies using static datasets without further exploration. With the introduction of distributional reinforcement learning into offline RL, current methods excel at quantifying the risk and ensuring the security of learned policies. However, these algorithms can not effectively balance the distribution shift and robustness, and even a minor perturbation in observations can significantly impair policy performance. In this paper, we propose Offline Robustness of Distributional actor-critic Ensemble Reinforcement Learning (ORDER) to improve the robustness of policies. In ORDER, we introduce two approaches to enhance the robustness: i) introduce the smoothing technique to policies and distribution functions for states near the dataset; ii) strengthen the quantile network. In addition to improving the robustness, we also theoretically prove that ORDER converges to a conservative lower bound, which can alleviate the distribution shift. 
In our experiments, we validate the effectiveness of ORDER in the D4RL benchmark through comparative experiments and ablation studies.", "keywords": "Offline reinforcement learning;Distributional reinforcement learning;Robustness", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Zhongcui Ma;Jianxiang Zhu;Dandan Lai;Zhiyuan Xu;Jian Tang;Yangchun Zhang;Yan Peng;Yaxin Peng", "authorids": "~Zhongcui_Ma1;~Jianxiang_Zhu1;~Dandan_Lai1;~Zhiyuan_Xu1;~Jian_Tang5;~Yangchun_Zhang1;~Yan_Peng3;~Yaxin_Peng1", "gender": ";M;;M;M;;F;F", "homepage": "https://www.mdmlab-shu.com/author/zhongcui-ma/;;https://www.mdmlab-shu.com/author/dandan-lai/;https://xuzhiyuan1528.github.io/;https://ecs.syr.edu/faculty/tang;;;https://www.mdmlab-shu.com/author/yaxin-peng/", "dblp": ";;;;181/2667-8;https://dblp.uni-trier.de/pid/324/8512;00/2434-1;20/7643.html", "google_scholar": ";;https://scholar.google.com.hk/citations?user=JoM-YngAAAAJ;jKHMVnYAAAAJ;;https://scholar.google.com.hk/citations?user=CjY56LgAAAAJ;boOwRJUAAAAJ;https://scholar.google.com.hk/citations?user=4cRt3XoAAAAJ", "orcid": "0009-0005-9495-5936;0009-0003-2855-5710;0009-0002-1881-4064;0000-0003-2879-3244;;0000-0002-0540-9070;0000-0003-1312-9527;0000-0002-2983-555X", "linkedin": ";;;zhiyuan-xu-19a66191;;;;", "or_profile": "~Zhongcui_Ma1;~Jianxiang_Zhu1;~Dandan_Lai1;~Zhiyuan_Xu1;~Jian_Tang5;~Yangchun_Zhang1;~Yan_Peng3;~Yaxin_Peng1", "aff": "Shanghai University;Shanghai University;Shanghai University;Midea;x-humanoid;Shanghai University;Shanghai University;Shanghai University", "aff_domain": "shu.edu.cn;shu.edu.cn;shu.edu.cn;midea.com;x-humanoid.com;shu.edu.cn;shu.edu.cn;shu.edu.cn", "position": "MS student;MS student;PhD student;Researcher;Researcher;Lecturer;dean;Full Professor", "bibtex": "@misc{\nma2024offline,\ntitle={Offline Robustness of Distributional Actor-Critic Ensemble Reinforcement Learning},\nauthor={Zhongcui Ma and Jianxiang Zhu and Dandan Lai and Zhiyuan Xu and Jian Tang and Yangchun Zhang and Yan Peng and Yaxin Peng},\nyear={2024},\nurl={https://openreview.net/forum?id=4l4Gfc1B6E}\n}", "github": "", "project": "", "reviewers": "kZn3;fvaB;1eAG;cjFe", "site": "https://openreview.net/forum?id=4l4Gfc1B6E", "pdf_size": 1374788, "rating": "3;3;3;5", "confidence": "3;4;3;3", "soundness": "2;2;3;2", "contribution": "2;2;2;2", "presentation": "2;3;2;2", "wc_summary": "39;66;86;105", "wc_strengths": "43;30;39;65", "wc_weaknesses": "272;201;170;72", "wc_questions": "91;4;60;51", "wc_review": "445;301;355;293", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "169;772;292;211", "reply_reviewers": "0;0;0;0", "reply_authors": "1;2;1;1", "rating_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 74.0, 24.464259645450134 ], "wc_strengths_avg": [ 44.25, 12.871965661856 ], "wc_weaknesses_avg": [ 178.75, 71.87271735505762 ], "wc_questions_avg": [ 51.5, 31.18092365533773 ], "wc_review_avg": [ 348.5, 60.60321773635456 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 361.0, 241.3741908324086 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:N3DyagCY-bIJ:scholar.google.com/&scioq=Offline+Robustness+of+Distributional+Actor-Critic+Ensemble+Reinforcement+Learning&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;1;2;0;0;0", "aff_unique_norm": "Shanghai University;Midea Group;x-humanoid", "aff_unique_dep": ";;", "aff_unique_url": "https://www.shu.edu.cn;https://www.midea.com;", "aff_unique_abbr": "SHU;Midea;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China;" }, { "id": "4lOWCkhr4g", "title": "Unsupervised ASR via Cross-Lingual Pseudo-Labeling", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recent work has shown that it is possible to train an *unsupervised* automatic speech recognition (ASR) system using only unpaired audio and text. Existing unsupervised ASR methods assume that no labeled data can be used for training. \nWe argue that even if one does not have any labeled audio for a given language, there is *always* labeled data available for other languages. We show that it is possible to use character-level acoustic models (AMs) from other languages to bootstrap an *unsupervised* AM in a new language. Here, ``unsupervised'' means no labeled audio is available for the *target* language. Our approach is based on two key ingredients: (i) generating pseudo-labels (PLs) of the *target* language using some *other* language AM and (ii) constraining these PLs with a *target language model*. Our approach is effective on Common Voice: e.g. transfer of English AM to Swahili achieves 18\\% WER. It also outperforms character-based wav2vec-U 2.0 by 15\\% absolute WER on LJSpeech with 800h of labeled German data instead of 60k hours of unlabeled English data.", "keywords": "ASR;pseudo-labeling;self-training;unsupervised learning;multilingual", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Tatiana Likhomanenko;Loren Lugosch;Ronan Collobert", "authorids": "~Tatiana_Likhomanenko1;~Loren_Lugosch1;~Ronan_Collobert1", "gender": "F;M;M", "homepage": "https://github.com/tlikhomanenko/tlikhomanenko;http://lorenlugosch.com;http://ronan.collobert.com", "dblp": "202/2094;;03/4032", "google_scholar": "https://scholar.google.ru/citations?user=x7Z3ysQAAAAJ;https://scholar.google.ca/citations?user=xTvZgeoAAAAJ;32w7x1cAAAAJ", "orcid": "0000-0003-0351-9839;;", "linkedin": ";;", "or_profile": "~Tatiana_Likhomanenko1;~Loren_Lugosch1;~Ronan_Collobert1", "aff": "Apple;Apple;Apple", "aff_domain": "apple.com;apple.com;apple.com", "position": "Research Scientist;Researcher;Research Scientist", "bibtex": "@misc{\nlikhomanenko2024unsupervised,\ntitle={Unsupervised {ASR} via Cross-Lingual Pseudo-Labeling},\nauthor={Tatiana Likhomanenko and Loren Lugosch and Ronan Collobert},\nyear={2024},\nurl={https://openreview.net/forum?id=4lOWCkhr4g}\n}", "github": "", "project": "", "reviewers": "26fF;GYQT;mAuZ;UAMi", "site": "https://openreview.net/forum?id=4lOWCkhr4g", "pdf_size": 606406, "rating": "3;6;6;6", "confidence": "5;4;5;4", "soundness": "3;3;3;3", "contribution": "2;2;3;2", "presentation": "3;3;3;3", "wc_summary": "130;65;80;110", "wc_strengths": "39;58;43;67", "wc_weaknesses": "151;117;84;100", "wc_questions": "305;67;108;63", "wc_review": "625;307;315;340", "wc_reply_reviewers": "0;47;0;0", "wc_reply_authors": "1022;738;535;454", "reply_reviewers": "0;1;0;0", "reply_authors": "2;2;1;2", "rating_avg": [ 
5.25, 1.299038105676658 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 96.25, 25.341418665891617 ], "wc_strengths_avg": [ 51.75, 11.299889379989523 ], "wc_weaknesses_avg": [ 113.0, 24.849547279578356 ], "wc_questions_avg": [ 135.75, 99.29092355296127 ], "wc_review_avg": [ 396.75, 132.3411783988642 ], "wc_reply_reviewers_avg": [ 11.75, 20.351596988934308 ], "wc_reply_authors_avg": [ 687.25, 219.21379404590397 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5082035048589856528&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Apple", "aff_unique_dep": "Apple Inc.", "aff_unique_url": "https://www.apple.com", "aff_unique_abbr": "Apple", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "4lqA5EuieJ", "title": "Prediction Tasks in Graphs: a Framework to Control the Interpretability-Performance Trade-off", "track": "main", "status": "Reject", "tldr": "", "abstract": "Graph Neural Networks (GNNs) have emerged as state-of-the-art methods for solving graph-level tasks in diverse domains, such as social network analysis and bioinformatics. However, their complex nature results in a lack of human-interpretable predictions, which can hinder their practical impact. Here, we aim at improving GNN interpretability by targeting \\emph{sparsity} during GNN training - i.e, by minimizing the size (and/or number) of subgraphs used to make predictions. Existing solutions in the literature suffer from two main limitations: i) they still rely on information about the entire graph; and/or ii) they do not allow practitioners to directly control the trade-off between predictive performance and sparsity. To address the above limitations, in this paper, we formulate GNN training as a bi-level optimization task, where the trade-off between interpretability and performance can be controlled by a hyperparameter. Our framework relies on reinforcement learning to iteratively maximize predictive performance and sparsity by removing edges or nodes from the input graph. 
Our empirical results on nine different graph classification datasets show that our method competes in performance with baselines that use information from the whole graph, while relying on significantly sparser subgraphs, leading to more interpretable GNN-based predictions.", "keywords": "Graph Neural Networks; Reinforcement Learning; Graph-level tasks; Interpretability", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/afda2ebb66f1110447b93c944294dc98681a4b55.zip", "author": "Pablo Sanchez Martin;Kinaan Aamir Khan;Isabel Valera", "authorids": "~Pablo_Sanchez_Martin1;~Kinaan_Aamir_Khan1;~Isabel_Valera1", "gender": "M;M;F", "homepage": "https://www.is.mpg.de/person/psanchez;;https://ivaleram.github.io/", "dblp": ";;126/1768.html", "google_scholar": ";https://scholar.google.com/citations?hl=en;https://scholar.google.es/citations?user=cpdQqpsAAAAJ", "orcid": ";;", "linkedin": ";kinaanaamir/;", "or_profile": "~Pablo_Sanchez_Martin1;~Kinaan_Aamir_Khan1;~Isabel_Valera1", "aff": "Max-Planck Institute;Universit\u00e4t des Saarlandes;Universit\u00e4t des Saarlandes", "aff_domain": "mpg.tuebingen.de;uni-saarland.de;uni-saarland.de", "position": "PhD student;MS student;Full Professor", "bibtex": "@misc{\nmartin2024prediction,\ntitle={Prediction Tasks in Graphs: a Framework to Control the Interpretability-Performance Trade-off},\nauthor={Pablo Sanchez Martin and Kinaan Aamir Khan and Isabel Valera},\nyear={2024},\nurl={https://openreview.net/forum?id=4lqA5EuieJ}\n}", "github": "", "project": "", "reviewers": "CKsQ;Zge8;X2ML;CD1o", "site": "https://openreview.net/forum?id=4lqA5EuieJ", "pdf_size": 1371205, "rating": "3;5;5;6", "confidence": "3;4;4;2", "soundness": "3;3;3;3", "contribution": "2;2;2;3", "presentation": "3;3;2;3", "wc_summary": "33;100;58;62", "wc_strengths": "39;51;77;52", "wc_weaknesses": "377;138;197;164", "wc_questions": "3;5;37;42", "wc_review": "452;294;369;320", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 63.25, 23.951774464536026 ], "wc_strengths_avg": [ 54.75, 13.827056809024834 ], "wc_weaknesses_avg": [ 219.0, 93.58685805175853 ], "wc_questions_avg": [ 21.75, 17.851820635442202 ], "wc_review_avg": [ 358.75, 60.197072187939504 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.20751433915982243, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:oooGi4-WIp4J:scholar.google.com/&scioq=Prediction+Tasks+in+Graphs:+a+Framework+to+Control+the+Interpretability-Performance+Trade-off&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;1", "aff_unique_norm": "Max-Planck-Gesellschaft zur F\u00f6rderung der Wissenschaften e.V.;Universit\u00e4t des Saarlandes", "aff_unique_dep": ";", "aff_unique_url": "https://www.mpg.de;https://www.uni-saarland.de", "aff_unique_abbr": "MPG;UDS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "id": "4lqo5Jwfnq", "title": "Class-Incremental Learning with 
Parameter-Efficient Cross-Task Prompts", "track": "main", "status": "Reject", "tldr": "", "abstract": "Class-Incremental Learning (CIL) aims to learn deep models on sequential tasks continually, where each new task includes a batch of new classes and deep models do not have access to task-ID information at the inference time. Recent vast pre-trained models (PTMs) have achieved outstanding performance by prompt technique in practical CIL without the old samples (rehearsal-free) and with a memory constraint (memory-constrained): Prompt-extending and Prompt-fixed methods. However, prompt-extending methods need a large memory buffer to maintain an ever-expanding prompt pool and meet an extra challenging prompt selection problem. Prompt-fixed methods only learn a fixed number of prompts on one of the incremental tasks and can not handle all the incremental tasks effectively. To achieve a good balance between the memory cost and the performance on all the tasks, we propose a Parameter-Efficient Cross-Task Prompt (PECTP) framework with a prompt retention module (PRM). To make the final learned prompts effective on the whole incremental tasks, PRM constrains the evolution of cross-task prompts' parameters from Outer Prompt Granularity and Inner Prompt Granularity. Extensive experiments show the effectiveness of our method.", "keywords": "Class-Incremental Learning; Pre-Trained Model; Prompt Learning", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "/attachment/bba64007710d77a6ceac489214c46e9ae4f36546.zip", "author": "qian feng;Hanbin Zhao;Chao Zhang;Hui Qian", "authorids": "~qian_feng1;~Hanbin_Zhao1;~Chao_Zhang19;~Hui_Qian1", "gender": ";M;M;M", "homepage": "https://github.com/RAIAN08;;;", "dblp": ";222/7871;94/3019-29;66/5293", "google_scholar": ";F2kiw10AAAAJ;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~qian_feng1;~Hanbin_Zhao1;~Chao_Zhang19;~Hui_Qian1", "aff": "Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn", "position": "PhD student;Assistant Professor;Assistant Professor;Full Professor", "bibtex": "@misc{\nfeng2024classincremental,\ntitle={Class-Incremental Learning with Parameter-Efficient Cross-Task Prompts},\nauthor={qian feng and Hanbin Zhao and Chao Zhang and Hui Qian},\nyear={2024},\nurl={https://openreview.net/forum?id=4lqo5Jwfnq}\n}", "github": "", "project": "", "reviewers": "2DHh;9kbr;vcUE", "site": "https://openreview.net/forum?id=4lqo5Jwfnq", "pdf_size": 769382, "rating": "3;5;6", "confidence": "5;3;5", "soundness": "2;2;3", "contribution": "2;3;3", "presentation": "2;3;2", "wc_summary": "41;65;42", "wc_strengths": "16;35;114", "wc_weaknesses": "159;276;207", "wc_questions": "3;30;7", "wc_review": "219;406;370", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1913;2158;1372", "reply_reviewers": "0;0;0", "reply_authors": "4;5;3", "rating_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 4.333333333333333, 0.9428090415820634 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 49.333333333333336, 11.08552609887726 ], "wc_strengths_avg": [ 55.0, 42.43426288586461 ], "wc_weaknesses_avg": [ 214.0, 48.02082881417188 ], "wc_questions_avg": [ 13.333333333333334, 11.897712198383164 ], "wc_review_avg": [ 331.6666666666667, 81.01165896887244 ], 
"wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1814.3333333333333, 328.3801997008278 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 4.0, 0.816496580927726 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.18898223650461363, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ACthZbY0vNkJ:scholar.google.com/&scioq=Class-Incremental+Learning+with+Parameter-Efficient+Cross-Task+Prompts&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "4nyTlyTtfX", "title": "Heterogeneous Decision Making towards Mixed Autonomy: When Uncertainty-aware Planning Meets Bounded Rationality", "track": "main", "status": "Reject", "tldr": "", "abstract": "The past few years have witnessed a rapid growth of the deployment of automated vehicles (AVs). Clearly,\nAVs and human-driven vehicles (HVs) will co-exist for many years to come, and AVs will have to operate around HVs, pedestrians, cyclists, and more, calling for fundamental breakthroughs in AI designed for mixed traffic to achieve mixed autonomy. Thus motivated, we study heterogeneous decision making by AVs and HVs in a mixed traffic environment, aiming to capture the interactions between human and machine decision-making and develop an AI foundation that enables vehicles to operate safely and efficiently. There are a number of challenges to achieve mixed autonomy, including 1) humans drivers make driving decisions with bounded rationality, and it remains open to develop accurate models for HVs' decision making; and 2) uncertainty-aware planning plays a critical role for AVs to take safety maneuvers in response to the human behavior. In this paper, we introduce a formulation of AV-HV interaction, where the HV makes decisions with bounded rationality and the AV employs uncertainty-aware planning based on the prediction on HV's future actions. We conduct a comprehensive analysis on AV and HV's learning regret to answer the questions: 1) \\\"How does the overall learning performance depend on HV's bounded rationality and Av's planning?\"; 2) \"How do different decision making strategies impact the overall learning performance?\" Our findings reveal some intriguing phenomena, such as Goodhart's Law in AV's learning performance and compounding effects in HV's decision making process. 
By examining the dynamics of the regrets, we gain insights into the interplay between human and machine decision making in mixed autonomy.", "keywords": "Mixed Autonomy;Reinforcement Learning;Bounded Rationality;Regret Analysis", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Hang Wang;Qiaoyi Fang;Junshan Zhang", "authorids": "~Hang_Wang2;~Qiaoyi_Fang1;~Junshan_Zhang1", "gender": "M;;M", "homepage": "https://ustcmike.github.io/;https://github.com/fangqyi;https://faculty.engineering.ucdavis.edu/jzhang/", "dblp": ";;59/1232.html", "google_scholar": "Xdb3u_q3RKwC;;UtAdFs8AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Hang_Wang2;~Qiaoyi_Fang1;~Junshan_Zhang1", "aff": "University of California, Davis;University of California, Davis;University of California, Davis", "aff_domain": "ucdavis.edu;ucdavis.edu;ucdavis.edu", "position": "PhD student;PhD student;Full Professor", "bibtex": "@misc{\nwang2024heterogeneous,\ntitle={Heterogeneous Decision Making towards Mixed Autonomy: When Uncertainty-aware Planning Meets Bounded Rationality},\nauthor={Hang Wang and Qiaoyi Fang and Junshan Zhang},\nyear={2024},\nurl={https://openreview.net/forum?id=4nyTlyTtfX}\n}", "github": "", "project": "", "reviewers": "Sgxf;dAQk;BMkw;DRuU", "site": "https://openreview.net/forum?id=4nyTlyTtfX", "pdf_size": 611295, "rating": "3;3;5;6", "confidence": "2;3;2;2", "soundness": "1;1;3;3", "contribution": "2;1;3;2", "presentation": "2;2;2;4", "wc_summary": "87;108;82;95", "wc_strengths": "46;195;35;111", "wc_weaknesses": "117;624;102;114", "wc_questions": "183;86;10;16", "wc_review": "433;1013;229;336", "wc_reply_reviewers": "0;1071;50;0", "wc_reply_authors": "1357;3118;369;998", "reply_reviewers": "0;4;1;0", "reply_authors": "3;8;2;4", "rating_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 2.25, 0.4330127018922193 ], "soundness_avg": [ 2.0, 1.0 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 93.0, 9.82344135219425 ], "wc_strengths_avg": [ 96.75, 63.72744699107285 ], "wc_weaknesses_avg": [ 239.25, 222.20640742336843 ], "wc_questions_avg": [ 73.75, 69.79389299931621 ], "wc_review_avg": [ 502.75, 303.30049043811323 ], "wc_reply_reviewers_avg": [ 280.25, 456.99582875558065 ], "wc_reply_authors_avg": [ 1460.5, 1020.2079444897496 ], "reply_reviewers_avg": [ 1.25, 1.6393596310755 ], "reply_authors_avg": [ 4.25, 2.277608394786075 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5555555555555555, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:-f77RWILAWcJ:scholar.google.com/&scioq=Heterogeneous+Decision+Making+towards+Mixed+Autonomy:+When+Uncertainty-aware+Planning+Meets+Bounded+Rationality&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, Davis", "aff_unique_dep": "", "aff_unique_url": "https://www.ucdavis.edu", "aff_unique_abbr": "UC Davis", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Davis", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "DREAM: Dual Structured Exploration with Mixup for Open-set Graph Domain Adaption", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19448", "id": "4olqbTBt1Y", "author_site": "Nan Yin, Mengzhu Wang, Zhenghan Chen, Li Shen, Huan Xiong, Bin Gu, Xiao Luo", "tldr": "", "abstract": "Recently, numerous graph neural network methods have been developed to 
tackle domain shifts in graph data. However, these methods presuppose that unlabeled target graphs belong to categories previously seen in the source domain. This assumption could not hold true for in-the-wild target graphs. In this paper, we delve deeper to explore a more realistic problem open-set graph domain adaptation. Our objective is to not only identify target graphs from new categories but also accurately classify remaining target graphs into their respective categories under domain shift and label scarcity. To solve this challenging problem, we introduce a new method named Dual Structured Exploration with Mixup (DREAM). DREAM incorporates a graph-level representation learning branch as well as a subgraph-enhanced branch, which jointly explores graph topological structures from both global and local viewpoints. To maximize the use of unlabeled target graphs, we train these two branches simultaneously using posterior regularization to enhance their inter-module consistency. To accommodate the open-set setting, we amalgamate dissimilar samples to generate virtual unknown samples belonging to novel classes. Moreover, to alleviate domain shift, we establish a k nearest neighbor-based graph-of-graphs and blend multiple neighbors of each sample to produce cross-domain virtual samples for inter-domain consistency learning. Extensive experiments validate the effectiveness of the proposed DREAM in comparison to various state-of-the-art approaches in different settings.", "keywords": "Open-set Recognization;Graph Classification;Domain Adaptation", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Nan Yin;Mengzhu Wang;Zhenghan Chen;Li Shen;Huan Xiong;Bin Gu;Xiao Luo", "authorids": "~Nan_Yin4;~Mengzhu_Wang3;~Zhenghan_Chen3;~Li_Shen1;~Huan_Xiong1;~Bin_Gu1;~Xiao_Luo3", "gender": "M;;;M;M;M;M", "homepage": ";;;https://sites.google.com/site/mathshenli/home;https://scholar.google.com/citations?user=l4hm14MAAAAJ&hl=en;https://mbzuai.ac.ae/study/faculty/bin-gu/;http://luoxiao12.github.io", "dblp": "135/8983;;;91/3680-8;;29/1758-1;50/1585-1", "google_scholar": "https://scholar.google.com.hk/citations?user=NoOK0pIAAAAJ;;;yVhgENIAAAAJ;l4hm14MAAAAJ;Vo8OgCgAAAAJ;https://scholar.google.com.hk/citations?", "orcid": ";;;;;0000-0001-6049-1815;", "linkedin": "yin-nan-b32943173;;;;;;%E9%9C%84-%E7%BD%97-303548214/", "or_profile": "~Nan_Yin4;~Mengzhu_Wang3;~Zhenghan_Chen3;~Li_Shen1;~Huan_Xiong1;~Bin_Gu1;~Xiao_Luo3", "aff": "Mohamed bin Zayed University of Artificial Intelligence;;;JD Explore Academy;Harbin Institute of Technology;Mohamed bin Zayed University of Artificial Intelligence;University of California, Los Angeles", "aff_domain": "mbzuai.ac.ae;;;jd.com;hit.edu.cn;mbzuai.ac.ae;cs.ucla.edu", "position": "Postdoc;;;Researcher;Full Professor;Assistant Professor;Postdoc", "bibtex": "@inproceedings{\nyin2024dream,\ntitle={{DREAM}: Dual Structured Exploration with Mixup for Open-set Graph Domain Adaption},\nauthor={Nan Yin and Mengzhu Wang and Zhenghan Chen and Li Shen and Huan Xiong and Bin Gu and Xiao Luo},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=4olqbTBt1Y}\n}", "github": "", "project": "", "reviewers": "qwA4;Pqqy;ysQV;9UFe;SVBJ", "pdf_size": 5099568, "rating": "3;5;8;8;8", "confidence": "4;3;4;4;4", "soundness": "2;2;4;3;3", "contribution": "2;2;4;3;3", "presentation": "2;2;4;3;3", "wc_summary": "78;58;204;91;34", 
"wc_strengths": "45;20;396;167;27", "wc_weaknesses": "231;114;75;43;35", "wc_questions": "2;5;32;2;2", "wc_review": "356;197;707;303;98", "wc_reply_reviewers": "97;0;81;31;0", "wc_reply_authors": "1758;738;530;430;187", "reply_reviewers": "1;0;2;1;0", "reply_authors": "5;3;3;3;1", "rating_avg": [ 6.4, 2.0591260281974 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "contribution_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 93.0, 58.74691481260952 ], "wc_strengths_avg": [ 131.0, 142.86637113050782 ], "wc_weaknesses_avg": [ 99.6, 71.3515241603149 ], "wc_questions_avg": [ 8.6, 11.757550765359253 ], "wc_review_avg": [ 332.2, 207.37540837813918 ], "wc_reply_reviewers_avg": [ 41.8, 40.48407094154441 ], "wc_reply_authors_avg": [ 728.6, 544.3688455450036 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 3.0, 1.2649110640673518 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.33995005182504257, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4463789579044625548&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 2, "openreview": "https://openreview.net/forum?id=4olqbTBt1Y", "pdf": "https://openreview.net/pdf?id=4olqbTBt1Y", "email": "mbzuai.ac.ae;;;jd.com;hit.edu.cn;mbzuai.ac.ae;cs.ucla.edu", "author_num": 7, "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "Mohamed bin Zayed University of Artificial Intelligence;JD;Harbin Institute of Technology;University of California, Los Angeles", "aff_unique_dep": ";JD Explore Academy;;", "aff_unique_url": "https://mbzuai.ac.ae;;http://www.hit.edu.cn/;https://www.ucla.edu", "aff_unique_abbr": "MBZUAI;;HIT;UCLA", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Harbin;Los Angeles", "aff_country_unique_index": "0;2;0;3", "aff_country_unique": "United Arab Emirates;;China;United States" }, { "id": "4pW8NL1UwH", "title": "LIRE: Listwise Reward Enhancement for Preference Alignment", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recently, tremendous strides have been made in the domain of Natural Language Generation (NLG) due to the vast advances in Large Language Models (LLMs). However, often trained on large-scale unsupervised data, LLMs can generate toxic or unhelpful content for lack of human supervision. Leveraging reinforcement learning with human feedback (RLHF) turns out a good remedy for this problem and has been prevalent among researchers. However, RLHF is notoriously unstable and hyperparameter-sensitive, which hinders an all-compassing and sustainable LLM system. For the above reason, we propose a new approach: LIRE, which stands for Listwise Reward Enhancement for Preference Alignment, to optimize rewards through a listwise paradigm. We directly incorporate the rewards of multiple candidates into the listwise loss and optimize against it in a compact and effective framework, without explicit modeling of the Bradley-Terry model. Furthermore, we propose a self-enhancement algorithm to progressively optimize the reward through iterative training. 
Our work also entails extensive experiments to demonstrate the stability and consistency of the model performance without heavy hyperparameter tuning, while still surpassing the state-of-the-art methods in preference alignment tasks.", "keywords": "LLM;RLHF;Preference alignment", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Mingye Zhu", "authorids": "~Mingye_Zhu1", "gender": "", "homepage": "https://github.com/stevie1023", "dblp": "", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "~Mingye_Zhu1", "aff": "University of Science and Technology of China", "aff_domain": "ustc.edu.cn", "position": "PhD student", "bibtex": "@misc{\nzhu2024lire,\ntitle={{LIRE}: Listwise Reward Enhancement for Preference Alignment},\nauthor={Mingye Zhu},\nyear={2024},\nurl={https://openreview.net/forum?id=4pW8NL1UwH}\n}", "github": "", "project": "", "reviewers": "XkkK;6BAr;Cmfo;Z7nt;Vfn5", "site": "https://openreview.net/forum?id=4pW8NL1UwH", "pdf_size": 942745, "rating": "5;5;5;5;6", "confidence": "3;3;4;3;3", "soundness": "2;2;2;2;3", "contribution": "2;2;2;3;2", "presentation": "3;2;2;2;3", "wc_summary": "280;70;88;186;127", "wc_strengths": "18;102;97;84;71", "wc_weaknesses": "161;165;248;479;94", "wc_questions": "24;23;34;182;1", "wc_review": "483;360;467;931;293", "wc_reply_reviewers": "69;0;0;0;0", "wc_reply_authors": "981;619;848;1166;620", "reply_reviewers": "1;0;0;0;0", "reply_authors": "2;1;2;2;1", "rating_avg": [ 5.2, 0.39999999999999997 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 2.2, 0.39999999999999997 ], "contribution_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 150.2, 76.11675242678184 ], "wc_strengths_avg": [ 74.4, 30.19006459085505 ], "wc_weaknesses_avg": [ 229.4, 134.01880465069073 ], "wc_questions_avg": [ 52.8, 65.49320575449028 ], "wc_review_avg": [ 506.8, 223.31896471191155 ], "wc_reply_reviewers_avg": [ 13.8, 27.6 ], "wc_reply_authors_avg": [ 846.8, 211.29637952411773 ], "reply_reviewers_avg": [ 0.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.25000000000000006, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14469823784875334267&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0", "aff_unique_norm": "University of Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ustc.edu.cn", "aff_unique_abbr": "USTC", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "4pnhzuRtJ2", "title": "Optimized Tradeoffs for Private Majority Ensembling", "track": "main", "status": "Reject", "tldr": "", "abstract": "We study the problem of computing an $(m\\epsilon, \\delta)$-differentially private majority of $K$ $(\\epsilon, \\Delta)$-differentially private algorithms for $m < K$ and $\\delta \\geq \\Delta \\geq 0$. Standard methods, such as subsampling or randomized response, are widely used but do they provide optimal privacy-utility tradeoffs? Surprisingly, we show that an $(m\\epsilon, \\delta)$-private majority algorithm with maximal utility can be computed tractably for any $m < K$. 
Specifically, we introduce Data-dependent Randomized Response Majority (DaRRM), a general privacy framework characterized by a data-dependent noise function $\\gamma$ that allows for efficient utility optimization over the class of all private algorithms subject to privacy constraints. By deriving a structural understanding of DaRRM, our novel learning approach is made tractable by critically reducing infinitely many privacy constraints into a polynomial set. Theoretically, we show DaRRM enjoys a privacy gain of a factor of 2 over common baselines under i.i.d. teachers and $\\delta = 0$. Lastly, we demonstrate the empirical effectiveness of our first-of-its-kind privacy-constrained utility optimization for ensembling labels and gradients from private teachers through applications of private semi-supervised knowledge transfer and private distributed Sign-SGD, highlighting the outstanding performance of our DaRRM framework with an optimized $\\gamma$ against several baselines.", "keywords": "differential privacy;ensemble learning", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/e3b4b35f51ebae896e00c19c8c75bc25a60f2a7d.pdf", "author": "Shuli Jiang;Qiuyi Zhang;Gauri Joshi", "authorids": "~Shuli_Jiang1;~Qiuyi_Zhang1;~Gauri_Joshi1", "gender": "F;M;", "homepage": "https://www.andrew.cmu.edu/user/shulij/;https://qiuyiz.github.io;", "dblp": "224/6441;133/8559;", "google_scholar": "D0IvR7MAAAAJ;mE11hO8AAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Shuli_Jiang1;~Qiuyi_Zhang1;~Gauri_Joshi1", "aff": "Carnegie Mellon University;Google;", "aff_domain": "cmu.edu;google.com;", "position": "PhD student;Researcher;", "bibtex": "@misc{\njiang2024optimized,\ntitle={Optimized Tradeoffs for Private Majority Ensembling},\nauthor={Shuli Jiang and Qiuyi Zhang and Gauri Joshi},\nyear={2024},\nurl={https://openreview.net/forum?id=4pnhzuRtJ2}\n}", "github": "", "project": "", "reviewers": "pyVN;3EZ4;SRCJ;DeQA", "site": "https://openreview.net/forum?id=4pnhzuRtJ2", "pdf_size": 1980631, "rating": "3;3;6;8", "confidence": "3;4;3;3", "soundness": "3;2;3;3", "contribution": "3;3;2;3", "presentation": "2;2;3;3", "wc_summary": "89;14;67;53", "wc_strengths": "63;64;49;68", "wc_weaknesses": "230;608;35;46", "wc_questions": "57;1;23;31", "wc_review": "439;687;174;198", "wc_reply_reviewers": "224;91;17;6", "wc_reply_authors": "849;684;342;60", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 5.0, 2.1213203435596424 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 55.75, 27.307279249313726 ], "wc_strengths_avg": [ 61.0, 7.176350047203662 ], "wc_weaknesses_avg": [ 229.75, 231.71358937274266 ], "wc_questions_avg": [ 28.0, 20.024984394500787 ], "wc_review_avg": [ 374.5, 208.06789757192243 ], "wc_reply_reviewers_avg": [ 84.5, 86.92094109016537 ], "wc_reply_authors_avg": [ 483.75, 305.43606123049716 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5443310539518174, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:JiKpLYVEx0oJ:scholar.google.com/&scioq=Optimized+Tradeoffs+for+Private+Majority+Ensembling&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Carnegie Mellon University;Google", 
"aff_unique_dep": ";Google", "aff_unique_url": "https://www.cmu.edu;https://www.google.com", "aff_unique_abbr": "CMU;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "4qFIkOhq24", "title": "Fundamental Limitation of Alignment in Large Language Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "An important aspect in developing language models that interact with humans is\naligning their behavior to be useful and unharmful for their human users. This is\nusually achieved by tuning the model in a way that enhances desired behaviors\nand inhibits undesired ones, a process referred to as alignment. In this paper, we\npropose a theoretical approach called Behavior Expectation Bounds (BEB) which\nallows us to formally investigate several inherent characteristics and limitations of\nalignment in large language models. Importantly, we prove that within the limits\nof this framework, for any behavior that has a finite probability of being exhibited\nby the model, there exist prompts that can trigger the model into outputting this\nbehavior, with probability that increases with the length of the prompt. This implies\nthat any alignment process that attenuates an undesired behavior but does not\nremove it altogether, is not safe against adversarial prompting attacks. Furthermore,\nour framework hints at the mechanism by which leading alignment approaches\nsuch as reinforcement learning from human feedback make the LLM prone to\nbeing prompted into the undesired behaviors. This theoretical result is being\nexperimentally demonstrated in large scale by the so called contemporary \u201cchatGPT\njailbreaks\", where adversarial users trick the LLM into breaking its alignment\nguardrails by triggering it into acting as a malicious persona. 
Our results expose\nfundamental limitations in alignment of LLMs and bring to the forefront the need\nto devise reliable mechanisms for ensuring AI safety.", "keywords": "Large language models;Alignment;Deep learning", "primary_area": "learning theory", "supplementary_material": "/attachment/a847fed0b934f5857cb4fdfe250696a0f7403216.zip", "author": "Yotam Wolf;Noam Wies;Oshri Avnery;Yoav Levine;Amnon Shashua", "authorids": "~Yotam_Wolf1;~Noam_Wies1;~Oshri_Avnery1;~Yoav_Levine1;~Amnon_Shashua1", "gender": "M;M;M;M;M", "homepage": ";;;;http://www.cs.huji.ac.il/~shashua/", "dblp": ";236/6106;;199/1895;47/1492", "google_scholar": "jlPKaIIAAAAJ;https://scholar.google.co.il/citations?user=FxlR8voAAAAJ;;;https://scholar.google.com.tw/citations?user=dwi5wvYAAAAJ", "orcid": ";0000-0002-1337-2298;;;", "linkedin": ";noam-wies-a5ab1663/;oshri-avnery/;;", "or_profile": "~Yotam_Wolf1;~Noam_Wies1;~Oshri_Avnery1;~Yoav_Levine1;~Amnon_Shashua1", "aff": "Hebrew University of Jerusalem;Hebrew University of Jerusalem;Hebrew University of Jerusalem;;Hebrew University, Hebrew University of Jerusalem", "aff_domain": "huji.ac.il;huji.ac.il;huji.ac.il;;cs.huji.ac.il", "position": "PhD student;PhD student;MS student;;Professor", "bibtex": "@misc{\nwolf2024fundamental,\ntitle={Fundamental Limitation of Alignment in Large Language Models},\nauthor={Yotam Wolf and Noam Wies and Oshri Avnery and Yoav Levine and Amnon Shashua},\nyear={2024},\nurl={https://openreview.net/forum?id=4qFIkOhq24}\n}", "github": "", "project": "", "reviewers": "Cy5r;edXZ;5KAT;e9bS", "site": "https://openreview.net/forum?id=4qFIkOhq24", "pdf_size": 1637993, "rating": "6;6;6;8", "confidence": "3;3;3;4", "soundness": "3;3;3;4", "contribution": "3;3;3;3", "presentation": "3;2;3;3", "wc_summary": "126;64;88;158", "wc_strengths": "59;69;89;124", "wc_weaknesses": "69;340;56;187", "wc_questions": "397;143;74;29", "wc_review": "651;616;307;498", "wc_reply_reviewers": "16;383;0;23", "wc_reply_authors": "754;1314;306;563", "reply_reviewers": "1;2;0;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 109.0, 35.90264614203248 ], "wc_strengths_avg": [ 85.25, 24.843258642939738 ], "wc_weaknesses_avg": [ 163.0, 114.22565386111826 ], "wc_questions_avg": [ 160.75, 142.31369400026128 ], "wc_review_avg": [ 518.0, 134.36331344530024 ], "wc_reply_reviewers_avg": [ 105.5, 160.43144953530776 ], "wc_reply_authors_avg": [ 734.25, 370.548495476638 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 208, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10671962345198545886&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Hebrew University of Jerusalem", "aff_unique_dep": "", "aff_unique_url": "https://www.huji.ac.il", "aff_unique_abbr": "HUJI", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Jerusalem;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Israel" }, { "title": "Learning Delays in Spiking Neural Networks using Dilated Convolutions with Learnable Spacings", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19447", "id": "4r2ybzJnmN", "author_site": "Ilyass Hammouamri, 
Ismail Khalfaoui Hassani, Timoth\u00e9e Masquelier", "tldr": "", "abstract": "Spiking Neural Networks (SNNs) are a promising research direction for building power-efficient information processing systems, especially for temporal tasks such as speech recognition. In SNNs, delays refer to the time needed for one spike to travel from one neuron to another. These delays matter because they influence the spike arrival times, and it is well-known that spiking neurons respond more strongly to coincident input spikes. More formally, it has been shown theoretically that plastic delays greatly increase the expressivity in SNNs. Yet, efficient algorithms to learn these delays have been lacking. Here, we propose a new discrete-time algorithm that addresses this issue in deep feedforward SNNs using backpropagation, in an offline manner. To simulate delays between consecutive layers, we use 1D convolutions across time. The kernels contain only a few non-zero weights \u2013 one per synapse \u2013 whose positions correspond to the delays. These positions are learned together with the weights using the recently proposed Dilated Convolution with Learnable Spacings (DCLS). We evaluated our method on three datasets: the Spiking Heidelberg Dataset (SHD), the Spiking Speech Commands (SSC) and its non spiking version Google Speech Commands v0.02 (GSC) benchmarks, which require detecting temporal patterns. We used feedforward SNNs with two or three hidden fully connected layers, and vanilla leaky integrate-and-fire neurons. We showed that fixed random delays help and that learning them helps even more. Furthermore, our method outperformed the state-of-the-art in the three datasets without using recurrent connections and with substantially fewer parameters. Our work demonstrates the potential of delay learning in developing accurate and precise models for temporal data processing. 
Our code is based on PyTorch / SpikingJelly and available at: https://github.com/Thvnvtos/SNN-delays", "keywords": "Spiking Neural Networks;Delays;Neuromorphic Computing;Speech Recognition", "primary_area": "applications to neuroscience & cognitive science", "supplementary_material": "", "author": "Ilyass Hammouamri;Ismail Khalfaoui-Hassani;Timoth\u00e9e Masquelier", "authorids": "~Ilyass_Hammouamri1;~Ismail_Khalfaoui-Hassani1;~Timoth\u00e9e_Masquelier1", "gender": "M;;M", "homepage": ";;https://ismakh.com", "dblp": ";07/7226;308/6076", "google_scholar": "https://scholar.google.com/citations?hl=en;fkzUZ-oAAAAJ;sFQlNhIAAAAJ", "orcid": ";0000-0001-8629-9506;0009-0007-8858-4037", "linkedin": "hammouamri-ilyass/;;ismail-khalfaoui-hassani/", "or_profile": "~Ilyass_Hammouamri1;~Timoth\u00e9e_Masquelier1;~Ismail_Khalfaoui_Hassani1", "aff": "Universit\u00e9 de Toulouse;CNRS;University of Toulouse", "aff_domain": "univ-toulouse.fr;cnrs.fr;univ-tlse3.fr", "position": "PhD student;Principal Researcher;PhD student", "bibtex": "@inproceedings{\nhammouamri2024learning,\ntitle={Learning Delays in Spiking Neural Networks using Dilated Convolutions with Learnable Spacings},\nauthor={Ilyass Hammouamri and Ismail Khalfaoui-Hassani and Timoth{\\'e}e Masquelier},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=4r2ybzJnmN}\n}", "github": "", "project": "", "reviewers": "7RCo;Wbur;ybas;sQsE", "pdf_size": 603710, "rating": "6;6;8;8", "confidence": "3;4;5;4", "soundness": "3;2;3;3", "contribution": "2;2;4;2", "presentation": "3;3;3;3", "wc_summary": "104;63;118;65", "wc_strengths": "56;51;64;51", "wc_weaknesses": "145;307;120;847", "wc_questions": "5;66;236;57", "wc_review": "310;487;538;1020", "wc_reply_reviewers": "0;68;0;498", "wc_reply_authors": "244;707;664;701", "reply_reviewers": "0;1;0;3", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 1.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 87.5, 24.026027553467927 ], "wc_strengths_avg": [ 55.5, 5.315072906367325 ], "wc_weaknesses_avg": [ 354.75, 293.1265724904516 ], "wc_questions_avg": [ 91.0, 86.89361311396829 ], "wc_review_avg": [ 588.75, 262.9670844421408 ], "wc_reply_reviewers_avg": [ 141.5, 207.6890704875921 ], "wc_reply_authors_avg": [ 579.0, 194.1120810253705 ], "reply_reviewers_avg": [ 1.0, 1.224744871391589 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7071067811865475, "gs_citation": 59, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18230794924674691098&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "openreview": "https://openreview.net/forum?id=4r2ybzJnmN", "pdf": "https://openreview.net/pdf?id=4r2ybzJnmN", "email": "univ-toulouse.fr;cnrs.fr;univ-tlse3.fr", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Universit\u00e9 de Toulouse;Centre National de la Recherche Scientifique;University of Toulouse", "aff_unique_dep": ";;", "aff_unique_url": "https://www.univ-toulouse.fr;https://www.cnrs.fr;https://www.univ-toulouse.fr", "aff_unique_abbr": "UT;CNRS;UT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "title": "Learning 3D Particle-based Simulators from RGB-D Videos", "status": "Poster", "track": "main", "site": 
"https://iclr.cc/virtual/2024/poster/19446", "id": "4rBEgZCubP", "author_site": "William Whitney, Tatiana Lopez-Guevara, Tobias Pfaff, Yulia Rubanova, Thomas Kipf, Kimberly Stachenfeld, Kelsey Allen", "tldr": "", "abstract": "Realistic simulation is critical for applications ranging from robotics to animation. Traditional analytic simulators sometimes struggle to capture sufficiently realistic simulation which can lead to problems including the well known \"sim-to-real\" gap in robotics. Learned simulators have emerged as an alternative for better capturing real-world physical dynamics, but require access to privileged ground truth physics information such as precise object geometry or particle tracks. Here we propose a method for learning simulators directly from observations. Visual Particle Dynamics (VPD) jointly learns a latent particle-based representation of 3D scenes, a neural simulator of the latent particle dynamics, and a renderer that can produce images of the scene from arbitrary views. VPD learns end to end from posed RGB-D videos and does not require access to privileged information. Unlike existing 2D video prediction models, we show that VPD's 3D structure enables scene editing and long-term predictions. These results pave the way for downstream applications ranging from video editing to robotic planning.", "keywords": "simulation;dynamics;nerf;particle dynamics", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "William F Whitney;Tatiana Lopez-Guevara;Tobias Pfaff;Yulia Rubanova;Thomas Kipf;Kim Stachenfeld;Kelsey R Allen", "authorids": "~William_F_Whitney1;~Tatiana_Lopez-Guevara1;~Tobias_Pfaff1;~Yulia_Rubanova2;~Thomas_Kipf2;~Kim_Stachenfeld1;~Kelsey_R_Allen1", "gender": ";F;M;F;F;M;F", "homepage": "http://willwhitney.com;;http://tobiaspfaff.com;https://neurokim.com/;;http://tkipf.github.io/;https://yuliarubanova.github.io/", "dblp": "160/8671;208/0996;67/7591;155/1888;153/9528;186/8206;222/3085", "google_scholar": "aQcYWDMAAAAJ;Op4nexcAAAAJ;3oUgDKQAAAAJ;jNtH2WUAAAAJ;kpcjFekAAAAJ;83HL5FwAAAAJ;u_HzE9wAAAAJ", "orcid": ";;;;;;", "linkedin": ";;;;;thomas-kipf-6b260410a;https://linkedin.com/in/yulia-rubanova-031702100", "or_profile": "~William_F_Whitney1;~Tatiana_Lopez-Guevara1;~Tobias_Pfaff1;~Kim_Stachenfeld1;~Kelsey_R_Allen1;~Thomas_N._Kipf1;~Yulia_Rubanova1", "aff": "Google DeepMind;Google;Deepmind;Google DeepMind;Google;Google;Google DeepMind", "aff_domain": "deepmind.com;google.com;google.com;deepmind.com;deepmind.com;google.com;deepmind.com", "position": "Researcher;Researcher;Research scientist;Research Scientist;Research Scientist;Research Scientist;Research Scientist", "bibtex": "@inproceedings{\nwhitney2024learning,\ntitle={Learning 3D Particle-based Simulators from {RGB}-D Videos},\nauthor={William F Whitney and Tatiana Lopez-Guevara and Tobias Pfaff and Yulia Rubanova and Thomas Kipf and Kim Stachenfeld and Kelsey R Allen},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=4rBEgZCubP}\n}", "github": "", "project": "", "reviewers": "HnBD;CLWA;cZVt;8vFC", "pdf_size": 11133369, "rating": "6;6;6;8", "confidence": "4;3;3;3", "soundness": "3;3;3;3", "contribution": "3;3;2;3", "presentation": "3;2;3;3", "wc_summary": "71;73;138;44", "wc_strengths": "29;46;98;57", "wc_weaknesses": "83;373;121;32", "wc_questions": "12;1;132;161", "wc_review": "195;493;489;294", "wc_reply_reviewers": "0;0;0;0", 
"wc_reply_authors": "444;1041;1004;420", "reply_reviewers": "0;0;0;0", "reply_authors": "1;2;2;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 81.5, 34.57238782612506 ], "wc_strengths_avg": [ 57.5, 25.42144763777232 ], "wc_weaknesses_avg": [ 152.25, 131.3037984979871 ], "wc_questions_avg": [ 76.5, 70.85372255569922 ], "wc_review_avg": [ 367.75, 128.13152422413464 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 727.25, 295.6614406715898 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1085887278127634261&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=4rBEgZCubP", "pdf": "https://openreview.net/pdf?id=4rBEgZCubP", "email": "deepmind.com;google.com;google.com;deepmind.com;deepmind.com;google.com;deepmind.com", "author_num": 7, "aff_unique_index": "0;0;1;0;0;0;0", "aff_unique_norm": "Google;DeepMind", "aff_unique_dep": "Google DeepMind;", "aff_unique_url": "https://deepmind.com;https://deepmind.com", "aff_unique_abbr": "DeepMind;DeepMind", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;0;0;1;1;0", "aff_country_unique": "United Kingdom;United States" }, { "id": "4rCDEEnTvX", "title": "From Language Modeling to Instruction Following: Understanding the Behavior Shift in LLMs after Instruction Tuning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Large Language Models (LLMs) have achieved remarkable success, demonstrating powerful instruction-following capabilities across diverse tasks. Instruction fine-tuning is critical in enabling LLMs to align with user intentions and effectively follow instructions. In this work, we investigate how the instruction fine-tuning modifies pre-trained models, focusing on two perspectives: instruction recognition and knowledge evolution. To study the behavior shift of LLMs, we employ a suite of local and global explanation methods, including a gradient-based approach for input-output attribution and techniques for interpreting patterns and concepts in self-attention and feed-forward layers. Our findings reveal three significant impacts of instruction fine-tuning: 1) It empowers LLMs to better recognize the instruction parts from user prompts, thereby facilitating high-quality response generation and addressing the ``lost-in-the-middle'' issue observed in pre-trained models; 2) It aligns the knowledge stored in feed-forward layers with user-oriented tasks, exhibiting minimal shifts across linguistic levels. 3) It facilitates the learning of word-word relations with instruction verbs through the self-attention mechanism, particularly in the lower and middle layers, indicating enhanced recognition of instruction words. 
These insights contribute to a deeper understanding of the behavior shifts in LLMs after instruction fine-tuning and lay the groundwork for future research aimed at interpreting and optimizing LLMs for various applications.", "keywords": "Large Language Model;Instruction Tuning;Interpretation;Instruction Following;Supervised Fine-tuning", "primary_area": "visualization or interpretation of learned representations", "supplementary_material": "", "author": "Xuansheng Wu;Wenlin Yao;Jianshu Chen;Xiaoman Pan;Xiaoyang Wang;Ninghao Liu;Dong Yu", "authorids": "~Xuansheng_Wu1;~Wenlin_Yao1;~Jianshu_Chen1;~Xiaoman_Pan2;~Xiaoyang_Wang1;~Ninghao_Liu2;~Dong_Yu2", "gender": ";M;M;M;M;M;M", "homepage": "https://github.com/JacksonWuxs;https://wenlinyao.github.io/;https://chenjianshu.github.io/;https://sites.google.com/view/dongyu888/;https://cobweb.cs.uga.edu/~ninghaoliu/;https://panx27.github.io/homepage/;https://xyang0.github.io/", "dblp": "304/1261;203/8711;11/3124;71/4598-1;145/4489;148/9210;81/1832-1", "google_scholar": "https://scholar.google.com/citations?hl=en;qwo2A24AAAAJ;jQeFWdoAAAAJ;tMY31_gAAAAJ;Nir-EDYAAAAJ;tRPF03IAAAAJ;EeppWmkAAAAJ", "orcid": "0000-0002-7816-7658;;;0000-0003-0520-6844;0000-0002-9170-2424;;0000-0002-0746-1059", "linkedin": ";;;dongyu/;;;xiaoyang-wang-001", "or_profile": "~Xuansheng_Wu1;~Wenlin_Yao1;~Jianshu_Chen1;~Dong_Yu2;~Ninghao_Liu1;~Xiaoman_Pan1;~Xiaoyang_Wang2", "aff": "University of Georgia;Tencent AI Lab;Amazon;Tencent AI Lab;University of Georgia;Tencent AI Lab;Tencent AI Lab", "aff_domain": "uga.edu;tencent.com;amazon.com;tencent.com;uga.edu;tencent.com;tencent.com", "position": "PhD student;Researcher;Principal Scientist;Distinguished Scientist;Assistant Professor;Researcher;Senior Researcher", "bibtex": "@misc{\nwu2024from,\ntitle={From Language Modeling to Instruction Following: Understanding the Behavior Shift in {LLM}s after Instruction Tuning},\nauthor={Xuansheng Wu and Wenlin Yao and Jianshu Chen and Xiaoman Pan and Xiaoyang Wang and Ninghao Liu and Dong Yu},\nyear={2024},\nurl={https://openreview.net/forum?id=4rCDEEnTvX}\n}", "github": "", "project": "", "reviewers": "e43k;zeP7;QqaN", "site": "https://openreview.net/forum?id=4rCDEEnTvX", "pdf_size": 13402389, "rating": "3;5;6", "confidence": "4;4;3", "soundness": "2;2;2", "contribution": "2;3;2", "presentation": "2;1;2", "wc_summary": "124;133;82", "wc_strengths": "980;71;56", "wc_weaknesses": "2;647;131", "wc_questions": "2;94;236", "wc_review": "1108;945;505", "wc_reply_reviewers": "178;306;0", "wc_reply_authors": "1722;1714;452", "reply_reviewers": "2;2;0", "reply_authors": "5;3;1", "rating_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.0, 0.0 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 1.6666666666666667, 0.4714045207910317 ], "wc_summary_avg": [ 113.0, 22.22611077089287 ], "wc_strengths_avg": [ 369.0, 432.0856396595471 ], "wc_weaknesses_avg": [ 260.0, 278.671850031538 ], "wc_questions_avg": [ 110.66666666666667, 96.25429283356088 ], "wc_review_avg": [ 852.6666666666666, 254.68455434561048 ], "wc_reply_reviewers_avg": [ 161.33333333333334, 125.47863917372108 ], "wc_reply_authors_avg": [ 1296.0, 596.8070598331312 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.9428090415820634 ], "reply_authors_avg": [ 3.0, 1.632993161855452 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.7559289460184545, "gs_citation": 29, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=1012915126425971164&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;2;1;0;1;1", "aff_unique_norm": "University of Georgia;Tencent;Amazon", "aff_unique_dep": ";Tencent AI Lab;Amazon.com, Inc.", "aff_unique_url": "https://www.uga.edu;https://ai.tencent.com;https://www.amazon.com", "aff_unique_abbr": "UGA;Tencent AI Lab;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1;0;1;1", "aff_country_unique": "United States;China" }, { "id": "4sGoA7Eih8", "title": "Unmasking Transformers: A Theoretical Approach to Data Recovery via Attention Weights", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "In the realm of deep learning, transformers have emerged as a dominant architecture, particularly in natural language processing tasks. However, with their widespread adoption, concerns regarding the security and privacy of the data processed by these models have arisen. In this paper, we address a pivotal question: Can the data fed into transformers be recovered using their attention weights and outputs? We introduce a theoretical framework to tackle this problem. Specifically, we present an algorithm that aims to recover the input data $X \\in \\mathbb{R}^{d \\times n}$ from given attention weights $W = QK^\\top \\in \\mathbb{R}^{d \\times d}$ and output $B \\in \\mathbb{R}^{n \\times n}$ by minimizing the loss function $L(X)$. This loss function captures the discrepancy between the expected output and the actual output of the transformer. Our findings have significant implications for the Localized Layer-wise Mechanism (LLM), suggesting potential vulnerabilities in the model's design from a security and privacy perspective. This work underscores the importance of understanding and safeguarding the internal workings of transformers to ensure the confidentiality of processed data.", "keywords": "Attention;inversion", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/176cb0bc7f4e0761deab33bd6b6ada6eef2bf3ad.pdf", "author": "Yichuan Deng;Zhao Song;Shenghao Xie;Chiwun Yang", "authorids": "~Yichuan_Deng1;~Zhao_Song3;~Shenghao_Xie2;~Chiwun_Yang1", "gender": "M;M;M;M", "homepage": "https://homes.cs.washington.edu/~ycdeng/;https://www.youtube.com/@zhaosong2031;https://sites.google.com/view/shenghaoxie/;https://christianyang37.github.io/", "dblp": "183/7224-2;76/4051-2;;355/2807", "google_scholar": "https://scholar.google.com/citations?hl=en;yDZct7UAAAAJ;JKjz5D8AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Yichuan_Deng1;~Zhao_Song3;~Shenghao_Xie2;~Chiwun_Yang1", "aff": "Department of Computer Science, University of Washington;Adobe;The Chinese University of Hong Kong, Shenzhen;SUN YAT-SEN UNIVERSITY", "aff_domain": "cs.washington.edu;adobe.com;cuhk.edu.cn;sysu.edu.cn", "position": "PhD student;Researcher;Undergrad student;Undergrad student", "bibtex": "@misc{\ndeng2024unmasking,\ntitle={Unmasking Transformers: A Theoretical Approach to Data Recovery via Attention Weights},\nauthor={Yichuan Deng and Zhao Song and Shenghao Xie and Chiwun Yang},\nyear={2024},\nurl={https://openreview.net/forum?id=4sGoA7Eih8}\n}", "github": "", "project": "", "reviewers": "vHCT;wffn;K9uu;SERj", "site": "https://openreview.net/forum?id=4sGoA7Eih8", "pdf_size": 374694, "rating": "1;3;3;5", "confidence": "1;4;4;4", "soundness": "1;3;2;3", 
"contribution": "1;3;2;3", "presentation": "1;3;2;3", "wc_summary": "17;83;51;48", "wc_strengths": "13;69;15;11", "wc_weaknesses": "35;102;115;82", "wc_questions": "9;4;4;6", "wc_review": "74;258;185;147", "wc_reply_reviewers": "0;57;201;0", "wc_reply_authors": "14;155;243;187", "reply_reviewers": "0;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 3.0, 1.4142135623730951 ], "confidence_avg": [ 3.25, 1.299038105676658 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "contribution_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 49.75, 23.35995505132662 ], "wc_strengths_avg": [ 27.0, 24.289915602982237 ], "wc_weaknesses_avg": [ 83.5, 30.36856927812043 ], "wc_questions_avg": [ 5.75, 2.0463381929681126 ], "wc_review_avg": [ 166.0, 66.42665127793211 ], "wc_reply_reviewers_avg": [ 64.5, 82.17207554881402 ], "wc_reply_authors_avg": [ 149.75, 84.4670793859951 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8164965809277259, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17752848074473007675&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of Washington;Adobe;Chinese University of Hong Kong;Sun Yat-sen University", "aff_unique_dep": "Department of Computer Science;Adobe Inc.;;", "aff_unique_url": "https://www.washington.edu;https://www.adobe.com;https://www.cuhk.edu.cn;http://www.sysu.edu.cn", "aff_unique_abbr": "UW;Adobe;CUHK;SYSU", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Seattle;;Shenzhen", "aff_country_unique_index": "0;0;1;1", "aff_country_unique": "United States;China" }, { "id": "4stB7DFLp6", "title": "InstructRetro: Instruction Tuning post Retrieval-Augmented Pretraining", "track": "main", "status": "Reject", "tldr": "", "abstract": "Pretraining auto-regressive large language models (LLMs) with retrieval demonstrates better perplexity and factual accuracy by leveraging external databases. However, the size of existing pretrained retrieval-augmented LLM is still limited (e.g., Retro has 7.5B parameters), which limits the effectiveness of instruction tuning and zero-shot generalization. In this work, we introduce Retro 48B, the largest LLM pretrained with retrieval before instruction tuning. Specifically, we continue to pretrain the 43B GPT model on additional 100 billion tokens using the Retro augmentation method by retrieving from 1.2 trillion tokens. The obtained foundation model, Retro 48B, largely outperforms the original 43B GPT in terms of perplexity. After instruction tuning on Retro, InstructRetro demonstrates significant improvement over the instruction tuned GPT on zero-shot question answering (QA) tasks. Specifically, the average improvement of InstructRetro is 7% over its GPT counterpart across 8 short-form QA tasks, and 10% over GPT across 4 challenging long-form QA tasks. Surprisingly, we find that one can ablate the encoder from InstructRetro architecture and directly use its decoder backbone, while achieving comparable results. We hypothesize that pretraining with retrieval makes its decoder good at incorporating context for QA. 
Our results highlight the promising direction to obtain a better GPT decoder for QA through continued pretraining with retrieval before instruction tuning.", "keywords": "Large Language Models;Pretraining;Retrieval;Instruction Tuning", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Boxin Wang;Wei Ping;Lawrence McAfee;Peng Xu;Bo Li;Mohammad Shoeybi;Bryan Catanzaro", "authorids": "~Boxin_Wang1;~Wei_Ping1;~Lawrence_McAfee1;~Peng_Xu7;~Bo_Li19;~Mohammad_Shoeybi1;~Bryan_Catanzaro1", "gender": ";M;M;M;F;M;M", "homepage": "https://wbx.life;https://wpingnet.github.io/;https://nvidia.com;https://scholar.google.com.hk/citations?user=PQ26NTIAAAAJ&hl=en;http://boli.cs.illinois.edu/;;https://ctnzr.io", "dblp": "236/6319;08/8399.html;;84/586-8;50/3402-26;53/9742;14/4826", "google_scholar": "YOf2ATIAAAAJ;6gKEYRgAAAAJ;;https://scholar.google.com.hk/citations?user=PQ26NTIAAAAJ;K8vJkTcAAAAJ;62ElavIAAAAJ;UZ6kI2AAAAAJ", "orcid": ";;;;;;0000-0003-0034-7728", "linkedin": ";wei-ping/;;;;shoeybi/;bryancatanzaro/", "or_profile": "~Boxin_Wang1;~Wei_Ping1;~Lawrence_McAfee1;~Peng_Xu7;~Bo_Li19;~Mohammad_Shoeybi1;~Bryan_Catanzaro1", "aff": "NVIDIA;NVIDIA;NVIDIA;NVIDIA;University of Illinois, Urbana Champaign;NVIDIA;NVIDIA", "aff_domain": "nvidia.com;nvidia.com;nvidia.com;nvidia.com;illinois.edu;nvidia.com;nvidia.com", "position": "Senior Research Scientist;Principal Researcher;Researcher;Researcher;Assistant Professor;Director of Applied Resesrch;Vice President", "bibtex": "@misc{\nwang2024instructretro,\ntitle={InstructRetro: Instruction Tuning post Retrieval-Augmented Pretraining},\nauthor={Boxin Wang and Wei Ping and Lawrence McAfee and Peng Xu and Bo Li and Mohammad Shoeybi and Bryan Catanzaro},\nyear={2024},\nurl={https://openreview.net/forum?id=4stB7DFLp6}\n}", "github": "", "project": "", "reviewers": "p68m;A1jh;gdXh", "site": "https://openreview.net/forum?id=4stB7DFLp6", "pdf_size": 765842, "rating": "5;6;8", "confidence": "3;3;4", "soundness": "3;3;3", "contribution": "3;3;3", "presentation": "3;3;2", "wc_summary": "97;61;563", "wc_strengths": "14;79;45", "wc_weaknesses": "61;35;659", "wc_questions": "1;39;58", "wc_review": "173;214;1325", "wc_reply_reviewers": "0;0;48", "wc_reply_authors": "604;1187;2791", "reply_reviewers": "0;0;1", "reply_authors": "1;2;5", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 240.33333333333334, 228.63265053112795 ], "wc_strengths_avg": [ 46.0, 26.54555832275273 ], "wc_weaknesses_avg": [ 251.66666666666666, 288.2236785708088 ], "wc_questions_avg": [ 32.666666666666664, 23.697163449568293 ], "wc_review_avg": [ 570.6666666666666, 533.6567769227293 ], "wc_reply_reviewers_avg": [ 16.0, 22.627416997969522 ], "wc_reply_authors_avg": [ 1527.3333333333333, 924.7025948319216 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 1.699673171197595 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.944911182523068, "gs_citation": 49, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18383393396082703199&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;0;0;0;1;0;0", "aff_unique_norm": "NVIDIA;University of Illinois Urbana-Champaign", "aff_unique_dep": 
"NVIDIA Corporation;", "aff_unique_url": "https://www.nvidia.com;https://illinois.edu", "aff_unique_abbr": "NVIDIA;UIUC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "4u0ruVk749", "title": "DFITE: Estimation of Individual Treatment Effect Using Diffusion Model", "track": "main", "status": "Reject", "tldr": "", "abstract": "Learning individualized treatment effects (ITE) from observational data is a challenging task due to the absence of unobserved confounders. Previous methods mostly focus on assuming the Ignorability assumption, ignoring the unobserved confounders or overlooking the impact of a priori knowledge on the generation process of the latent variable, which can be quite impractical in real-world scenarios. Motivated by the recent advances in latent variable modeling, we propose to capture the unobserved latent space using a diffusion model, and accordingly to estimate the causal effect. More concretely, we build on the reverse diffusion process for the unobserved confounders as a Markov chain conditioned on a priori knowledge. In order to implement our model in a feasible way, we derive the variational bound in closed form. In the experiments, we compare our model with the state-of-the-art methods based on both synthetic and benchmark datasets, where we can empirically demonstrate consistent improvements of our model on $\\sqrt{\\epsilon_{PEHE}}$ and $\\epsilon_{ATE}$, respectively.", "keywords": "Individual Treatment Effect;Causal inference;diffusion model", "primary_area": "causal reasoning", "supplementary_material": "", "author": "Zhenlei Wang;Xu Chen;Xiaoxiao Xu;Lantao Hu;Peng Jiang;Kun Gai", "authorids": "~Zhenlei_Wang1;~Xu_Chen13;~Xiaoxiao_Xu2;~Lantao_Hu1;~Peng_Jiang6;~Kun_Gai1", "gender": "M;M;F;M;M;M", "homepage": "https://causal-rec.github.io/;https://gsai.ruc.edu.cn/chenxu;https://scholar.google.com/citations?hl=zh-CN&user=1I2OrQEAAAAJ&view_op=list_works&sortby=pubdate;;;", "dblp": "223/8301;83/6331-17;116/1526;;;59/2902", "google_scholar": "7x0kGsUAAAAJ;loPoqy0AAAAJ;https://scholar.google.com/citations?hl=zh-CN;P0EK1y8AAAAJ;https://scholar.google.com/citations?hl=en;PXO4ygEAAAAJ", "orcid": ";0000-0003-0144-1775;0009-0007-5493-5628;;0000-0002-9266-0780;", "linkedin": ";;;;;", "or_profile": "~Zhenlei_Wang1;~Xu_Chen13;~Xiaoxiao_Xu2;~Lantao_Hu1;~Peng_Jiang6;~Kun_Gai1", "aff": "Renmin University of China;Renmin University of China;Kuaishou Technology;;Kuaishou Technology;Kuaishou- \u5feb\u624b\u79d1\u6280", "aff_domain": "ruc.edu.cn;ruc.edu.cn;kuaishou.com;;kuaishou.com;kuaishou.com", "position": "PhD student;Associate Professor;Engineer;;Vice President;Instructor", "bibtex": "@misc{\nwang2024dfite,\ntitle={{DFITE}: Estimation of Individual Treatment Effect Using Diffusion Model},\nauthor={Zhenlei Wang and Xu Chen and Xiaoxiao Xu and Lantao Hu and Peng Jiang and Kun Gai},\nyear={2024},\nurl={https://openreview.net/forum?id=4u0ruVk749}\n}", "github": "", "project": "", "reviewers": "UsZZ;otL9;RF2t;Jxsb;1VfB", "site": "https://openreview.net/forum?id=4u0ruVk749", "pdf_size": 1723154, "rating": "1;3;3;3;5", "confidence": "4;3;4;4;3", "soundness": "2;2;2;1;3", "contribution": "1;2;2;2;2", "presentation": "1;2;2;3;2", "wc_summary": "39;73;67;96;55", "wc_strengths": "10;81;18;68;39", "wc_weaknesses": "247;271;18;285;151", "wc_questions": "144;184;102;79;384", "wc_review": "440;609;205;528;629", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": 
"0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "0;0;0;0;0", "rating_avg": [ 3.0, 1.2649110640673518 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.0, 0.6324555320336759 ], "contribution_avg": [ 1.8, 0.4000000000000001 ], "presentation_avg": [ 2.0, 0.6324555320336759 ], "wc_summary_avg": [ 66.0, 18.973665961010276 ], "wc_strengths_avg": [ 43.2, 27.56374430297887 ], "wc_weaknesses_avg": [ 194.4, 99.84307687566525 ], "wc_questions_avg": [ 178.6, 108.81470488863167 ], "wc_review_avg": [ 482.2, 153.84849690523467 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.6454972243679028, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:UYZWJsefyc8J:scholar.google.com/&scioq=DFITE:+Estimation+of+Individual+Treatment+Effect+Using+Diffusion+Model&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;1;1;1", "aff_unique_norm": "Renmin University of China;Kuaishou Technology", "aff_unique_dep": ";", "aff_unique_url": "http://www.ruc.edu.cn;https://www.kuaishou.com", "aff_unique_abbr": "RUC;Kuaishou", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "4uaogMQgNL", "title": "UpFusion: Novel View Diffusion from Unposed Sparse View Observations", "track": "main", "status": "Reject", "tldr": "", "abstract": "We propose UpFusion, a system that can perform novel view synthesis and infer 3D representations for an object given a sparse set of reference images without corresponding pose information. Current sparse-view 3D inference methods typically rely on camera poses to geometrically aggregate information from input views, but are not robust in-the-wild when such information is unavailable/inaccurate. In contrast, UpFusion sidesteps this requirement by learning to implicitly leverage the available images as context in a conditional generative model for synthesizing novel views. We incorporate two complementary forms of conditioning into diffusion models for leveraging the input views: a) via inferring query-view aligned features using a scene-level transformer, b) via intermediate attentional layers that can directly observe the input image tokens. We show that this mechanism allows generating high-fidelity novel views while improving the synthesis quality given additional (unposed) images. 
We evaluate our approach on the Co3D dataset and demonstrate the benefits of our method over pose-reliant alternatives. Finally, we also show that our learned model can generalize beyond the training categories, and hope that this provides a stepping stone to reconstructing generic objects from in-the-wild image collections.", "keywords": "Novel View Synthesis;Diffusion;3D;Generative Models;Transformers", "primary_area": "generative models", "supplementary_material": "", "author": "Bharath Raj Nagoor Kani;Hsin-Ying Lee;Sergey Tulyakov;Shubham Tulsiani", "authorids": "~Bharath_Raj_Nagoor_Kani1;~Hsin-Ying_Lee2;~Sergey_Tulyakov1;~Shubham_Tulsiani1", "gender": "M;M;M;M", "homepage": "https://thatbrguy.github.io;http://hsinyinglee.com/;http://www.stulyakov.com/;https://shubhtuls.github.io/", "dblp": "364/1857;149/7976-1.html;40/6115;135/6623", "google_scholar": "DcZsAGEAAAAJ;;mgzXR0sAAAAJ;06rffEkAAAAJ", "orcid": ";;;", "linkedin": ";;sergeytulyakov/;", "or_profile": "~Bharath_Raj_Nagoor_Kani1;~Hsin-Ying_Lee2;~Sergey_Tulyakov1;~Shubham_Tulsiani1", "aff": "Carnegie Mellon University;Snap Inc.;Snap Inc.;Carnegie Mellon University", "aff_domain": "cmu.edu;snap.com;snapchat.com;cmu.edu", "position": "MS student;Researcher;Director of Research;Assistant Professor", "bibtex": "@misc{\nkani2024upfusion,\ntitle={UpFusion: Novel View Diffusion from Unposed Sparse View Observations},\nauthor={Bharath Raj Nagoor Kani and Hsin-Ying Lee and Sergey Tulyakov and Shubham Tulsiani},\nyear={2024},\nurl={https://openreview.net/forum?id=4uaogMQgNL}\n}", "github": "", "project": "", "reviewers": "rcbB;yMjS;Egt9;UGEs", "site": "https://openreview.net/forum?id=4uaogMQgNL", "pdf_size": 17124243, "rating": "5;5;5;6", "confidence": "5;4;5;4", "soundness": "2;3;2;2", "contribution": "2;2;2;2", "presentation": "3;2;3;2", "wc_summary": "51;71;83;29", "wc_strengths": "110;41;92;17", "wc_weaknesses": "285;220;271;127", "wc_questions": "7;117;97;5", "wc_review": "453;449;543;178", "wc_reply_reviewers": "87;0;0;0", "wc_reply_authors": "300;485;554;91", "reply_reviewers": "1;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 58.5, 20.512191496766015 ], "wc_strengths_avg": [ 65.0, 37.52998800959041 ], "wc_weaknesses_avg": [ 225.75, 61.93292742959919 ], "wc_questions_avg": [ 56.5, 50.997548960709864 ], "wc_review_avg": [ 405.75, 136.75776943194123 ], "wc_reply_reviewers_avg": [ 21.75, 37.67210506462308 ], "wc_reply_authors_avg": [ 357.5, 179.71992098818652 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11188550895155497640&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Carnegie Mellon University;Snap Inc.", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.snapinc.com", "aff_unique_abbr": "CMU;Snap", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "PAC Prediction Sets Under Label Shift", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19445", "id": "4vPVBh3fhz", "author_site": "Wenwen Si, Sangdon Park, Insup 
Lee, Edgar Dobriban, Osbert Bastani", "tldr": "", "abstract": "Prediction sets capture uncertainty by predicting sets of labels rather than individual labels, enabling downstream decisions to conservatively account for all plausible outcomes. Conformal inference algorithms construct prediction sets guaranteed to contain the true label with high probability. These guarantees fail to hold in the face of distribution shift, which is precisely when reliable uncertainty quantification can be most useful. We propose a novel algorithm for constructing prediction sets with PAC guarantees in the label shift setting, where the probabilities of labels can differ between the source and target distributions. Our algorithm relies on constructing confidence intervals for importance weights by propagating uncertainty through a Gaussian elimination algorithm. We evaluate our approach on four datasets: the CIFAR-10 and ChestX-Ray image datasets, the tabular CDC Heart Dataset, and the AGNews text dataset. Our algorithm satisfies the PAC guarantee while producing smaller prediction set sizes compared to several baselines.", "keywords": "prediction set;label shift;distribution-free uncertainty quantification;probably approximately correct;Clopper-Pearson binomial interval;rejection sampling", "primary_area": "probabilistic methods (Bayesian methods, variational inference, sampling, UQ, etc.)", "supplementary_material": "", "author": "Wenwen Si;Sangdon Park;Insup Lee;Edgar Dobriban;Osbert Bastani", "authorids": "~Wenwen_Si1;~Sangdon_Park1;~Insup_Lee1;~Edgar_Dobriban2;~Osbert_Bastani1", "gender": "F;M;;;M", "homepage": ";https://sangdon.github.io/;https://www.cis.upenn.edu/~lee/;https://statistics.wharton.upenn.edu/profile/dobriban/;http://obastani.github.io", "dblp": "226/6274;119/1530-1;l/InsupLee.html;99/11269;21/11275", "google_scholar": "IFgBKbIAAAAJ;Vi2E2F4AAAAJ;qPlUgrgAAAAJ;aGvH4yMAAAAJ;cxYepGkAAAAJ", "orcid": ";;0000-0003-2672-1132;;", "linkedin": ";;;edgar-dobriban/;", "or_profile": "~Wenwen_Si1;~Sangdon_Park1;~Insup_Lee1;~Edgar_Dobriban2;~Osbert_Bastani1", "aff": "University of Pennsylvania;POSTECH;University of Pennsylvania;The Wharton School, University of Pennsylvania;University of Pennsylvania", "aff_domain": "upenn.edu;postech.ac.kr;upenn.edu;wharton.upenn.edu;upenn.edu", "position": "PhD student;Assistant Professor;Full Professor;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nsi2024pac,\ntitle={{PAC} Prediction Sets Under Label Shift},\nauthor={Wenwen Si and Sangdon Park and Insup Lee and Edgar Dobriban and Osbert Bastani},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=4vPVBh3fhz}\n}", "github": "", "project": "", "reviewers": "2Pyj;onAF;mNQN;y3Aa;tiZb", "pdf_size": 1381636, "rating": "6;6;6;6;8", "confidence": "3;3;3;3;4", "soundness": "3;3;3;2;3", "contribution": "3;2;2;2;3", "presentation": "3;3;3;3;3", "wc_summary": "76;67;84;33;86", "wc_strengths": "48;8;58;27;107", "wc_weaknesses": "62;2;150;59;75", "wc_questions": "223;56;174;20;55", "wc_review": "409;133;466;139;323", "wc_reply_reviewers": "15;279;79;14;10", "wc_reply_authors": "888;516;954;192;508", "reply_reviewers": "1;3;1;1;1", "reply_authors": "2;3;2;1;1", "rating_avg": [ 6.4, 0.7999999999999999 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "contribution_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 69.2, 19.30181338631166 ], 
"wc_strengths_avg": [ 49.6, 33.493880038000974 ], "wc_weaknesses_avg": [ 69.6, 47.39873416031276 ], "wc_questions_avg": [ 105.6, 78.49738849159252 ], "wc_review_avg": [ 294.0, 136.8181274539306 ], "wc_reply_reviewers_avg": [ 79.4, 103.03513963692194 ], "wc_reply_authors_avg": [ 611.6, 279.1312236207193 ], "reply_reviewers_avg": [ 1.4, 0.8000000000000002 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11840371556296107071&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "openreview": "https://openreview.net/forum?id=4vPVBh3fhz", "pdf": "https://openreview.net/pdf?id=4vPVBh3fhz", "email": "upenn.edu;postech.ac.kr;upenn.edu;wharton.upenn.edu;upenn.edu", "author_num": 5, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "University of Pennsylvania;Pohang University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.upenn.edu;https://www.postech.ac.kr", "aff_unique_abbr": "UPenn;POSTECH", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pohang", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;South Korea" }, { "id": "4w4PDIT3h4", "title": "Focus on Primary: Differential Diverse Data Augmentation for Generalization in Visual Reinforcement Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "In reinforcement learning, it is common for the agent to overfit the training environment, making generalization to unseen environments extremely challenging. Visual reinforcement learning that relies on observed images as input is particularly constrained by generalization and sample efficiency. To address these challenges, various data augmentation methods are consistently attempted to improve the generalization capability and reduce the training cost. However, the naive use of data augmentation can often lead to breakdowns in learning. In this paper, we propose two novel approaches: Diverse Data Augmentation (DDA) and Differential Diverse Data Augmentation (D3A). Leveraging a pre-trained encoder-decoder model, we segment primary pixels to avoid inappropriate data augmentation affecting critical information. DDA improves the generalization capability of the agent in complex environments through consistency of encoding. D3A uses proper data augmentation for primary pixels to further improve generalization while satisfying semantic-invariant state transformation. We extensively evaluate our methods on a series of generalization tasks of DeepMind Control Suite. 
The results demonstrate that our methods significantly improve the generalization performance of the agent in unseen environments, and enable the selection of more diverse data augmentations to improve the sample efficiency of off-policy algorithms.", "keywords": "Visual Reinforcement Learning;Data Augmentation;Generalization", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/746288b2c1978e73fb972b9b6987c3232b790b8d.zip", "author": "Junhong Wu;Jie Liu;Xi Xiong;Daolong An;Shuai L\u00fc", "authorids": "~Junhong_Wu2;~Jie_Liu34;~Xi_Xiong1;~Daolong_An4;~Shuai_L\u00fc1", "gender": ";;M;M;M", "homepage": ";;https://teachers.jlu.edu.cn/lus/en/tdcy/43488/content/1757.htm;;https://lus-jlu.github.io", "dblp": ";https://dblp.uni-trier.de/pid/03/2134-14.html;;371/6068;27/10828-1", "google_scholar": ";;;zvlfGUgAAAAJ;S1T_HV0AAAAJ", "orcid": "0009-0009-9951-0505;;;0009-0005-0020-1990;0000-0002-8081-4498", "linkedin": ";;;andaolong-0a5689179;", "or_profile": "~Junhong_Wu2;~Jie_Liu34;~Xi_Xiong1;~Daolong_An4;~Shuai_L\u00fc1", "aff": "Jilin University;Jilin University;Jilin University;Jilin University;Jilin University", "aff_domain": "jlu.edu.cn;jlu.edu.cn;jlu.edu.cn;jlu.edu.cn;jlu.edu.cn", "position": "MS student;Associate Professor;MS student;MS student;Associate Professor", "bibtex": "@misc{\nwu2024focus,\ntitle={Focus on Primary: Differential Diverse Data Augmentation for Generalization in Visual Reinforcement Learning},\nauthor={Junhong Wu and Jie Liu and Xi Xiong and Daolong An and Shuai L{\\\"u}},\nyear={2024},\nurl={https://openreview.net/forum?id=4w4PDIT3h4}\n}", "github": "", "project": "", "reviewers": "3m8H;XRib;xszw;ziWN", "site": "https://openreview.net/forum?id=4w4PDIT3h4", "pdf_size": 2427548, "rating": "3;3;5;5", "confidence": "4;5;3;3", "soundness": "2;2;3;3", "contribution": "1;2;2;3", "presentation": "2;1;3;3", "wc_summary": "33;168;98;99", "wc_strengths": "10;113;18;34", "wc_weaknesses": "183;354;175;155", "wc_questions": "5;37;5;59", "wc_review": "231;672;296;347", "wc_reply_reviewers": "243;32;0;70", "wc_reply_authors": "1153;1176;680;852", "reply_reviewers": "1;1;0;1", "reply_authors": "3;2;1;2", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 99.5, 47.74149138851865 ], "wc_strengths_avg": [ 43.75, 40.90461465409496 ], "wc_weaknesses_avg": [ 216.75, 79.89485277538222 ], "wc_questions_avg": [ 26.5, 22.863726730347352 ], "wc_review_avg": [ 386.5, 169.88304800656243 ], "wc_reply_reviewers_avg": [ 86.25, 93.83063199190337 ], "wc_reply_authors_avg": [ 965.25, 208.48186371960512 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:kzLnSsWpc6cJ:scholar.google.com/&scioq=Focus+on+Primary:+Differential+Diverse+Data+Augmentation+for+Generalization+in+Visual+Reinforcement+Learning&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Jilin University", "aff_unique_dep": "", "aff_unique_url": "http://www.jlu.edu.cn", "aff_unique_abbr": "JLU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "4x5SXqqrdv", "title": "Improving Private 
Training via In-distribution Public Data Synthesis and Generalization", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "To alleviate the utility degradation of deep learning classification with differential privacy (DP), employing extra public data or pre-trained models has been widely explored. Recently, the use of in-distribution public data has been investigated, where a tiny subset of data owners share their data publicly. In this paper, to mitigate memorization and overfitting by the limited-sized in-distribution public data, we leverage recent diffusion models and employ various augmentation techniques for improving diversity. We then explore the optimization to discover flat minima to public data and suggest weight multiplicity to enhance the generalization of the private training. While assuming 4\\% of training data as public, our method brings significant performance gain even without using pre-trained models, i.e., achieving 85.78\\% on CIFAR-10 with a privacy budget of $\\varepsilon=2$ and $\\delta=10^{-5}$.", "keywords": "Differential Privacy;Privacy;Optimization;DP-SGD;Diffusion;Synthesis", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/e565f490d512e3062f409456d28c0f7d8b3f3e0a.zip", "author": "Jinseong Park;Yujin Choi;Jaewook Lee", "authorids": "~Jinseong_Park1;~Yujin_Choi1;~Jaewook_Lee1", "gender": "M;F;M", "homepage": "https://github.com/JinseongP;;http://slcf.snu.ac.kr", "dblp": "178/8948-1;251/3065;39/4985-1", "google_scholar": "o4-E5z0AAAAJ;3u0-O2sAAAAJ;teMdzbwAAAAJ", "orcid": "0000-0003-1931-8441;0000-0001-9150-704X;", "linkedin": "jinseong-park-a84740226/;;", "or_profile": "~Jinseong_Park1;~Yujin_Choi1;~Jaewook_Lee1", "aff": "Seoul National University;Seoul National University;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr;snu.ac.kr", "position": "PhD student;PhD student;Full Professor", "bibtex": "@misc{\npark2024improving,\ntitle={Improving Private Training via In-distribution Public Data Synthesis and Generalization},\nauthor={Jinseong Park and Yujin Choi and Jaewook Lee},\nyear={2024},\nurl={https://openreview.net/forum?id=4x5SXqqrdv}\n}", "github": "", "project": "", "reviewers": "9BFG;8NJG;nRz4;jmD4", "site": "https://openreview.net/forum?id=4x5SXqqrdv", "pdf_size": 1125449, "rating": "3;5;5;6", "confidence": "3;4;4;3", "soundness": "2;2;3;3", "contribution": "2;2;3;3", "presentation": "2;2;2;4", "wc_summary": "97;68;51;70", "wc_strengths": "48;76;30;89", "wc_weaknesses": "92;801;340;260", "wc_questions": "21;59;2;4", "wc_review": "258;1004;423;423", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 71.5, 16.469669092000604 ], "wc_strengths_avg": [ 60.75, 23.12331074911203 ], "wc_weaknesses_avg": [ 373.25, 262.6798193618992 ], "wc_questions_avg": [ 21.5, 22.874658467395747 ], "wc_review_avg": [ 527.0, 283.5145498911828 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.2294157338705618, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:9IwEeQN7TZgJ:scholar.google.com/&scioq=Improving+Private+Training+via+In-distribution+Public+Data+Synthesis+and+Generalization&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "id": "4xXOc9nssp", "title": "Patio: Framework for Private Release of Ratios", "track": "main", "status": "Desk Reject", "tldr": "", "abstract": "Averages and ratios are some of the most basic primitives in data analytics, statistics, and machine learning. In this work, we study the differentially private (DP) release of ratios.\nFor tasks for which the numerator $a(\\cdot)$ and denominator $b(\\cdot)$ satisfy a certain general co-monotonicity property, we give a new mechanism \\emph{Patio} (Private rATIO) for privately releasing the ratio $a(\\mathbf{x})/b(\\mathbf{x})$ for an input dataset $\\mathbf{x}$, with strong theoretical guarantees and practical performance.\nWe also prove that under general conditions on $a(\\cdot)$ and $b(\\cdot)$, the variance of our mechanism matches up to a $1+o(1)$ factor the variance of the Laplace distribution scaled with the \\emph{local} sensitivity. This is in contrast with the standard Laplace mechanism, which scales the noise with---the potentially much larger---\\emph{global} sensitivity.\n\nOur algorithm can be applied to a variety of tasks and settings including estimating averages, the Jaccard similarity coefficient, and several metrics quantifying the utility of a classifier such as its precision, sensitivity, specificity and $F$-score. For the above-mentioned statistics, our MSE matches that of the Laplace distribution scaled to the local sensitivity of the given task. 
We perform empirical evaluation showing the better utility of our algorithm compared to natural and state-of-the-art baselines.", "keywords": "privacy;ratio;average", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "", "author": "Badih Ghazi;Pritish Kamath;Ravi Kumar;Pasin Manurangsi;Adam Sealfon;Jakub Tetek", "authorids": "~Badih_Ghazi1;~Pritish_Kamath2;~Ravi_Kumar1;~Pasin_Manurangsi2;~Adam_Sealfon1;~Jakub_Tetek1", "gender": ";M;M;M;;", "homepage": "https://sites.google.com/view/badihghazi/home;https://pritishkamath.github.io/;https://sites.google.com/site/ravik53/;https://pasin30055.github.io/;https://asealfon.github.io/;https://sites.google.com/view/jakub-tetek/", "dblp": "125/2134;https://dblp.org/pers/k/Kamath:Pritish.html;k/RaviKumar.html;133/2059;150/6253;211/6738", "google_scholar": "GBJLTN8AAAAJ;1JFARhUAAAAJ;J_XhIsgAAAAJ;35hM-PkAAAAJ;nrlhJMcAAAAJ;https://scholar.google.com/citations?view_op=list_works", "orcid": ";;0000-0002-2203-2586;;;0000-0002-2046-1627", "linkedin": "badih-ghazi-608379132/;;ravi-kumar-a3a9631;;;", "or_profile": "~Badih_Ghazi1;~Pritish_Kamath2;~Ravi_Kumar1;~Pasin_Manurangsi2;~Adam_Sealfon1;~Jakub_Tetek1", "aff": "Google;Google Research;Google;Google;Google;University of Copenhagen", "aff_domain": "google.com;google.com;google.com;google.com;google.com;diku.dk", "position": "Researcher;Research Scientist;Research Scientist;Research Scientist;Researcher;PhD student", "bibtex": "@misc{\nghazi2024patio,\ntitle={Patio: Framework for Private Release of Ratios},\nauthor={Badih Ghazi and Pritish Kamath and Ravi Kumar and Pasin Manurangsi and Adam Sealfon and Jakub Tetek},\nyear={2024},\nurl={https://openreview.net/forum?id=4xXOc9nssp}\n}", "github": "", "project": "", "reviewers": "", "site": "https://openreview.net/forum?id=4xXOc9nssp", "pdf_size": 0, "rating": "", "confidence": "", "soundness": "", "contribution": "", "presentation": "", "wc_summary": "", "wc_strengths": "", "wc_weaknesses": "", "wc_questions": "", "wc_review": "", "wc_reply_reviewers": "", "wc_reply_authors": "", "reply_reviewers": "", "reply_authors": "", "rating_avg": [ 0, 0 ], "confidence_avg": [ 0, 0 ], "soundness_avg": [ 0, 0 ], "contribution_avg": [ 0, 0 ], "presentation_avg": [ 0, 0 ], "wc_summary_avg": [ 0, 0 ], "wc_strengths_avg": [ 0, 0 ], "wc_weaknesses_avg": [ 0, 0 ], "wc_questions_avg": [ 0, 0 ], "wc_review_avg": [ 0, 0 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 0, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:zeVd-8HpjGcJ:scholar.google.com/&scioq=Patio:+Framework+for+Private+Release+of+Ratios&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "Google;University of Copenhagen", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.ku.dk", "aff_unique_abbr": "Google;UCPH", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "United States;Denmark" }, { "id": "4y3GDTFv70", "title": "A Latent Space Theory for Emergent Abilities in Large Language Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "Languages are not created randomly but rather to communicate information. 
There is a strong association between languages and their underlying meanings, resulting in a sparse joint distribution that is heavily peaked according to their correlations. Moreover, these peak values happen to match with the marginal distribution of languages due to the sparsity. With the advent of LLMs trained on big data and large models, we can now precisely assess the marginal distribution of languages, providing a convenient means of exploring the sparse structures in the joint distribution for effective inferences. In this paper, we categorize languages as either unambiguous or {\\epsilon}-ambiguous and present quantitative results to demonstrate that the emergent abilities of LLMs, such as language understanding, in-context learning, chain-of-thought prompting, and effective instruction fine-tuning, can all be attributed to Bayesian inference on the sparse joint distribution of languages.", "keywords": "Large Language Model (LLMs); Emergent Abilities; Bayesian Inference; Latent Space", "primary_area": "probabilistic methods (Bayesian methods, variational inference, sampling, UQ, etc.)", "supplementary_material": "/attachment/0f4b531ed89e9be272c73c78c0b0dc0aa2ea6dcd.zip", "author": "Hui Jiang", "authorids": "~Hui_Jiang1", "gender": "M", "homepage": "https://wiki.eecs.yorku.ca/user/hj/", "dblp": "64/3246", "google_scholar": "https://scholar.google.ca/citations?user=lQi05ZkAAAAJ", "orcid": "", "linkedin": "", "or_profile": "~Hui_Jiang1", "aff": "York University", "aff_domain": "yorku.ca", "position": "Full Professor", "bibtex": "@misc{\njiang2024a,\ntitle={A Latent Space Theory for Emergent Abilities in Large Language Models},\nauthor={Hui Jiang},\nyear={2024},\nurl={https://openreview.net/forum?id=4y3GDTFv70}\n}", "github": "", "project": "", "reviewers": "syQW;5EvL;aMxK;Zr2w", "site": "https://openreview.net/forum?id=4y3GDTFv70", "pdf_size": 895127, "rating": "1;3;3;6", "confidence": "4;4;3;3", "soundness": "1;3;2;3", "contribution": "1;2;2;3", "presentation": "2;2;2;3", "wc_summary": "65;31;52;53", "wc_strengths": "6;50;27;46", "wc_weaknesses": "127;279;106;33", "wc_questions": "1;122;2;69", "wc_review": "199;482;187;201", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;218;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;1;0", "rating_avg": [ 3.25, 1.7853571071357126 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 50.25, 12.234684303242156 ], "wc_strengths_avg": [ 32.25, 17.469616481193857 ], "wc_weaknesses_avg": [ 136.25, 89.49685748672967 ], "wc_questions_avg": [ 48.5, 50.59891303180336 ], "wc_review_avg": [ 267.25, 124.10152094152593 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 54.5, 94.39676901250381 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0.25, 0.4330127018922193 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.7001400420140049, "gs_citation": 49, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8083484720921169366&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0", "aff_unique_norm": "York University", "aff_unique_dep": "", "aff_unique_url": "https://www.yorku.ca", "aff_unique_abbr": "York U", "aff_country_unique_index": "0", "aff_country_unique": "Canada" }, { "title": "Space and time continuous physics simulation from partial observations", "status": "Spotlight", "track": "main", "site": 
"https://iclr.cc/virtual/2024/poster/19444", "id": "4yaFQ7181M", "author_site": "Steeven Janny, Madiha Nadri, Julie Digne, Christian Wolf", "tldr": "", "abstract": "Modern techniques for physical simulations rely on numerical schemes and mesh-refinement methods to address trade-offs between precision and complexity, but these handcrafted solutions are tedious and require high computational power. Data-driven methods based on large-scale machine learning promise high adaptivity by integrating long-range dependencies more directly and efficiently. In this work, we focus on computational fluid dynamics and address the shortcomings of a large part of the literature, which are based on fixed support for computations and predictions in the form of regular or irregular grids. We propose a novel setup to perform predictions in a continuous spatial and temporal domain while being trained on sparse observations. We formulate the task as a double observation problem and propose a solution with two interlinked dynamical systems defined on, respectively, the sparse positions and the continuous domain, which allows to forecast and interpolate a solution from the initial condition. Our practical implementation involves recurrent GNNs and a spatio-temporal attention observer capable of interpolating the solution at arbitrary locations. Our model not only generalizes to new initial conditions (as standard auto-regressive models do) but also performs evaluation at arbitrary space and time locations. We evaluate on three standard datasets in fluid dynamics and compare to strong baselines, which are outperformed in classical settings and the extended new task requiring continuous predictions.", "keywords": "Physics;simulation;interpolation", "primary_area": "applications to physical sciences (physics, chemistry, biology, etc.)", "supplementary_material": "", "author": "Steeven JANNY;Madiha Nadri;Julie Digne;Christian Wolf", "authorids": "~Steeven_JANNY2;~Madiha_Nadri1;~Julie_Digne1;~Christian_Wolf5", "gender": "M;F;F;M", "homepage": "https://steevenjanny.github.io/;;https://perso.liris.cnrs.fr/julie.digne/;https://chriswolfvision.github.io/www/", "dblp": "228/8300;;11/8698;38/2606-1.html", "google_scholar": "IC0ceIgAAAAJ;KOXeslUAAAAJ;https://scholar.google.fr/citations?user=EOBpDNQAAAAJ;idYS1AIAAAAJ", "orcid": ";;0000-0003-0905-0840;", "linkedin": ";;;christian-wolf-522761249/", "or_profile": "~Steeven_JANNY2;~Madiha_Nadri1;~Julie_Digne1;~Christian_Wolf5", "aff": "Naver Labs Europe;Universit\u00e9 Claude Bernard Lyon1 - LAGEPP;LIRIS, CNRS;Naver Labs Europe", "aff_domain": "naverlabs.com;univ-lyon1.fr;liris.cnrs.fr;naverlabs.com", "position": "Researcher;Associate Professor;Researcher;Principal Researcher", "bibtex": "@inproceedings{\njanny2024space,\ntitle={Space and time continuous physics simulation from partial observations},\nauthor={Steeven JANNY and Madiha Nadri and Julie Digne and Christian Wolf},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=4yaFQ7181M}\n}", "github": "", "project": "", "reviewers": "eMKs;VJPj;fAqF;Kpnn;uqXM", "pdf_size": 23760872, "rating": "6;8;8;8;8", "confidence": "2;3;3;3;4", "soundness": "3;4;3;4;3", "contribution": "3;3;3;3;3", "presentation": "4;3;3;4;4", "wc_summary": "71;168;69;44;52", "wc_strengths": "28;112;68;80;32", "wc_weaknesses": "29;250;250;109;141", "wc_questions": "133;176;4;103;195", "wc_review": "261;706;391;336;420", "wc_reply_reviewers": "0;0;65;26;0", "wc_reply_authors": 
"653;433;514;598;630", "reply_reviewers": "0;0;1;1;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 7.6, 0.7999999999999999 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_avg": [ 80.8, 44.77231287302455 ], "wc_strengths_avg": [ 64.0, 31.292171544972714 ], "wc_weaknesses_avg": [ 155.8, 85.12907846323722 ], "wc_questions_avg": [ 122.2, 67.30646328548247 ], "wc_review_avg": [ 422.8, 151.61451117884462 ], "wc_reply_reviewers_avg": [ 18.2, 25.47469332494505 ], "wc_reply_authors_avg": [ 565.6, 81.3402729279906 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.790569415042095, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17782672727549054795&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 7, "openreview": "https://openreview.net/forum?id=4yaFQ7181M", "pdf": "https://openreview.net/pdf?id=4yaFQ7181M", "email": "naverlabs.com;univ-lyon1.fr;liris.cnrs.fr;naverlabs.com", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "NAVER LABS;Universit\u00e9 Claude Bernard Lyon1;CNRS", "aff_unique_dep": ";LAGEPP;LIRIS", "aff_unique_url": "https://labs.naver.com;https://www.universite-lyon1.fr;https://www.cnrs.fr", "aff_unique_abbr": "NLE;UCBL;CNRS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Lyon", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "Unknown;France" }, { "title": "Beyond Vanilla Variational Autoencoders: Detecting Posterior Collapse in Conditional and Hierarchical Variational Autoencoders", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19443", "id": "4zZFGliCl9", "author_site": "Hien Dang, Tho-Huu Tran, Tan Nguyen, Nhat Ho", "tldr": "", "abstract": "The posterior collapse phenomenon in variational autoencoder (VAE), where the variational posterior distribution closely matches the prior distribution, can hinder the quality of the learned latent variables. As a consequence of posterior collapse, the latent variables extracted by the encoder in VAE preserve less information from the input data and thus fail to produce meaningful representations as input to the reconstruction process in the decoder. While this phenomenon has been an actively addressed topic related to VAE performance, the theory for posterior collapse remains underdeveloped, especially beyond the standard VAE. In this work, we advance the theoretical understanding of posterior collapse to two important and prevalent yet less studied classes of VAE: conditional VAE and hierarchical VAE. Specifically, via a non-trivial theoretical analysis of linear conditional VAE and hierarchical VAE with two levels of latent, we prove that the cause of posterior collapses in these models includes the correlation between the input and output of the conditional VAE and the effect of learnable encoder variance in the hierarchical VAE. 
We empirically validate our theoretical findings for linear conditional and hierarchical VAE and demonstrate that these results are also predictive for non-linear cases with extensive experiments.", "keywords": "variational autoencoders;posterior collapse", "primary_area": "generative models", "supplementary_material": "/attachment/edc531461a86d46ea17a15ff21f0575acc5e5447.zip", "author": "Hien Dang;Tho Tran Huu;Tan Minh Nguyen;Nhat Ho", "authorids": "~Hien_Dang1;~Tho_Tran_Huu1;~Tan_Minh_Nguyen1;~Nhat_Ho1", "gender": ";M;M;M", "homepage": ";;https://tanmnguyen89.github.io/;https://nhatptnk8912.github.io/", "dblp": ";337/2038;255/4725;203/4479", "google_scholar": ";fG3mIYEAAAAJ;OizOh88AAAAJ;https://scholar.google.ca/citations?user=Xs7cKMwAAAAJ", "orcid": ";;;", "linkedin": ";;;nhat-pham-minh-ho-267b8164/", "or_profile": "~Hien_Dang1;~Tho_Tran_Huu1;~Tan_Minh_Nguyen1;~Nhat_Ho1", "aff": ";National University of Singapore;National University of Singapore;University of Texas, Austin", "aff_domain": ";u.nus.edu;nus.edu.sg;utexas.edu", "position": ";PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\ndang2024beyond,\ntitle={Beyond Vanilla Variational Autoencoders: Detecting Posterior Collapse in Conditional and Hierarchical Variational Autoencoders},\nauthor={Hien Dang and Tho Tran Huu and Tan Minh Nguyen and Nhat Ho},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=4zZFGliCl9}\n}", "github": "", "project": "", "reviewers": "1CHE;fUP2;VR5S;Bn1w;vZoP", "pdf_size": 19577412, "rating": "6;6;6;6;8", "confidence": "4;4;3;2;4", "soundness": "3;3;4;4;4", "contribution": "3;3;3;3;4", "presentation": "3;2;3;3;4", "wc_summary": "39;73;71;44;73", "wc_strengths": "21;57;38;92;94", "wc_weaknesses": "95;319;281;295;52", "wc_questions": "27;25;37;137;30", "wc_review": "182;474;427;568;249", "wc_reply_reviewers": "55;6;0;0;0", "wc_reply_authors": "1284;1267;2309;2130;723", "reply_reviewers": "1;1;0;0;0", "reply_authors": "3;2;4;4;1", "rating_avg": [ 6.4, 0.7999999999999999 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 3.6, 0.4898979485566356 ], "contribution_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 60.0, 15.20526224699857 ], "wc_strengths_avg": [ 60.4, 28.959281759049205 ], "wc_weaknesses_avg": [ 208.4, 111.64515215628487 ], "wc_questions_avg": [ 51.2, 43.09245873699945 ], "wc_review_avg": [ 380.0, 143.3555021615843 ], "wc_reply_reviewers_avg": [ 12.2, 21.525798475317934 ], "wc_reply_authors_avg": [ 1542.6, 591.097487729393 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 2.8, 1.16619037896906 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.37500000000000006, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15624987190754924422&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=4zZFGliCl9", "pdf": "https://openreview.net/pdf?id=4zZFGliCl9", "email": ";u.nus.edu;nus.edu.sg;utexas.edu", "author_num": 4, "aff_unique_index": "0;0;1", "aff_unique_norm": "National University of Singapore;University of Texas at Austin", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;https://www.utexas.edu", "aff_unique_abbr": "NUS;UT Austin", "aff_campus_unique_index": "1", "aff_campus_unique": ";Austin", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Singapore;United States" }, { 
"id": "4zfbwpGhd8", "title": "Vision-Language Instruction-enhanced Tuning via Parameter-efficient Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Instruction tuning has shown promising potential for developing general-purpose AI capabilities in large-scale pretrained models. In multimodal community, this has motivated growing research on enhancing instruction tuning to integrate multimodal information for creative applications. However, existing works have two main limitations: the high training costs and heavy computing resource dependence of full model fine-tuning, and the lack of semantic information in instructions, which hinders multimodal alignment. In this paper, we propose a novel architecture called Vision-Language Instruction-enhanced Tuning via Parameter-efficient Learning (VITAL). Our proposed VITAL first enables lightweight model training using only 2% of parameters through automatic mode approximation. More importantly, VITAL enhances instruction semantics from two perspectives: 1) aggregating more context via enhanced instruction mixture to aid multimodal fusion, and 2) strengthening the connection between the proposed parameter-efficient tuning method and mutual information through our proposed score-based information bottleneck. Validation experiments on six multimodal downstream benchmarks demonstrate that VITAL outperforms state-of-the-art approaches in most cases, even surpassing the performance of full fine-tuning. Besides, extensive experiments on the few-shot setting as well as various visualization analyses have also fully validated our advantages.", "keywords": "Parameter-efficient Learning;Instruction Tuning;MultiModal", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Yihang Zhai;Haixin Wang;Jianlong Chang;Xinlong Yang;Yang Xian;Jinan Sun;Shikun Zhang;Qi Tian", "authorids": "~Yihang_Zhai1;~Haixin_Wang3;~Jianlong_Chang2;~Xinlong_Yang1;~Yang_Xian1;~Jinan_Sun1;~Shikun_Zhang2;~Qi_Tian3", "gender": "M;;M;M;M;M;M;M", "homepage": ";https://willdreamer.github.io/;https://jianlongchange.github.io/;https://xinlong-yang.github.io/;https://github.com/XianYang521225?tab=repositories;;;https://www.qitian1987.com/index.html", "dblp": ";81/5956-3;92/2332;347/2205;https://dblp.org/;16/10588;83/3715.html;78/1467-1.html", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;RGZUJOkAAAAJ;RDwnNsQAAAAJ;;https://scholar.google.com/;;uiklLscAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0002-5714-0149;;;;;;0000-0002-7252-5047", "linkedin": ";;;;https://www.linkedin.com/?original_referer=https%3A%2F%2Fwww.google.com.hk%2F;;;", "or_profile": "~Yihang_Zhai1;~Haixin_Wang3;~Jianlong_Chang2;~Xinlong_Yang1;~Yang_Xian1;~Jinan_Sun1;~Shikun_Zhang2;~Qi_Tian3", "aff": "Peking University;Peking University;Huawei Technologies Ltd.;Peking University;;Peking University;Peking University;Huawei Technologies Ltd.", "aff_domain": "pku.edu.cn;pku.edu.cn;huawei.com;pku.edu.cn;;pku.edu.cn;pku.edu.cn;huawei.com", "position": "MS student;MS student;Principal Researcher;MS student;;Associate Professor;Full Professor;Principal Researcher", "bibtex": "@misc{\nzhai2024visionlanguage,\ntitle={Vision-Language Instruction-enhanced Tuning via Parameter-efficient Learning},\nauthor={Yihang Zhai and Haixin Wang and Jianlong Chang and Xinlong Yang and Yang Xian and Jinan Sun and Shikun Zhang and Qi Tian},\nyear={2024},\nurl={https://openreview.net/forum?id=4zfbwpGhd8}\n}", 
"github": "", "project": "", "reviewers": "tPPR;GA5A;mPuJ;W366", "site": "https://openreview.net/forum?id=4zfbwpGhd8", "pdf_size": 7959, "rating": "3;3;5;5", "confidence": "5;4;4;3", "soundness": "2;2;3;3", "contribution": "1;2;2;3", "presentation": "2;2;3;2", "wc_summary": "60;56;88;41", "wc_strengths": "11;22;56;80", "wc_weaknesses": "110;331;41;121", "wc_questions": "5;2;43;50", "wc_review": "186;411;228;292", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 61.25, 16.990806337546196 ], "wc_strengths_avg": [ 42.25, 27.38955092731533 ], "wc_weaknesses_avg": [ 150.75, 108.49049497536639 ], "wc_questions_avg": [ 25.0, 21.66794868002045 ], "wc_review_avg": [ 279.25, 84.91576708715526 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.7071067811865475, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:jgMwmOOqDFcJ:scholar.google.com/&scioq=Vision-Language+Instruction-enhanced+Tuning+via+Parameter-efficient+Learning&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;1;0;0;0;1", "aff_unique_norm": "Peking University;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "http://www.pku.edu.cn;https://www.huawei.com", "aff_unique_abbr": "Peking U;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Habitat 3.0: A Co-Habitat for Humans, Avatars, and Robots", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19442", "id": "4znwzG92CE", "author_site": "Xavier Puig, Eric Undersander, Andrew Szot, Mikael Dallaire Cote, Tsung-Yen Yang, Ruslan Partsey, Ruta Desai, Alexander Clegg, Michal Hlavac, So Yeon Min, Vladim\u00edr Vondru\u0161, Theophile Gervet, Vincent-Pierre Berges, John Turner, Oleksandr Maksymets, Zsolt Kira, Mrinal Kalakrishnan, Jitendra Malik, Devendra Singh Chaplot, Unnat Jain, Dhruv Batra, Akshara Rai, Roozbeh Mottaghi", "tldr": "", "abstract": "We present Habitat 3.0: a simulation platform for studying collaborative human-robot tasks in home environments. Habitat 3.0 offers contributions across three dimensions: (1) Accurate humanoid simulation: addressing challenges in modeling complex deformable bodies and diversity in appearance and motion, all while ensuring high simulation speed. (2) Human-in-the-loop infrastructure: enabling real human interaction with simulated robots via mouse/keyboard or a VR interface, facilitating evaluation of robot policies with human input. (3) Collaborative tasks: studying two collaborative tasks, Social Navigation and Social Rearrangement. Social Navigation investigates a robot's ability to locate and follow humanoid avatars in unseen environments, whereas Social Rearrangement addresses collaboration between a humanoid and robot while rearranging a scene. These contributions allow us to study end-to-end learned and heuristic baselines for human-robot collaboration in-depth, as well as evaluate them with humans in the loop. 
Our experiments demonstrate that learned robot policies lead to efficient task completion when collaborating with unseen humanoid agents and human partners that might exhibit behaviors that the robot has not seen before. Additionally, we observe emergent behaviors during collaborative task execution, such as the robot yielding space when obstructing a humanoid agent, thereby allowing the effective completion of the task by the humanoid agent. Furthermore, our experiments using the human-in-the-loop tool demonstrate that our automated evaluation with humanoids can provide an indication of the relative ordering of different policies when evaluated with real human collaborators. Habitat 3.0 unlocks interesting new features in simulators for Embodied AI, and we hope it paves the way for a new frontier of embodied human-AI interaction capabilities. For more details and visualizations, visit: https://aihabitat.org/habitat3.", "keywords": "Embodied AI;Simulation", "primary_area": "datasets and benchmarks", "supplementary_material": "/attachment/ac36aa06501b2d84b16e4f776320cf4bb0a6b707.zip", "author": "Xavier Puig;Eric Undersander;Andrew Szot;Mikael Dallaire Cote;Tsung-Yen Yang;Ruslan Partsey;Ruta Desai;Alexander Clegg;Michal Hlavac;So Yeon Min;Vladim\u00edr Vondru\u0161;Theophile Gervet;Vincent-Pierre Berges;John M Turner;Oleksandr Maksymets;Zsolt Kira;Mrinal Kalakrishnan;Jitendra Malik;Devendra Singh Chaplot;Unnat Jain;Dhruv Batra;Akshara Rai;Roozbeh Mottaghi", "authorids": "~Xavier_Puig1;~Eric_Undersander2;~Andrew_Szot1;~Mikael_Dallaire_Cote1;~Tsung-Yen_Yang2;~Ruslan_Partsey1;~Ruta_Desai1;~Alexander_Clegg1;~Michal_Hlavac1;~So_Yeon_Min2;~Vladim\u00edr_Vondru\u01611;~Theophile_Gervet1;~Vincent-Pierre_Berges1;~John_M_Turner1;~Oleksandr_Maksymets1;~Zsolt_Kira1;~Mrinal_Kalakrishnan1;~Jitendra_Malik2;~Devendra_Singh_Chaplot2;~Unnat_Jain1;~Dhruv_Batra1;~Akshara_Rai1;~Roozbeh_Mottaghi1", "gender": "M;;M;Not Specified;;M;;M;M;F;M;M;;;M;M;M;M;;;Not Specified;;", "homepage": "https://people.csail.mit.edu/xavierpuig/;https://www.ericundersander.com/;https://www.andrewszot.com;https://github.com/0mdc;https://sites.google.com/view/tyjimmyyang;;;;http://hlavac.design;;https://magnum.graphics;https://theophilegervet.github.io;;http://johnmturner.com/;https://research.fb.com/people/maksymets-oleksandr/;https://faculty.cc.gatech.edu/~zk15;;https://people.eecs.berkeley.edu/~malik/;;;https://dhruvbatra.com;https://ai.facebook.com/people/akshara-rai;http://roozbehm.info", "dblp": "50/8429;209/9989;;;204/7980;;;165/9761;;78/84;;;227/3339.html;;239/4227;36/4127;46/4195;58/2944;;;67/6586;;36/633", "google_scholar": ";;IwIWKPYAAAAJ;;g-hQdY8AAAAJ;6h1O4AMAAAAJ;;https://scholar.google.com/citations?hl=en;;dkRTvvcAAAAJ;;-o8kQPwAAAAJ;JEr3qVwAAAAJ;;https://scholar.google.com.ua/citations?user=ZKDLDQoAAAAJ;2a5XgNAAAAAJ;DMTuJzAAAAAJ;oY9R5YQAAAAJ;;;_bs7PqgAAAAJ;;CCV58dgAAAAJ", "orcid": ";;;;;0000-0003-3694-7506;;;;;;;;;;0000-0002-2626-2004;;0000-0003-3695-1580;;;;;", "linkedin": ";ericu;;https://linkedin.com/in/mdcote;tsung-yen-yang;;;alexander-clegg-68336839;michalhlavac;;;theophile-gervet/;vincentpierreberges/;;maksymets/;;mrinalkalakrishnan/;;;;;;roozbeh-mottaghi-63397aa0", "or_profile": 
"~Xavier_Puig1;~Eric_Undersander2;~Andrew_Szot1;~Mikael_Dallaire_Cote1;~Tsung-Yen_Yang2;~Ruslan_Partsey1;~Ruta_Desai1;~Alexander_Clegg1;~Michal_Hlavac1;~So_Yeon_Min2;~Vladim\u00edr_Vondru\u01611;~Theophile_Gervet1;~Vincent-Pierre_Berges1;~John_M_Turner1;~Oleksandr_Maksymets1;~Zsolt_Kira1;~Mrinal_Kalakrishnan1;~Jitendra_Malik2;~Devendra_Singh_Chaplot2;~Unnat_Jain1;~Dhruv_Batra1;~Akshara_Rai1;~Roozbeh_Mottaghi1", "aff": "Meta;Meta ;Georgia Institute of Technology;;Meta AI;Ukrainian Catholic University;;Meta AI;Meta;Carnegie Mellon University;Magnum Engine;;Meta;;Meta;Georgia Institute of Technology;Meta;University of California, Berkeley;;;Georgia Institute of Technology;FAIR, Meta AI;University of Washington", "aff_domain": "fb.com;meta.com;gatech.edu;;meta.com;ucu.edu.ua;;meta.com;meta.com;andrew.cmu.edu;magnum.graphics;;meta.com;;meta.com;gatech.edu;meta.com;berkeley.edu;;;gatech.edu;meta.com;cs.washington.edu", "position": "Researcher;Research Engineer;PhD student;;Researcher;Researcher;;Researcher;Designer;PhD student;Founder;;Researcher;;Researcher;Assistant Professor;Researcher;Full Professor;;;Associate Professor;Researcher;Affiliate Professor ", "bibtex": "@inproceedings{\npuig2024habitat,\ntitle={Habitat 3.0: A Co-Habitat for Humans, Avatars, and Robots},\nauthor={Xavier Puig and Eric Undersander and Andrew Szot and Mikael Dallaire Cote and Tsung-Yen Yang and Ruslan Partsey and Ruta Desai and Alexander Clegg and Michal Hlavac and So Yeon Min and Vladim{\\'\\i}r Vondru{\\v{s}} and Theophile Gervet and Vincent-Pierre Berges and John M Turner and Oleksandr Maksymets and Zsolt Kira and Mrinal Kalakrishnan and Jitendra Malik and Devendra Singh Chaplot and Unnat Jain and Dhruv Batra and Akshara Rai and Roozbeh Mottaghi},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=4znwzG92CE}\n}", "github": "", "project": "", "reviewers": "ndxG;FYWe;HoUA", "pdf_size": 8674247, "rating": "6;6;8", "confidence": "3;4;5", "soundness": "2;3;4", "contribution": "2;3;4", "presentation": "3;3;4", "wc_summary": "300;89;102", "wc_strengths": "276;66;215", "wc_weaknesses": "230;93;120", "wc_questions": "193;272;2", "wc_review": "999;520;439", "wc_reply_reviewers": "70;193;0", "wc_reply_authors": "1781;2020;428", "reply_reviewers": "1;1;0", "reply_authors": "4;4;1", "rating_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "contribution_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 163.66666666666666, 96.54820327461074 ], "wc_strengths_avg": [ 185.66666666666666, 88.20556797743679 ], "wc_weaknesses_avg": [ 147.66666666666666, 59.25275427259807 ], "wc_questions_avg": [ 155.66666666666666, 113.34411713401313 ], "wc_review_avg": [ 652.6666666666666, 247.1171562010394 ], "wc_reply_reviewers_avg": [ 87.66666666666667, 79.77607550025397 ], "wc_reply_authors_avg": [ 1409.6666666666667, 700.9671097055046 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 3.0, 1.4142135623730951 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 23, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 111, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4645667612256940045&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=4znwzG92CE", "pdf": 
"https://openreview.net/pdf?id=4znwzG92CE", "email": "fb.com;meta.com;gatech.edu;;meta.com;ucu.edu.ua;;meta.com;meta.com;andrew.cmu.edu;magnum.graphics;;meta.com;;meta.com;gatech.edu;meta.com;berkeley.edu;;;gatech.edu;meta.com;cs.washington.edu", "author_num": 23, "aff_unique_index": "0;0;1;0;2;0;0;3;4;0;0;1;0;5;1;0;6", "aff_unique_norm": "Meta;Georgia Institute of Technology;Ukrainian Catholic University;Carnegie Mellon University;Magnum Engine;University of California, Berkeley;University of Washington", "aff_unique_dep": "Meta Platforms, Inc.;;;;;;", "aff_unique_url": "https://meta.com;https://www.gatech.edu;https://ucu.edu.ua;https://www.cmu.edu;;https://www.berkeley.edu;https://www.washington.edu", "aff_unique_abbr": "Meta;Georgia Tech;UCU;CMU;;UC Berkeley;UW", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0;0;0;1;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States;Ukraine;" }, { "id": "506Sxc0Adp", "title": "Beyond Scale: the Diversity Coefficient as a Data Quality Metric Demonstrates LLMs are Pre-trained on Formally Diverse Data", "track": "main", "status": "Reject", "tldr": "", "abstract": "Current trends to pre-train capable Large Language Models (LLMs) mostly focus on scaling of model and dataset size. However, the of pre-training data is an important factor for training powerful LLMs, yet it is a nebulous concept that has not been fully characterized. Therefore, we use the recently proposed Task2Vec diversity coefficient to understand formal aspects of data \\textit{quality} that go beyond scale alone. Specifically, we measure the diversity coefficient of publicly available pre-training datasets to demonstrate that their formal diversity is high when compared to theoretical lower and upper bounds. In addition, to build confidence in the diversity coefficient, we conduct interpretability experiments and find that the coefficient aligns with intuitive properties of diversity, e.g., it increases as the number of latent concepts increases. 
We conclude the diversity coefficient is reliable and conjecture it can be used to build useful diverse datasets for LLMs.", "keywords": "machine learning;large language models;metrics;data diversity;data;data quality", "primary_area": "generative models", "supplementary_material": "/attachment/e6f362b9be0e39c559d4f1f7e49f1e2d1661d1d1.pdf", "author": "Brando Miranda;Alycia Lee;Sudharsan Sundar;Sanmi Koyejo", "authorids": "~Brando_Miranda1;~Alycia_Lee1;sjsundar@stanford.edu;~Sanmi_Koyejo1", "gender": "M;;;", "homepage": "https://cbmm.mit.edu/about/people/miranda;;;", "dblp": ";;;", "google_scholar": "_NQJoBkAAAAJ;;;", "orcid": ";;;", "linkedin": "brando-miranda-40821046/;;;", "or_profile": "~Brando_Miranda1;~Alycia_Lee1;sjsundar@stanford.edu;~Sanmi_Koyejo1", "aff": "Stanford University;;;", "aff_domain": "stanford.edu;;;", "position": "PhD student;;;", "bibtex": "@misc{\nmiranda2024beyond,\ntitle={Beyond Scale: the Diversity Coefficient as a Data Quality Metric Demonstrates {LLM}s are Pre-trained on Formally Diverse Data},\nauthor={Brando Miranda and Alycia Lee and Sudharsan Sundar and Sanmi Koyejo},\nyear={2024},\nurl={https://openreview.net/forum?id=506Sxc0Adp}\n}", "github": "", "project": "", "reviewers": "FQiZ;kREU;Z6o3;H6fT", "site": "https://openreview.net/forum?id=506Sxc0Adp", "pdf_size": 476111, "rating": "1;3;6;6", "confidence": "4;4;4;2", "soundness": "1;2;3;4", "contribution": "1;2;3;3", "presentation": "1;3;2;3", "wc_summary": "65;115;83;101", "wc_strengths": "63;28;57;87", "wc_weaknesses": "185;143;336;80", "wc_questions": "283;125;202;1", "wc_review": "596;411;678;269", "wc_reply_reviewers": "183;28;51;14", "wc_reply_authors": "2281;2430;2380;1220", "reply_reviewers": "1;1;1;1", "reply_authors": "3;4;5;3", "rating_avg": [ 4.0, 2.1213203435596424 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.5, 1.118033988749895 ], "contribution_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 91.0, 18.81488772222678 ], "wc_strengths_avg": [ 58.75, 21.00446381129497 ], "wc_weaknesses_avg": [ 186.0, 94.32125953357493 ], "wc_questions_avg": [ 152.75, 103.90951592611718 ], "wc_review_avg": [ 488.5, 159.41533803244906 ], "wc_reply_reviewers_avg": [ 69.0, 67.1304699819687 ], "wc_reply_authors_avg": [ 2077.75, 498.11664045683113 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.75, 0.82915619758885 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5443310539518174, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9241919353973672522&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "50P9TDPEsh", "title": "Critique Ability of Large Language Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "Critical thinking is essential for rational decision-making and problem-solving. This skill hinges on the ability to provide precise and reasoned critiques and is a hallmark of human intelligence. In the era of large language models (LLMs), this study explores the ability of LLMs to deliver accurate critiques across various tasks. 
We are interested in this topic as a capable critic model could not only serve as a reliable evaluator, but also as a source of supervised signals for model tuning. Particularly, if a model can self-critique, it has the potential for autonomous self-improvement. To examine this, we introduce a unified evaluation framework for assessing the critique abilities of LLMs. We develop a benchmark called CriticBench, which comprises $3$K high-quality natural language queries and corresponding model responses; and annotate the correctness of these responses. The benchmark covers tasks such as math problem-solving, code completion, and question answering. We evaluate multiple LLMs on the collected dataset and our analysis reveals several noteworthy insights: (1) Critique is generally challenging for most LLMs, and this capability often emerges only when models are sufficiently large. (2) In particular, self-critique is especially difficult. Even top-performing LLMs struggle to achieve satisfactory performance. (3) Models tend to have lower critique accuracy on problems where they are most uncertain. To this end, we introduce a simple yet effective baseline named self-check, which leverages self-critique to improve task performance for various models. We hope this study serves as an initial exploration into understanding the critique abilities of LLMs, and aims to inform future research, including the development of more proficient critic models and the application of critiques across diverse tasks.", "keywords": "LLM;Large Language Models;Critique;Self-Critique;Emergent Ability;Reasoning", "primary_area": "datasets and benchmarks", "supplementary_material": "/attachment/0a07401ab8e4b7b35d1688974279a382f8ddc398.pdf", "author": "Liangchen Luo;Zi Lin;Yinxiao Liu;Lei Shu;Yun Zhu;Jingbo Shang;Lei Meng", "authorids": "~Liangchen_Luo1;~Zi_Lin1;~Yinxiao_Liu2;~Lei_Shu1;~Yun_Zhu5;~Jingbo_Shang2;~Lei_Meng2", "gender": "M;F;M;F;;M;F", "homepage": "https://www.luolc.com;https://zi-lin.com/;;https://leishu02.github.io/;;https://shangjingbo1226.github.io/;https://scholar.google.com/citations?user=7XxgNUsAAAAJ&hl=en&oi=ao", "dblp": "225/6429;81/2999;;19/2932-4;;151/3145.html;", "google_scholar": "8ei4_E4AAAAJ;kgZYttUAAAAJ;https://scholar.google.com/citations?hl=en;Q0zkC-kAAAAJ;;0SkFI4MAAAAJ;", "orcid": ";;;;;;", "linkedin": ";zi-lin/;;shu-lei-8b361642/;;;", "or_profile": "~Liangchen_Luo1;~Zi_Lin1;~Yinxiao_Liu2;~Lei_Shu1;~Yun_Zhu5;~Jingbo_Shang2;~Lei_Meng2", "aff": "Google DeepMind;University of California, San Diego;Research, Google;Google;;University of California, San Diego;", "aff_domain": "google.com;ucsd.edu;research.google.com;google.com;;ucsd.edu;", "position": "Researcher;Graduate student;Researcher;Researcher;;Assistant Professor;", "bibtex": "@misc{\nluo2024critique,\ntitle={Critique Ability of Large Language Models},\nauthor={Liangchen Luo and Zi Lin and Yinxiao Liu and Lei Shu and Yun Zhu and Jingbo Shang and Lei Meng},\nyear={2024},\nurl={https://openreview.net/forum?id=50P9TDPEsh}\n}", "github": "", "project": "", "reviewers": "qpMo;WsbC;qunD", "site": "https://openreview.net/forum?id=50P9TDPEsh", "pdf_size": 1144366, "rating": "3;5;6", "confidence": "4;3;4", "soundness": "3;3;3", "contribution": "3;3;2", "presentation": "2;3;3", "wc_summary": "37;77;103", "wc_strengths": "57;69;57", "wc_weaknesses": "153;162;201", "wc_questions": "72;1;51", "wc_review": "319;309;412", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "796;357;862", "reply_reviewers": "0;0;0", "reply_authors": "1;1;2", "rating_avg": [ 
4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 72.33333333333333, 27.145697428669774 ], "wc_strengths_avg": [ 61.0, 5.656854249492381 ], "wc_weaknesses_avg": [ 172.0, 20.83266665599966 ], "wc_questions_avg": [ 41.333333333333336, 29.78067979225607 ], "wc_review_avg": [ 346.6666666666667, 46.37767662236745 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 671.6666666666666, 224.1284354015696 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.18898223650461363, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17074660022459503280&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;0;0;1", "aff_unique_norm": "Google;University of California, San Diego", "aff_unique_dep": "Google DeepMind;", "aff_unique_url": "https://deepmind.com;https://www.ucsd.edu", "aff_unique_abbr": "DeepMind;UCSD", "aff_campus_unique_index": "1;2;2;1", "aff_campus_unique": ";San Diego;Mountain View", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "United Kingdom;United States" }, { "id": "50vyPuz0iv", "title": "Iteratively Refined Behavior Regularization for Offline Reinforcement Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "One of the fundamental challenges for offline reinforcement learning (RL) is ensuring robustness to data distribution. \nWhether the data originates from a near-optimal policy or not, we anticipate that an algorithm should demonstrate its ability to learn an effective control policy that seamlessly aligns with the inherent distribution of offline data. Unfortunately, behavior regularization, a simple yet effective offline RL algorithm, tends to struggle in this regard. In this paper, we propose a new algorithm that substantially enhances behavior-regularization based on conservative policy iteration. Our key observation is that by iteratively refining the reference policy used for behavior regularization, conservative policy update guarantees gradual improvement, while also implicitly avoiding querying out-of-sample actions to prevent catastrophic learning failures. We prove that in the tabular setting this algorithm is capable of learning the optimal policy covered by the offline dataset, commonly referred to as the in-sample optimal policy. We then explore several implementation details of the algorithm when function approximations are applied. The resulting algorithm is easy to implement, requiring only a few lines of code modification to existing methods. 
Experimental results on the D4RL benchmark indicate that our method outperforms previous state-of-the-art baselines in most tasks, clearly demonstrating its superiority over behavior regularization.", "keywords": "Offline Reinforcement Learning", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Xiaohan Hu;Yi Ma;Chenjun Xiao;YAN ZHENG;Jianye HAO", "authorids": "~Xiaohan_Hu1;~Yi_Ma5;~Chenjun_Xiao1;~YAN_ZHENG1;~Jianye_HAO1", "gender": "M;;;M;M", "homepage": ";https://mayi1996.top/;https://chenjun-x.github.io/;https://yanzzzzz.github.io;http://www.icdai.org/jianye.html", "dblp": ";69/1112-5.html;178/8641;10/2381-2;21/7664.html", "google_scholar": ";TdVWzqgAAAAJ;;https://scholar.google.com.hk/citations?user=tJuhd1kAAAAJ;", "orcid": "0000-0001-7645-201X;0000-0001-9375-6605;0000-0002-5493-1500;;0000-0002-0422-8235", "linkedin": ";;;;", "or_profile": "~Xiaohan_Hu1;~Yi_Ma5;~Chenjun_Xiao1;~YAN_ZHENG1;~Jianye_HAO1", "aff": "Tianjin University;Tianjin University;Huawei Technologies Ltd.;Tianjin University, China;Tianjin University", "aff_domain": "tju.edu.cn;tju.edu.cn;huawei.com;tju.edu.cn;tju.edu.cn", "position": "MS student;PhD student;Researcher;Associate Professor;Associate Professor", "bibtex": "@misc{\nhu2024iteratively,\ntitle={Iteratively Refined Behavior Regularization for Offline Reinforcement Learning},\nauthor={Xiaohan Hu and Yi Ma and Chenjun Xiao and YAN ZHENG and Jianye HAO},\nyear={2024},\nurl={https://openreview.net/forum?id=50vyPuz0iv}\n}", "github": "", "project": "", "reviewers": "cTmM;JFNQ;ByMR", "site": "https://openreview.net/forum?id=50vyPuz0iv", "pdf_size": 565586, "rating": "3;3;6", "confidence": "4;5;3", "soundness": "3;2;3", "contribution": "2;2;2", "presentation": "3;2;4", "wc_summary": "48;74;139", "wc_strengths": "53;9;45", "wc_weaknesses": "100;158;205", "wc_questions": "1;116;62", "wc_review": "202;357;451", "wc_reply_reviewers": "142;0;197", "wc_reply_authors": "454;775;1280", "reply_reviewers": "2;0;2", "reply_authors": "3;2;3", "rating_avg": [ 4.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 87.0, 38.27096375408734 ], "wc_strengths_avg": [ 35.666666666666664, 19.136933459209764 ], "wc_weaknesses_avg": [ 154.33333333333334, 42.94440850939994 ], "wc_questions_avg": [ 59.666666666666664, 46.977536002741665 ], "wc_review_avg": [ 336.6666666666667, 102.66558440988014 ], "wc_reply_reviewers_avg": [ 113.0, 82.99799194358059 ], "wc_reply_authors_avg": [ 836.3333333333334, 339.9905227437311 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.9428090415820634 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8660254037844387, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6920572114126102147&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Tianjin University;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "http://www.tju.edu.cn;https://www.huawei.com", "aff_unique_abbr": "TJU;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "51cjeYcXjs", "title": "Search and Retrieval in Semantic-Structural Representations of Novel Malware", "track": "main", "status": 
"Reject", "tldr": "", "abstract": "In this study we present a novel representation for binary programs, which captures semantic similarity and structural properties. Our representation is composed in a bottom-up approach and enables new methods of analysis. We show that we can perform search and retrieval of binary executable programs based on similarity of behavioral properties, with an adjustable level of feature resolution. We begin by extracting data dependency graphs (DDG), which are representative of both program structure and operational semantics. We then encode each program as a set of graph hashes representing isomorphic uniqueness, a method we have labeled DDG Fingerprinting. Next, we use k-Nearest Neighbors to search in a metric space constructed from examples. This approach allows us to perform a quantitative analysis of patterns of program operation. By evaluating similarity of behavior we are able to recognize patterns in novel malware with functionality not previously identified. We present experimental results from search based on program semantics and structural properties in a dataset of binary executables with features extracted using our method of representation. We show that the associated metric space allows an adjustable level of resolution. Resolution of the features may be decreased for breadth of search and retrieval, or as the search space is reduced, the resolution may be increased for accuracy and fine-grained analysis of malware behavior.", "keywords": "Malware Analysis;Explainability", "primary_area": "visualization or interpretation of learned representations", "supplementary_material": "", "author": "John Musgrave;Alina Campan;Anca L Ralescu", "authorids": "~John_Musgrave1;~Alina_Campan1;~Anca_L_Ralescu1", "gender": ";F;F", "homepage": "https://johnmusgrave.com;;https://www.ceas3.uc.edu/profiles/ralescal", "dblp": ";63/4876;", "google_scholar": "_CQx1CwAAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": "0000-0001-7646-4328;;", "linkedin": ";;anca-ralescu-97292b7/", "or_profile": "~John_Musgrave1;~Alina_Campan1;~Anca_L_Ralescu1", "aff": ";Northern Kentucky University;University of Cincinnati", "aff_domain": ";nku.edu;uc.edu", "position": ";Full Professor;Full Professor", "bibtex": "@misc{\nmusgrave2024search,\ntitle={Search and Retrieval in Semantic-Structural Representations of Novel Malware},\nauthor={John Musgrave and Alina Campan and Anca L Ralescu},\nyear={2024},\nurl={https://openreview.net/forum?id=51cjeYcXjs}\n}", "github": "", "project": "", "reviewers": "PPca;5Qou;Qcjm;pKB5", "site": "https://openreview.net/forum?id=51cjeYcXjs", "pdf_size": 337727, "rating": "1;3;3;3", "confidence": "4;4;3;4", "soundness": "1;2;2;2", "contribution": "1;2;2;2", "presentation": "1;1;2;2", "wc_summary": "77;83;79;50", "wc_strengths": "3;20;35;49", "wc_weaknesses": "141;253;35;151", "wc_questions": "52;37;206;5", "wc_review": "273;393;355;255", "wc_reply_reviewers": "0;57;0;0", "wc_reply_authors": "0;534;749;783", "reply_reviewers": "0;1;0;0", "reply_authors": "0;2;1;1", "rating_avg": [ 2.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 1.75, 0.4330127018922193 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 1.5, 0.5 ], "wc_summary_avg": [ 72.25, 13.026415470113028 ], "wc_strengths_avg": [ 26.75, 17.122718826167766 ], "wc_weaknesses_avg": [ 145.0, 77.1621668954417 ], "wc_questions_avg": [ 75.0, 77.51451476981585 ], "wc_review_avg": [ 319.0, 56.97367813297646 ], "wc_reply_reviewers_avg": [ 
14.25, 24.681724007856502 ], "wc_reply_authors_avg": [ 516.5, 313.1122003371954 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.7071067811865476 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2866760772493619036&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Northern Kentucky University;University of Cincinnati", "aff_unique_dep": ";", "aff_unique_url": "https://nku.edu;https://www.uc.edu", "aff_unique_abbr": "NKU;UC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Be Aware of the Neighborhood Effect: Modeling Selection Bias under Interference", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19441", "id": "52fz5sUAy2", "author_site": "Haoxuan Li, Chunyuan Zheng, Sihao Ding, Peng Wu, Zhi Geng, Fuli Feng, Xiangnan He", "tldr": "", "abstract": "Selection bias in recommender systems arises from the recommendation process of system filtering and the interactive process of user selection. Many previous studies have focused on addressing selection bias to achieve unbiased learning of the prediction model, but ignore the fact that potential outcomes for a given user-item pair may vary with the treatments assigned to other user-item pairs, named the neighborhood effect. To fill the gap, this paper formally formulates the neighborhood effect as an interference problem from the perspective of causal inference, and introduces a treatment representation to capture the neighborhood effect. On this basis, we propose a novel ideal loss that can be used to deal with selection bias in the presence of neighborhood effect. We further develop two new estimators for estimating the proposed ideal loss. We theoretically establish the connection between the proposed and previous debiasing methods ignoring the neighborhood effect, showing that the proposed methods can achieve unbiased learning when both selection bias and neighborhood effects are present, while the existing methods are biased. 
Extensive semi-synthetic and real-world experiments are conducted to demonstrate the effectiveness of the proposed methods.", "keywords": "Selection Bias;Neighborhood effect;Recommender system", "primary_area": "causal reasoning", "supplementary_material": "/attachment/c41c6251cb39606f501706bb9c93e3e8467f0795.zip", "author": "Haoxuan Li;Chunyuan Zheng;Sihao Ding;Peng Wu;Zhi Geng;Fuli Feng;Xiangnan He", "authorids": "~Haoxuan_Li6;~Chunyuan_Zheng1;~Sihao_Ding2;~Peng_Wu5;~Zhi_Geng1;~Fuli_Feng1;~Xiangnan_He1", "gender": "M;M;M;M;M;M;M", "homepage": "https://haoxuanli-pku.github.io/;;;https://pengwu.site/;https://stxy.btbu.edu.cn/szdw/bssds/34339356074b408c8650309f05f24558.htm;https://fulifeng.github.io/;http://staff.ustc.edu.cn/~hexn", "dblp": "145/4965-1.html;;https://dblp.uni-trier.de/pid/133/4721-3;15/6146-12;;183/9198;59/1007", "google_scholar": "gtDqiucAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?view_op=list_works;;https://scholar.google.com.sg/citations?user=QePM4u8AAAAJ;https://scholar.google.com.sg/citations?user=X45Go24AAAAJ", "orcid": "0000-0003-3620-3769;0000-0002-0306-7310;0000-0003-1796-8504;0000-0001-7154-8880;;0000-0002-5828-9842;0000-0001-8472-7992", "linkedin": ";;;;;;", "or_profile": "~Haoxuan_Li6;~Chunyuan_Zheng1;~Sihao_Ding2;~Peng_Wu5;~Zhi_Geng1;~Fuli_Feng1;~Xiangnan_He1", "aff": "Peking University;Peking University;;Beijing Technology and Business University;School of mathematical Science, Peking University, Peking University;University of Science and Technology of China;University of Science and Technology of China", "aff_domain": "pku.edu.cn;stu.pku.edu.cn;;btbu.edu.cn;math.pku.edu.cn;ustc.edu.cn;ustc.edu.cn", "position": "PhD student;PhD student;;Associate Professor;Full Professor;Full Professor;Professor", "bibtex": "@inproceedings{\nli2024be,\ntitle={Be Aware of the Neighborhood Effect: Modeling Selection Bias under Interference for Recommendation},\nauthor={Haoxuan Li and Chunyuan Zheng and Sihao Ding and Peng Wu and Zhi Geng and Fuli Feng and Xiangnan He},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=52fz5sUAy2}\n}", "github": "", "project": "", "reviewers": "TJ54;g9eU;1Wkk;qJ35", "pdf_size": 662651, "rating": "5;6;8;8", "confidence": "4;2;3;3", "soundness": "2;3;3;3", "contribution": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "73;107;92;86", "wc_strengths": "105;16;245;41", "wc_weaknesses": "201;10;82;144", "wc_questions": "5;1;218;5", "wc_review": "384;134;637;276", "wc_reply_reviewers": "0;0;0;20", "wc_reply_authors": "1109;274;861;713", "reply_reviewers": "0;0;0;1", "reply_authors": "2;1;2;2", "rating_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 89.5, 12.216791722870616 ], "wc_strengths_avg": [ 101.75, 88.84642648975816 ], "wc_weaknesses_avg": [ 109.25, 71.09632550279937 ], "wc_questions_avg": [ 57.25, 92.82342107464042 ], "wc_review_avg": [ 357.75, 183.99507466233982 ], "wc_reply_reviewers_avg": [ 5.0, 8.660254037844387 ], "wc_reply_authors_avg": [ 739.25, 303.5970808489436 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.2721655269759087, "gs_citation": 13, 
"gs_cited_by_link": "https://scholar.google.com/scholar?cites=2439006638808384385&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=52fz5sUAy2", "pdf": "https://openreview.net/pdf?id=52fz5sUAy2", "email": "pku.edu.cn;stu.pku.edu.cn;;btbu.edu.cn;math.pku.edu.cn;ustc.edu.cn;ustc.edu.cn", "author_num": 7, "aff_unique_index": "0;0;1;0;2;2", "aff_unique_norm": "Peking University;Beijing Technology and Business University;University of Science and Technology of China", "aff_unique_dep": ";;", "aff_unique_url": "http://www.pku.edu.cn;http://www.btbu.edu.cn;http://www.ustc.edu.cn", "aff_unique_abbr": "Peking U;BTBU;USTC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Peking", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "52igC7K5Mf", "title": "GC-Mixer: A Novel Architecture for Time-varying Granger Causality Inference", "track": "main", "status": "Reject", "tldr": "", "abstract": "The neural network has emerged as a practical approach to evaluate the Granger causality in multivariate time series. However, most existing studies on Granger causality inference are based on time-invariance. In this paper, we propose a novel MLP architecture, Granger Causality Mixer (GC-Mixer), which extracts parameters from the weight matrix and imposes the hierarchical group lasso penalty on these parameters to infer time-invariant Granger causality and automatically select time lags. Furthermore, we extend GC-Mixer by introducing a multi-level fine-tuning algorithm to split time series automatically and infer time-varying Granger causality. We conduct experiments on the VAR and Lorenz-96 datasets, and the results show that GC-Mixer achieves outstanding performances in Granger causality inference.", "keywords": "Granger causality;Time-varying;Time series;Neural network", "primary_area": "causal reasoning", "supplementary_material": "/attachment/9b7ed64ea0567b5351ea85345669a64d4767c037.zip", "author": "Meiliang Liu;Junhao Huang;Yixiao Wang;Zhengye Si;Zhiwen Zhao", "authorids": "~Meiliang_Liu1;~Junhao_Huang4;~Yixiao_Wang2;~Zhengye_Si1;~Zhiwen_Zhao2", "gender": "M;M;F;M;M", "homepage": ";;;;https://cist.bnu.edu.cn/xygk/szdw/zgj/110688.html", "dblp": "381/8230.html;;;;03/10847.html", "google_scholar": "iOwkIcoAAAAJ;;https://scholar.google.com.hk/citations?user=pPac-I4AAAAJ;;", "orcid": "0009-0005-6795-1518;0000-0003-4477-3358;;0000-0003-4176-2717;", "linkedin": ";;;;", "or_profile": "~Meiliang_Liu1;~Junhao_Huang4;~Yixiao_Wang2;~Zhengye_Si1;~Zhiwen_Zhao2", "aff": "Beijing Normal University;;Beijing Normal University;Beijing Normal University;Beijing Normal University", "aff_domain": "bnu.edu.cn;;bnu.edu.cn;bnu.edu.cn;bnu.edu.cn", "position": "PhD student;;MS student;PhD student;Full Professor", "bibtex": "@misc{\nliu2024gcmixer,\ntitle={{GC}-Mixer: A Novel Architecture for Time-varying Granger Causality Inference},\nauthor={Meiliang Liu and Junhao Huang and Yixiao Wang and Zhengye Si and Zhiwen Zhao},\nyear={2024},\nurl={https://openreview.net/forum?id=52igC7K5Mf}\n}", "github": "", "project": "", "reviewers": "KhHm;1qK9;NC5z;mLYv", "site": "https://openreview.net/forum?id=52igC7K5Mf", "pdf_size": 2936574, "rating": "3;3;5;6", "confidence": "4;4;4;2", "soundness": "3;2;2;3", "contribution": "2;2;2;3", "presentation": "3;2;2;2", "wc_summary": "74;39;61;39", "wc_strengths": "89;25;11;47", "wc_weaknesses": "776;120;154;72", "wc_questions": "9;18;1;484", "wc_review": "948;202;227;642", "wc_reply_reviewers": "289;0;0;371", 
"wc_reply_authors": "935;301;260;1043", "reply_reviewers": "1;0;0;3", "reply_authors": "2;1;1;3", "rating_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 53.25, 14.972892172189045 ], "wc_strengths_avg": [ 43.0, 29.49576240750525 ], "wc_weaknesses_avg": [ 280.5, 287.5565161842103 ], "wc_questions_avg": [ 128.0, 205.62465805442693 ], "wc_review_avg": [ 504.75, 309.88334498646424 ], "wc_reply_reviewers_avg": [ 165.0, 167.52760966479525 ], "wc_reply_authors_avg": [ 634.75, 356.5966734281182 ], "reply_reviewers_avg": [ 1.0, 1.224744871391589 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.7777777777777777, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:PWy9XEi2yEsJ:scholar.google.com/&scioq=GC-Mixer:+A+Novel+Architecture+for+Time-varying+Granger+Causality+Inference&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Beijing Normal University", "aff_unique_dep": "", "aff_unique_url": "https://www.bnu.edu.cn", "aff_unique_abbr": "BNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "532tcx7IHF", "title": "RLLTE: Long-Term Evolution Project of Reinforcement Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "We present RLLTE: a long-term evolution, extremely modular, and open-source framework for reinforcement learning (RL) research and application. Beyond delivering top-notch algorithm implementations, RLLTE also serves as a toolkit for developing algorithms. More specifically, RLLTE decouples the RL algorithms completely from the exploitation-exploration perspective, providing a large number of prototypes to accelerate algorithm development and evolution. In particular, RLLTE is the first RL framework to build a complete and luxuriant ecosystem, which includes model training, evaluation, deployment, benchmark hub, and large language model (LLM)-empowered copilot. 
RLLTE is expected to set standards for RL engineering practice and be highly stimulative for industry and academia.", "keywords": "reinforcement learning;framework;benchmark;open-source;library;intrinsic reward;data augmentation;copilot;LLM", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Mingqi Yuan;Zequn Zhang;Yang Xu;Jake Shihao Luo;Bo Li;Xin Jin;Wenjun Zeng", "authorids": "~Mingqi_Yuan1;~Zequn_Zhang1;~Yang_Xu18;~Jake_Shihao_Luo1;~Bo_Li27;~Xin_Jin8;~Wenjun_Zeng3", "gender": "M;M;M;M;M;M;M", "homepage": "https://github.com/yuanmingqi;;;https://github.com/ShihaoLuo;https://www4.comp.polyu.edu.hk/~bo2li/;http://home.ustc.edu.cn/~jinxustc/;https://www.eias.ac.cn/h-col-187.html", "dblp": "282/4291;;;;50/3402-37;68/3340-14;57/145", "google_scholar": "https://scholar.google.com.hk/citations?user=xtj9MIMAAAAJ;ElVJU4MAAAAJ;https://scholar.google.com.hk/citations?user=3IpmW68AAAAJ;;;byaSC-kAAAAJ;_cUfvYQAAAAJ", "orcid": ";0000-0001-5566-761X;;;;0000-0002-1820-8358;", "linkedin": ";;;;;;", "or_profile": "~Mingqi_Yuan1;~Zequn_Zhang1;~Yang_Xu18;~Jake_Shihao_Luo1;~Bo_Li27;~Xin_Jin8;~Wenjun_Zeng3", "aff": "The Hong Kong Polytechnic University;University of Science and Technology of China;Purdue University;;The Hong Kong Polytechnic University;Eastern Institute of Technology, Ningbo;Eastern Institute for Advanced Study", "aff_domain": "polyu.edu.hk;ustc.edu.cn;purdue.edu;;polyu.edu.hk;eitech.edu.cn;eias.ac.cn", "position": "PhD student;PhD student;PhD student;;Assistant Professor;Assistant Professor;Full Professor", "bibtex": "@misc{\nyuan2024rllte,\ntitle={{RLLTE}: Long-Term Evolution Project of Reinforcement Learning},\nauthor={Mingqi Yuan and Zequn Zhang and Yang Xu and Jake Shihao Luo and Bo Li and Xin Jin and Wenjun Zeng},\nyear={2024},\nurl={https://openreview.net/forum?id=532tcx7IHF}\n}", "github": "", "project": "", "reviewers": "N3a2;ZtLn;gfMY", "site": "https://openreview.net/forum?id=532tcx7IHF", "pdf_size": 1160382, "rating": "3;5;6", "confidence": "4;4;3", "soundness": "2;3;3", "contribution": "1;2;3", "presentation": "2;2;3", "wc_summary": "34;43;91", "wc_strengths": "34;37;57", "wc_weaknesses": "123;174;49", "wc_questions": "16;36;73", "wc_review": "207;290;270", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.0, 0.816496580927726 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 56.0, 25.019992006393608 ], "wc_strengths_avg": [ 42.666666666666664, 10.208928554075703 ], "wc_weaknesses_avg": [ 115.33333333333333, 51.31817958146562 ], "wc_questions_avg": [ 41.666666666666664, 23.612614331233114 ], "wc_review_avg": [ 255.66666666666666, 35.3679076125361 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.7559289460184545, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13945585868051426758&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2;0;3;4", "aff_unique_norm": "Hong Kong Polytechnic University;University of Science and Technology of China;Purdue University;Eastern Institute of Technology;Eastern Institute for Advanced Study", 
"aff_unique_dep": ";;;;", "aff_unique_url": "https://www.polyu.edu.hk;http://www.ustc.edu.cn;https://www.purdue.edu;https://www.eit.edu.cn;", "aff_unique_abbr": "PolyU;USTC;Purdue;;", "aff_campus_unique_index": "0;0;2", "aff_campus_unique": "Hong Kong SAR;;Ningbo", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "China;United States;" }, { "id": "53gU1BASrd", "title": "Evaluating and Finetuning Models For Financial Time Series Forecasting", "track": "main", "status": "Reject", "tldr": "", "abstract": "Time series forecasting is a challenging task as it is subject to a lot of noise, and the predictions often depend on external events. Still, recent deep learning techniques advanced the state-of-the-art on certain datasets, while they keep failing on other noisy datasets. This paper studies the case of financial time series forecasting, a problem that exhibits both a high noise and many unknown dependencies. We will show that the current evaluation pipelines are imperfect and forget a trivial baseline that can beat most models. We propose a new evaluation pipeline that is better suited for our task, and we run this pipeline on recent models. This pipeline is based on the idea of deciding which assets to buy and sell rather than predicting exact prices. Next, as the small datasets used in current approaches limit the size of the models, we train a general model on a massive dataset (containing a hundred times more data points than existing datasets) and show this model can be finetuned to improve the performance on small datasets. All our code and models will be published to help the community bootstrap and evaluate their future models.", "keywords": "time series forecasting;finance;metrics", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/2775d165775e689a4e0d2dea6ef36c81f3cfd422.zip", "author": "Tom Djaaleb;Julien Romero", "authorids": "~Tom_Djaaleb1;~Julien_Romero1", "gender": "M;M", "homepage": ";https://julienromero.fr", "dblp": ";241/9678", "google_scholar": ";nCgiv6YAAAAJ", "orcid": ";0000-0002-7382-9077", "linkedin": "tom-djaaleb/;romerojulien/", "or_profile": "~Tom_Djaaleb1;~Julien_Romero1", "aff": "Ecole Nationale de la Statistique et de l'Administration Economique;T\u00e9l\u00e9com SudParis", "aff_domain": "ensae.fr;telecom-sudparis.eu", "position": "MS student;Associate Professor", "bibtex": "@misc{\ndjaaleb2024evaluating,\ntitle={Evaluating and Finetuning Models For Financial Time Series Forecasting},\nauthor={Tom Djaaleb and Julien Romero},\nyear={2024},\nurl={https://openreview.net/forum?id=53gU1BASrd}\n}", "github": "", "project": "", "reviewers": "pP2m;BmfC;Zb2D;9z9f", "site": "https://openreview.net/forum?id=53gU1BASrd", "pdf_size": 237796, "rating": "3;5;5;5", "confidence": "3;4;4;3", "soundness": "2;2;3;3", "contribution": "1;2;1;1", "presentation": "2;3;3;2", "wc_summary": "40;59;115;126", "wc_strengths": "39;72;149;32", "wc_weaknesses": "260;65;174;317", "wc_questions": "70;30;91;60", "wc_review": "409;226;529;535", "wc_reply_reviewers": "0;0;0;294", "wc_reply_authors": "1123;538;561;810", "reply_reviewers": "0;0;0;1", "reply_authors": "2;2;2;2", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 1.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 85.0, 36.33868462121325 ], "wc_strengths_avg": [ 73.0, 46.40581860068843 ], "wc_weaknesses_avg": [ 204.0, 
95.03420436874295 ], "wc_questions_avg": [ 62.75, 21.970150204311302 ], "wc_review_avg": [ 424.75, 125.27245307728272 ], "wc_reply_reviewers_avg": [ 73.5, 127.30573435631248 ], "wc_reply_authors_avg": [ 758.0, 236.18742557553736 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Vv05zUyAGaEJ:scholar.google.com/&scioq=Evaluating+and+Finetuning+Models+For+Financial+Time+Series+Forecasting&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Ecole Nationale de la Statistique et de l'Administration Economique;T\u00e9l\u00e9com SudParis", "aff_unique_dep": ";", "aff_unique_url": "https://ensae.fr;https://www.telecom-sudparis.eu", "aff_unique_abbr": "ENSAE;TSP", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "id": "53kW6e1uNN", "title": "AFDGCF: Adaptive Feature De-correlation Graph Collaborative Filtering for Recommendations", "track": "main", "status": "Reject", "tldr": "", "abstract": "Collaborative filtering methods based on graph neural networks (GNNs) have witnessed significant success in recommender systems (RS), capitalizing on their ability to capture collaborative signals within intricate user-item relationships via message-passing mechanisms. However, these GNN-based RS inadvertently introduce a linear correlation between user and item embeddings, contradicting the goal of providing personalized recommendations. While existing research predominantly ascribes this flaw to the over-smoothing problem, this paper underscores the critical, often overlooked role of the over-correlation issue in diminishing the effectiveness of GNN representations and subsequent recommendation performance. The unclear relationship between over-correlation and over-smoothing in RS, coupled with the challenge of adaptively minimizing the impact of over-correlation while preserving collaborative filtering signals, is quite challenging. To this end, this paper aims to address the aforementioned gap by undertaking a comprehensive study of the over-correlation issue in graph collaborative filtering models. Empirical evidence substantiates the widespread prevalence of over-correlation in these models. Furthermore, a theoretical analysis establishes a pivotal connection between the over-correlation and over-smoothing predicaments. Leveraging these insights, we introduce the Adaptive Feature De-correlation Graph Collaborative Filtering (AFDGCF) Framework, which dynamically applies correlation penalties to the feature dimensions of the representation matrix, effectively alleviating both over-correlation and over-smoothing challenges. 
The efficacy of the proposed framework is corroborated through extensive experiments conducted with four different graph collaborative filtering models across four publicly available datasets, demonstrating the superiority of AFDGCF in enhancing the performance landscape of graph collaborative filtering models.", "keywords": "Collaborative Filtering;Graph Neural Networks;Over-correlation;Over-smoothing", "primary_area": "applications to robotics, autonomy, planning", "supplementary_material": "/attachment/3de2578def7d9ce93546f43854f9f782f7d86eeb.zip", "author": "Wei Wu;Chao Wang;Dazhong Shen;Chuan Qin;Hui Xiong", "authorids": "~Wei_Wu25;~Chao_Wang14;~Dazhong_Shen1;~Chuan_Qin1;~Hui_Xiong1", "gender": "M;M;M;M;M", "homepage": "https://github.com/U-rara;https://chaowang-ustc.github.io/;http://www.shendazhong.com/;https://dylan-qin.github.io;https://www.hkust-gz.edu.cn/people/hui-xiong/", "dblp": "95/6985-45.html;188/7759-86;222/7906;24/2771-2;262/1686-1.html", "google_scholar": ";j08V64UAAAAJ;5vSh09YAAAAJ;0KTz65wAAAAJ;cVDF1tkAAAAJ", "orcid": "0009-0009-1590-601X;0000-0001-7717-447X;0000-0002-3947-4153;0000-0002-5354-8630;0000-0001-6016-6465", "linkedin": ";;;;", "or_profile": "~Wei_Wu25;~Chao_Wang14;~Dazhong_Shen1;~Chuan_Qin1;~Hui_Xiong1", "aff": "University of Science and Technology of China;HKUST Fok Ying Tung Research Institute, The Hong Kong University of Science and Technology\u00a0(Guangzhou);Shanghai Artificial Intelligence Laboratory;BOSS Zhipin;Hong Kong University of Science and Technology (Guangzhou)", "aff_domain": "ustc.edu.cn;ust.hk;pjlab.org.cn;kanzhun.com;hkust.edu", "position": "PhD student;Postdoc;Researcher;Senior Researcher;Full Professor", "bibtex": "@misc{\nwu2024afdgcf,\ntitle={{AFDGCF}: Adaptive Feature De-correlation Graph Collaborative Filtering for Recommendations},\nauthor={Wei Wu and Chao Wang and Dazhong Shen and Chuan Qin and Hui Xiong},\nyear={2024},\nurl={https://openreview.net/forum?id=53kW6e1uNN}\n}", "github": "", "project": "", "reviewers": "vpbW;HBXd;TKXS;Xkxj", "site": "https://openreview.net/forum?id=53kW6e1uNN", "pdf_size": 6094555, "rating": "5;5;8;8", "confidence": "4;4;5;5", "soundness": "3;3;4;3", "contribution": "2;2;3;3", "presentation": "2;3;4;4", "wc_summary": "73;81;157;210", "wc_strengths": "57;52;87;56", "wc_weaknesses": "228;163;76;37", "wc_questions": "4;71;15;165", "wc_review": "362;367;335;468", "wc_reply_reviewers": "206;60;37;78", "wc_reply_authors": "859;485;507;574", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.5, 1.5 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 130.25, 56.52156668033893 ], "wc_strengths_avg": [ 63.0, 13.982131454109563 ], "wc_weaknesses_avg": [ 126.0, 74.48825410761081 ], "wc_questions_avg": [ 63.75, 63.73921477395215 ], "wc_review_avg": [ 383.0, 50.5618433208284 ], "wc_reply_reviewers_avg": [ 95.25, 65.57200240956502 ], "wc_reply_authors_avg": [ 606.25, 149.56165116767065 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14752669949639676240&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;2;3;1", "aff_unique_norm": "University of Science and Technology of China;Hong Kong University of Science and Technology;Shanghai 
Artificial Intelligence Laboratory;BOSS Zhipin", "aff_unique_dep": ";Fok Ying Tung Research Institute;;", "aff_unique_url": "http://www.ustc.edu.cn;https://www.ust.hk;http://www.shailab.org/;https://www.zhipin.com", "aff_unique_abbr": "USTC;HKUST;Shanghai AI Lab;BOSS\u76f4\u8058", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Guangzhou;Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "5451cIQdWp", "title": "On Synthetic Data and Iterative Magnitude Pruning: a Linear Mode Connectivity Study", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recent works have shown that distilled data representations can be leveraged for accelerating the training of DNNs. However, to date, very little is understood about the effect of these synthetic data representations in the area of architectural optimization, specifically with Iterative Magnitude Pruning (IMP) and pruning at initialization. We push the boundaries of pruning with distilled data, matching the performance of traditional IMP on ResNet-18 \\& CIFAR-10 while using 150x less training points to find a sparsity mask. We find that distilled data guides IMP to discard parameters contributing to the sharpness of the loss landscape, fostering smoother landscapes. These synthetic subnetworks are stable to SGD noise at initialization in settings when the dense model or subnetworks found with standard IMP are not, such as ResNet-10 on ImageNet-10. In other words, training from initialization across different shuffling of data will result in linear mode connectivity, a phenomenon which rarely happens without some pretraining. We visualize these loss landscapes and quantitatively measure sharpness through hessian approximations to understand these effects. This behavior is heavily linked to the compressed representation of the data, highlighting the importance of synthetic data in neural architectural validation. 
In order to find both a high performing and robust sparse architecture, a more optimal synthetic data representation is needed that can compress irrelevant noise like distilled data, yet better maintain task-specific information from the real data as dataset complexity increases.", "keywords": "Neural Network Pruning;Linear Mode Connectivity;Dataset Distillation;Sparse Neural Networks", "primary_area": "visualization or interpretation of learned representations", "supplementary_material": "", "author": "Luke McDermott;Daniel Cummings", "authorids": "~Luke_McDermott2;~Daniel_Cummings2", "gender": "M;", "homepage": "https://lukemcdermotttt.github.io/;", "dblp": ";", "google_scholar": "l_z4cj0AAAAJ;", "orcid": ";", "linkedin": "lukemcdermott;", "or_profile": "~Luke_McDermott2;~Daniel_Cummings2", "aff": "Modern Intelligence;", "aff_domain": "modernintelligence.ai;", "position": "Researcher;", "bibtex": "@misc{\nmcdermott2024on,\ntitle={On Synthetic Data and Iterative Magnitude Pruning: a Linear Mode Connectivity Study},\nauthor={Luke McDermott and Daniel Cummings},\nyear={2024},\nurl={https://openreview.net/forum?id=5451cIQdWp}\n}", "github": "", "project": "", "reviewers": "rRcD;6HLj;mHnL;qh7B", "site": "https://openreview.net/forum?id=5451cIQdWp", "pdf_size": 2297302, "rating": "3;5;5;6", "confidence": "5;3;3;3", "soundness": "2;3;2;3", "contribution": "1;2;2;3", "presentation": "1;1;2;2", "wc_summary": "88;56;70;79", "wc_strengths": "47;42;24;75", "wc_weaknesses": "404;228;344;157", "wc_questions": "39;37;2;36", "wc_review": "578;363;440;347", "wc_reply_reviewers": "186;0;61;0", "wc_reply_authors": "813;411;776;521", "reply_reviewers": "1;0;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 1.5, 0.5 ], "wc_summary_avg": [ 73.25, 11.818946653572814 ], "wc_strengths_avg": [ 47.0, 18.289341158171883 ], "wc_weaknesses_avg": [ 283.25, 96.51780923746664 ], "wc_questions_avg": [ 28.5, 15.337861650177967 ], "wc_review_avg": [ 432.0, 91.3318126394084 ], "wc_reply_reviewers_avg": [ 61.75, 75.93541663808791 ], "wc_reply_authors_avg": [ 630.25, 169.29762993025037 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.9271726499455306, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:vgEn7HRo_i0J:scholar.google.com/&scioq=On+Synthetic+Data+and+Iterative+Magnitude+Pruning:+a+Linear+Mode+Connectivity+Study&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Modern Intelligence", "aff_unique_dep": "", "aff_unique_url": "", "aff_unique_abbr": "" }, { "id": "54AwQUaDZo", "title": "Bounding the Robustness and Generalization for Individual Treatment Effect", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Individual treatment effect (ITE) estimation has important applications in fields such as healthcare, economics and education, hence attracted increasing attention from both research and industrial community. However, most existing models may not perform well in practice due to the lack of robustness of the ITE estimation predicted by deep neural networks when an imperceptible perturbation has been added to the covariate. 
To alleviate this problem, in this paper, we first derive an informative generalization bound that demonstrate the expected ITE estimation error is bounded by one of the most important term, the Lipschitz constant of ITE model. In addition, in order to use Integral Probability Metrics (IPM) to measure distances between distributions, we also obtain explicit bounds for the Wasserstein (WASS) and Maximum Mean Discrepancy (MMD) distances. More specifically, we propose two types of regularizations called Lipschitz Regularization and reproducing kernel Hilbert space (RKHS) Regularization for encouraging robustness in estimating ITE from observational data. Extensive experiments on both synthetic examples and standard benchmarks demonstrate our framework\u2019s effectiveness and generality. To benefit this research direction, we release our project at https://github-rite.github.io/rite/.", "keywords": "Individual Treatment Effect;Causal inference", "primary_area": "causal reasoning", "supplementary_material": "", "author": "Zhenlei Wang;Xu Chen;Xiaoxiao Xu;Lantao Hu;Peng Jiang;Kun Gai", "authorids": "~Zhenlei_Wang1;~Xu_Chen13;~Xiaoxiao_Xu2;~Lantao_Hu1;~Peng_Jiang6;~Kun_Gai1", "gender": "M;M;F;M;M;M", "homepage": "https://causal-rec.github.io/;https://gsai.ruc.edu.cn/chenxu;https://scholar.google.com/citations?hl=zh-CN&user=1I2OrQEAAAAJ&view_op=list_works&sortby=pubdate;;;", "dblp": "223/8301;83/6331-17;116/1526;;;59/2902", "google_scholar": "7x0kGsUAAAAJ;loPoqy0AAAAJ;https://scholar.google.com/citations?hl=zh-CN;P0EK1y8AAAAJ;https://scholar.google.com/citations?hl=en;PXO4ygEAAAAJ", "orcid": ";0000-0003-0144-1775;0009-0007-5493-5628;;0000-0002-9266-0780;", "linkedin": ";;;;;", "or_profile": "~Zhenlei_Wang1;~Xu_Chen13;~Xiaoxiao_Xu2;~Lantao_Hu1;~Peng_Jiang6;~Kun_Gai1", "aff": "Renmin University of China;Renmin University of China;Kuaishou Technology;;Kuaishou Technology;Kuaishou- \u5feb\u624b\u79d1\u6280", "aff_domain": "ruc.edu.cn;ruc.edu.cn;kuaishou.com;;kuaishou.com;kuaishou.com", "position": "PhD student;Associate Professor;Engineer;;Vice President;Instructor", "bibtex": "@misc{\nanonymous2024bounding,\ntitle={Bounding the Robustness and Generalization for Individual Treatment Effect},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=54AwQUaDZo}\n}", "github": "", "project": "", "reviewers": "FvrM;nExb;UoGx;WUez", "site": "https://openreview.net/forum?id=54AwQUaDZo", "pdf_size": 1120565, "rating": "3;5;5;5", "confidence": "3;3;4;3", "soundness": "2;3;1;2", "contribution": "2;2;3;1", "presentation": "2;3;2;2", "wc_summary": "86;117;179;119", "wc_strengths": "76;58;106;55", "wc_weaknesses": "66;192;432;375", "wc_questions": "52;2;102;54", "wc_review": "280;369;819;603", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.0, 0.7071067811865476 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 125.25, 33.677700337166726 ], "wc_strengths_avg": [ 73.75, 20.27775875189366 ], "wc_weaknesses_avg": [ 266.25, 145.69896190433204 ], "wc_questions_avg": [ 52.5, 35.36594407053204 ], "wc_review_avg": [ 517.75, 210.16109892175572 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 6, 0 ], 
"corr_rating_confidence": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:V_yve7ojoRIJ:scholar.google.com/&scioq=Bounding+the+Robustness+and+Generalization+for+Individual+Treatment+Effect&hl=en&as_sdt=0,23", "gs_version_total": 0, "aff_unique_index": "0;0;1;1;1", "aff_unique_norm": "Renmin University of China;Kuaishou Technology", "aff_unique_dep": ";", "aff_unique_url": "http://www.ruc.edu.cn;https://www.kuaishou.com", "aff_unique_abbr": "RUC;Kuaishou", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Towards Identifiable Unsupervised Domain Translation: A Diversified Distribution Matching Approach", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19440", "id": "55uj7mU7Cv", "author_site": "Sagar Shrestha, Xiao Fu", "tldr": "", "abstract": "Unsupervised domain translation (UDT) aims to find functions that convert samples from one domain (e.g., sketches) to another domain (e.g., photos) without changing the high-level semantic meaning (also referred to as \"content\"). The translation functions are often sought by probability distribution matching of the transformed source domain and target domain. CycleGAN stands as arguably the most representative approach among this line of work. However, it was noticed in the literature that CycleGAN and variants could fail to identify the desired translation functions and produce content-misaligned translations.\nThis limitation arises due to the presence of multiple translation functions---referred to as ``measure-preserving automorphism\" (MPA)---in the solution space of the learning criteria. Despite awareness of such identifiability issues, solutions have remained elusive. This study delves into the core identifiability inquiry and introduces an MPA elimination theory. Our analysis shows that MPA is unlikely to exist, if multiple pairs of diverse cross-domain conditional distributions are matched by the learning function.\nOur theory leads to a UDT learner using distribution matching over auxiliary variable-induced subsets of the domains---other than over the entire data domains as in the classical approaches. 
The proposed framework is the first to rigorously establish translation identifiability under reasonable UDT settings, to our best knowledge.\nExperiments corroborate with our theoretical claims.", "keywords": "unsupervised domain translation;translation identifiability;distribution matching;unpaired image to image translation", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/bba6017fc2dc8543fc915d43386eaa33f05f7b0a.zip", "author": "Sagar Shrestha;Xiao Fu", "authorids": "~Sagar_Shrestha1;~Xiao_Fu1", "gender": "M;M", "homepage": ";https://web.engr.oregonstate.edu/~fuxia/", "dblp": "292/3689;60/4601-1", "google_scholar": "qIBTvlAAAAAJ;pDnpH1MAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Sagar_Shrestha1;~Xiao_Fu1", "aff": "Oregon State University;Oregon State University", "aff_domain": "oregonstate.edu;oregonstate.edu", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nshrestha2024towards,\ntitle={Towards Identifiable Unsupervised Domain Translation: A Diversified Distribution Matching Approach},\nauthor={Sagar Shrestha and Xiao Fu},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=55uj7mU7Cv}\n}", "github": "", "project": "", "reviewers": "4LhS;rKLC;bENZ;siTh", "pdf_size": 38400638, "rating": "5;6;6;8", "confidence": "4;5;4;4", "soundness": "3;3;3;3", "contribution": "2;2;3;3", "presentation": "3;3;3;4", "wc_summary": "39;107;54;90", "wc_strengths": "52;51;174;59", "wc_weaknesses": "58;180;107;33", "wc_questions": "312;60;68;106", "wc_review": "461;398;403;288", "wc_reply_reviewers": "0;38;11;38", "wc_reply_authors": "3117;1083;427;1575", "reply_reviewers": "0;1;1;1", "reply_authors": "5;2;1;3", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 72.5, 27.207535720825582 ], "wc_strengths_avg": [ 84.0, 52.052857750559674 ], "wc_weaknesses_avg": [ 94.5, 56.082528473669946 ], "wc_questions_avg": [ 136.5, 102.80442597476045 ], "wc_review_avg": [ 387.5, 62.5559749344537 ], "wc_reply_reviewers_avg": [ 21.75, 16.708904811506947 ], "wc_reply_authors_avg": [ 1550.5, 991.8834356919164 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 1.479019945774904 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.13245323570650439, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5429720305334156891&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=55uj7mU7Cv", "pdf": "https://openreview.net/pdf?id=55uj7mU7Cv", "email": "oregonstate.edu;oregonstate.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Oregon State University", "aff_unique_dep": "", "aff_unique_url": "https://oregonstate.edu", "aff_unique_abbr": "OSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "How to Catch an AI Liar: Lie Detection in Black-Box LLMs by Asking Unrelated Questions", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19439", "id": "567BjxgaTp", "author_site": "Lorenzo Pacchiardi, Alex Chan, S\u00f6ren Mindermann, Ilan Moscovitz, Alexa Pan, Yarin Gal, Owain 
Evans, Jan Brauner", "tldr": "", "abstract": "Large language models (LLMs) can \u201clie\u201d, which we define as outputting false statements when incentivised to, despite \u201cknowing\u201d the truth in a demonstrable sense. LLMs might \u201clie\u201d, for example, when instructed to output misinformation. Here, we develop a simple lie detector that requires neither access to the LLM\u2019s activations (black-box) nor ground-truth knowledge of the fact in question. The detector works by asking a predefined set of unrelated follow-up questions after a suspected lie, and feeding the LLM\u2019s yes/no answers into a logistic regression classifier. Despite its simplicity, this lie detector is highly accurate and surprisingly general. When trained on examples from a single setting\u2014prompting GPT-3.5 to lie about factual questions\u2014the detector generalises out-of-distribution to (1) other LLM architectures, (2) LLMs fine-tuned to lie, (3) sycophantic lies, and (4) lies emerging in real-life scenarios such as sales. These results indicate that LLMs have distinctive lie-related behavioural patterns, consistent across architectures and contexts, which could enable general-purpose lie detection.", "keywords": "language models;lying;deception;alignment;safety;truthfulness;honesty", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/3b63a2c68f1a0b002920ca2526f715d3513508c9.pdf", "author": "Lorenzo Pacchiardi;Alex James Chan;S\u00f6ren Mindermann;Ilan Moscovitz;Alexa Yue Pan;Yarin Gal;Owain Evans;Jan M. Brauner", "authorids": "~Lorenzo_Pacchiardi1;~Alex_James_Chan1;~S\u00f6ren_Mindermann1;~Ilan_Moscovitz1;~Alexa_Yue_Pan1;~Yarin_Gal1;~Owain_Evans1;~Jan_M._Brauner1", "gender": "M;M;M;M;F;;;M", "homepage": "http://lorenzopacchiardi.me/;https://alexjchan.com;https://www.soren-mindermann.com/;;;http://www.cs.ox.ac.uk/people/yarin.gal/website//;https://owainevans.github.io/;", "dblp": "308/9068;268/6948;211/7976;;;67/9076;52/10432;271/0265", "google_scholar": "9EAb0uEAAAAJ;yfy_BGIAAAAJ;slBPlrQAAAAJ;;;https://scholar.google.co.uk/citations?user=SIayDoQAAAAJ;4VpTwzIAAAAJ;https://scholar.google.de/citations?user=tNZUnjcAAAAJ", "orcid": "0000-0003-4760-7638;;0000-0002-0315-9821;;;;;0000-0002-1588-5724", "linkedin": ";alex-chan-040081131/;;ilan-moscovitz/;alexa-pan-5678ab163/;;;", "or_profile": "~Lorenzo_Pacchiardi1;~Alex_James_Chan1;~S\u00f6ren_Mindermann1;~Ilan_Moscovitz1;~Alexa_Yue_Pan1;~Yarin_Gal1;~Owain_Evans1;~Jan_M._Brauner1", "aff": "University of Cambridge;Spotify;Mila - Quebec Artificial Intelligence Institute;Independent;Yale University;University of Oxford;Truthful AI;University of Oxford", "aff_domain": "cam.ac.uk;spotify.com;mila.quebec;openreview.net;yale.edu;ox.ac.uk;owainevans.com;ox.ac.uk", "position": "Postdoc;Researcher;Postdoc;Researcher;Undergrad student;Associate Professor;Principal Researcher;PhD student", "bibtex": "@inproceedings{\npacchiardi2024how,\ntitle={How to Catch an {AI} Liar: Lie Detection in Black-Box {LLM}s by Asking Unrelated Questions},\nauthor={Lorenzo Pacchiardi and Alex James Chan and S{\\\"o}ren Mindermann and Ilan Moscovitz and Alexa Yue Pan and Yarin Gal and Owain Evans and Jan M. 
Brauner},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=567BjxgaTp}\n}", "github": "", "project": "", "reviewers": "eJWS;fhKw;cT55;Y6rc", "pdf_size": 3769151, "rating": "5;6;8;8", "confidence": "3;3;4;4", "soundness": "3;3;3;4", "contribution": "2;3;3;4", "presentation": "1;2;2;3", "wc_summary": "377;122;70;120", "wc_strengths": "2;214;222;67", "wc_weaknesses": "2;88;329;18", "wc_questions": "2;27;64;68", "wc_review": "383;451;685;273", "wc_reply_reviewers": "40;4;122;0", "wc_reply_authors": "1618;735;1205;386", "reply_reviewers": "1;1;2;0", "reply_authors": "4;2;2;1", "rating_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 172.25, 120.03410973552477 ], "wc_strengths_avg": [ 126.25, 94.62656867920342 ], "wc_weaknesses_avg": [ 109.25, 130.93008630563108 ], "wc_questions_avg": [ 40.25, 27.261465477849864 ], "wc_review_avg": [ 448.0, 150.8542342793201 ], "wc_reply_reviewers_avg": [ 41.5, 49.01785389018985 ], "wc_reply_authors_avg": [ 986.0, 466.4724000409885 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.9622504486493761, "gs_citation": 59, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4094771018704741638&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "openreview": "https://openreview.net/forum?id=567BjxgaTp", "pdf": "https://openreview.net/pdf?id=567BjxgaTp", "email": "cam.ac.uk;spotify.com;mila.quebec;openreview.net;yale.edu;ox.ac.uk;owainevans.com;ox.ac.uk", "author_num": 8, "aff_unique_index": "0;1;2;3;4;5;6;5", "aff_unique_norm": "University of Cambridge;Spotify;Quebec Artificial Intelligence Institute;Independent;Yale University;University of Oxford;Truthful AI", "aff_unique_dep": ";;Artificial Intelligence;;;;", "aff_unique_url": "https://www.cam.ac.uk;https://www.spotify.com;https://mila.quebec;;https://www.yale.edu;https://www.ox.ac.uk;", "aff_unique_abbr": "Cambridge;Spotify;Mila;;Yale;Oxford;", "aff_campus_unique_index": "0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;1;2;4;0;0", "aff_country_unique": "United Kingdom;Sweden;Canada;;United States" }, { "id": "56jIlazr6a", "title": "Unified Uncertainty Estimation", "track": "main", "status": "Reject", "tldr": "", "abstract": "In order to build robust, fair, and safe AI systems, we would like our classifiers to recognize and say \u201cI don\u2019t know\u201d when facing test examples that do not belong to any of the in-domain classes observed during training. Perhaps surprisingly, the ubiquitous strategy to predict under uncertainty is the simplistic reject-or-classify rule: abstain from prediction if epistemic uncertainty is high, classify otherwise. We argue that this recipe has several problems: it does not allow different sources of uncertainty to communicate with each other, produces miscalibrated predictions, and it does not allow to correct for misspecifications in our uncertainty estimates. To address these issues, we introduce unified uncertainty calibration (U2C), a framework for the unified, non-linear calibration of aleatoric and epistemic uncertainties. 
Unified uncertainty calibration enables a clean analysis of uncertainty estimation via learning theory, and significantly outperforms reject-or-classify across a variety of standard benchmarks.", "keywords": "uncertainty estimation;calibration;epistemic;aleatoric", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/cc2447edc90f7aa164e8dd60dd601d1be54d499b.zip", "author": "Kamalika Chaudhuri;David Lopez-Paz", "authorids": "~Kamalika_Chaudhuri1;~David_Lopez-Paz2", "gender": "F;", "homepage": "http://cseweb.ucsd.edu/users/kamalika;http://lopezpaz.org", "dblp": "56/6435;74/10481", "google_scholar": "I-DJ7EsAAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Kamalika_Chaudhuri1;~David_Lopez-Paz2", "aff": "University of California, San Diego;Meta Facebook", "aff_domain": "ucsd.edu;fb.com", "position": "Associate Professor;Research Scientist", "bibtex": "@misc{\nchaudhuri2024unified,\ntitle={Unified Uncertainty Estimation},\nauthor={Kamalika Chaudhuri and David Lopez-Paz},\nyear={2024},\nurl={https://openreview.net/forum?id=56jIlazr6a}\n}", "github": "", "project": "", "reviewers": "WQhb;LfhH;Pdw3;3np5", "site": "https://openreview.net/forum?id=56jIlazr6a", "pdf_size": 1719767, "rating": "3;5;5;8", "confidence": "4;3;4;4", "soundness": "1;3;3;3", "contribution": "2;2;3;3", "presentation": "1;3;4;3", "wc_summary": "60;135;152;118", "wc_strengths": "33;86;56;97", "wc_weaknesses": "439;201;23;209", "wc_questions": "51;4;103;5", "wc_review": "583;426;334;429", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "179;148;110;118", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.25, 1.7853571071357126 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 116.25, 34.629286738250904 ], "wc_strengths_avg": [ 68.0, 25.16942589730644 ], "wc_weaknesses_avg": [ 218.0, 147.6787053031005 ], "wc_questions_avg": [ 40.75, 40.64710936831794 ], "wc_review_avg": [ 443.0, 89.39519002720448 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 138.75, 27.215574585152524 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.08084520834544431, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:VIZBaUc9khQJ:scholar.google.com/&scioq=Unified+Uncertainty+Estimation&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "University of California, San Diego;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.ucsd.edu;https://meta.com", "aff_unique_abbr": "UCSD;Meta", "aff_campus_unique_index": "0", "aff_campus_unique": "San Diego;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "56l30xVDW7", "title": "Semantic Attribution For Explainable Uncertainty Quantification", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Bayesian deep learning, with an emphasis on uncertainty quantification, is receiving growing interest in building reliable models. Nonetheless, interpreting and explaining the origins and reasons for uncertainty presents a significant challenge. In this paper, we present semantic uncertainty attribution as a tool for pinpointing the primary factors contributing to uncertainty. 
This approach allows us to explain why a particular image carries high uncertainty, thereby making our models more interpretable. Specifically, we utilize the variational autoencoder to disentangle different semantic factors within the latent space and link the uncertainty to corresponding semantic factors for an explanation. The proposed techniques can also enhance explainable out-of-distribution (OOD) detection. We can not only identify OOD samples via their uncertainty, but also provide reasoning rooted in a semantic concept.", "keywords": "Uncertainty Quantification;Model Explanability", "primary_area": "probabilistic methods (Bayesian methods, variational inference, sampling, UQ, etc.)", "supplementary_material": "", "author": "Hanjing Wang;Shiqiang Wang;Qiang Ji", "authorids": "~Hanjing_Wang2;~Shiqiang_Wang1;~Qiang_Ji1", "gender": "M;M;M", "homepage": "https://www.ecse.rpi.edu/~cvrl/people_zw.html;https://shiqiang.wang;https://www.ecse.rpi.edu/~qji/", "dblp": "234/8752;87/5094-1;", "google_scholar": ";kA_vmOcAAAAJ;vAXmpVIAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Hanjing_Wang2;~Shiqiang_Wang1;~Qiang_Ji1", "aff": "Rensselaer Polytechnic Institute;IBM, International Business Machines;Rensselaer Polytechnic Institute", "aff_domain": "rpi.edu;us.ibm.com;rpi.edu", "position": "PhD student;Research Staff Member;Professor", "bibtex": "@misc{\nwang2024semantic,\ntitle={Semantic Attribution For Explainable Uncertainty Quantification},\nauthor={Hanjing Wang and Shiqiang Wang and Qiang Ji},\nyear={2024},\nurl={https://openreview.net/forum?id=56l30xVDW7}\n}", "github": "", "project": "", "reviewers": "", "site": "https://openreview.net/forum?id=56l30xVDW7", "pdf_size": 0, "rating": "", "confidence": "", "soundness": "", "contribution": "", "presentation": "", "wc_summary": "", "wc_strengths": "", "wc_weaknesses": "", "wc_questions": "", "wc_review": "", "wc_reply_reviewers": "", "wc_reply_authors": "", "reply_reviewers": "", "reply_authors": "", "rating_avg": [ 0, 0 ], "confidence_avg": [ 0, 0 ], "soundness_avg": [ 0, 0 ], "contribution_avg": [ 0, 0 ], "presentation_avg": [ 0, 0 ], "wc_summary_avg": [ 0, 0 ], "wc_strengths_avg": [ 0, 0 ], "wc_weaknesses_avg": [ 0, 0 ], "wc_questions_avg": [ 0, 0 ], "wc_review_avg": [ 0, 0 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 0, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16783044059365412257&as_sdt=4005&sciodt=0,6&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;0", "aff_unique_norm": "Rensselaer Polytechnic Institute;International Business Machines", "aff_unique_dep": ";", "aff_unique_url": "https://www.rpi.edu;https://www.ibm.com", "aff_unique_abbr": "RPI;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "59nCKifDtm", "title": "Improve Temporal Consistency In Diffusion Models through Noise Correlations", "track": "main", "status": "Reject", "tldr": "", "abstract": "Diffusion models have emerged as a powerful tool for generating diverse types of data, including sequential data such as audio, video, and motion. 
As the temporal consistency in sequential data is crucial for maintaining fidelity and realism, this paper introduce the AutoRegressive Temporal diffusion (ARTDiff) approach to address the challenge of temporal consistency in diffusion models. ARTDiff offers a straightforward and efficient solution that requires minimal computational overhead. Our proposed ARTDiff method leverages the inherent autoregressive dependence structure in time by introducing a Gaussian noise distribution whose correlations between time frames have a functional form in terms of time difference. This design explicitly captures the temporal dependencies and enhances the consistency in generated sequences. We evaluate the effectiveness of ARTDiff on audio and motion generation tasks. Experimental results demonstrate that ARTDiff significantly improves the fidelity and realism of generated samples compared to baseline diffusion models. The simplicity and efficiency of ARTDiff make it a practical choice for incorporating temporal consistency in diffusion-based generation models.", "keywords": "diffusion model;temporal consistency;sequential data generation", "primary_area": "generative models", "supplementary_material": "", "author": "Kexin Lu;Yuxi CAI;Lan Li;Dafei Qin;Guodong Li", "authorids": "~Kexin_Lu1;~Yuxi_CAI1;~Lan_Li4;~Dafei_Qin1;~Guodong_Li1", "gender": ";;F;M;M", "homepage": "https://github.com/neithen-Lu;;;https://dafei-qin.github.io/;https://saasweb.hku.hk/staff/gdli/", "dblp": ";;;347/3368;", "google_scholar": ";;;https://scholar.google.com/citations?hl=en;whNuLsEAAAAJ", "orcid": ";0000-0003-4065-1193;0009-0005-4711-9740;0009-0001-4992-4760;", "linkedin": ";;;dafei-qin-134151292;", "or_profile": "~Kexin_Lu1;~Yuxi_CAI1;~Lan_Li4;~Dafei_Qin1;~Guodong_Li1", "aff": "University of Hong Kong;University of Hong Kong;University of Hong Kong;University of Hong Kong;The University of Hong Kong", "aff_domain": "hku.hk;hku.hk;hku.hk;hku.hk;hku.hk", "position": "PhD student;PhD student;PhD student;PhD student;Professor", "bibtex": "@misc{\nlu2024improve,\ntitle={Improve Temporal Consistency In Diffusion Models through Noise Correlations},\nauthor={Kexin Lu and Yuxi CAI and Lan Li and Dafei Qin and Guodong Li},\nyear={2024},\nurl={https://openreview.net/forum?id=59nCKifDtm}\n}", "github": "", "project": "", "reviewers": "zRzD;jkr1;uW7w;kucM", "site": "https://openreview.net/forum?id=59nCKifDtm", "pdf_size": 2453954, "rating": "6;6;6;6", "confidence": "4;4;3;3", "soundness": "3;3;3;2", "contribution": "3;3;3;2", "presentation": "3;3;3;3", "wc_summary": "39;63;95;65", "wc_strengths": "34;43;28;43", "wc_weaknesses": "161;44;91;32", "wc_questions": "50;3;4;42", "wc_review": "284;153;218;182", "wc_reply_reviewers": "29;29;14;0", "wc_reply_authors": "2069;1248;2200;772", "reply_reviewers": "1;1;1;0", "reply_authors": "6;4;6;3", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 65.5, 19.868316486305527 ], "wc_strengths_avg": [ 37.0, 6.363961030678928 ], "wc_weaknesses_avg": [ 82.0, 50.66063560596136 ], "wc_questions_avg": [ 24.75, 21.44032415799724 ], "wc_review_avg": [ 209.25, 48.91510502901941 ], "wc_reply_reviewers_avg": [ 18.0, 12.062338081814818 ], "wc_reply_authors_avg": [ 1572.25, 588.7208060702459 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 4.75, 1.299038105676658 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 5, 0 ], 
"corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:h-qXsHj7ayAJ:scholar.google.com/&scioq=Improve+Temporal+Consistency+In+Diffusion+Models+through+Noise+Correlations&hl=en&as_sdt=0,14", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of Hong Kong", "aff_unique_dep": "", "aff_unique_url": "https://www.hku.hk", "aff_unique_abbr": "HKU", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "5AbtYdHlr3", "title": "Stochastic Safe Action Model Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Hand-crafting models of interactive domains is challenging, especially when the dynamics of the domain are stochastic. Therefore, it's useful to be able to automatically learn such models instead. In this work, we propose an algorithm to learn stochastic planning models where the distribution over the sets of effects for each action has a small support, but the sets may set values to an arbitrary number of state attributes (a.k.a. fluents). This class captures the benchmark domains used in stochastic planning, in contrast to the prior work that assumed independence of the effects on individual fluents. Our algorithm has polynomial time and sample complexity when the size of the support is bounded by a constant. Importantly, our learning is safe in that we learn offline from example trajectories and we guarantee that actions are only permitted in states where our model of the dynamics is guaranteed to be accurate. Moreover, we guarantee approximate completeness of the model, in the sense that if the examples are achieving goals from some distribution, then with high probability there will exist plans in our learned model that achieve goals from the same distribution.", "keywords": "offline learning;planning;action model learning;method of moments", "primary_area": "applications to robotics, autonomy, planning", "supplementary_material": "", "author": "Zihao Deng;Brendan Juba", "authorids": "~Zihao_Deng1;~Brendan_Juba1", "gender": ";M", "homepage": ";http://www.cse.wustl.edu/~bjuba/", "dblp": "188/6173;62/6079", "google_scholar": ";https://scholar.google.com.tw/citations?user=5wppdUoAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Zihao_Deng1;~Brendan_Juba1", "aff": "Washington University, St. Louis;Washington University in St. 
Louis", "aff_domain": "wustl.edu;cse.wustl.edu", "position": "PhD student;Associate Professor", "bibtex": "@misc{\ndeng2024stochastic,\ntitle={Stochastic Safe Action Model Learning},\nauthor={Zihao Deng and Brendan Juba},\nyear={2024},\nurl={https://openreview.net/forum?id=5AbtYdHlr3}\n}", "github": "", "project": "", "reviewers": "Ksk7;cva7;YR2D;bFXN", "site": "https://openreview.net/forum?id=5AbtYdHlr3", "pdf_size": 280752, "rating": "3;3;3;3", "confidence": "3;3;4;2", "soundness": "2;3;2;3", "contribution": "2;2;2;2", "presentation": "1;2;1;1", "wc_summary": "42;166;95;134", "wc_strengths": "12;26;16;26", "wc_weaknesses": "114;93;295;154", "wc_questions": "34;130;64;80", "wc_review": "202;415;470;394", "wc_reply_reviewers": "0;44;194;133", "wc_reply_authors": "123;180;619;395", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;2;2", "rating_avg": [ 3.0, 0.0 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 1.25, 0.4330127018922193 ], "wc_summary_avg": [ 109.25, 46.25675626327467 ], "wc_strengths_avg": [ 20.0, 6.164414002968976 ], "wc_weaknesses_avg": [ 164.0, 78.74325367928354 ], "wc_questions_avg": [ 77.0, 34.77067730142742 ], "wc_review_avg": [ 370.25, 101.02567742905761 ], "wc_reply_reviewers_avg": [ 92.75, 75.58232266872989 ], "wc_reply_authors_avg": [ 329.25, 195.63534317704458 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5267497173215605354&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Washington University in St. Louis", "aff_unique_dep": "", "aff_unique_url": "https://wustl.edu", "aff_unique_abbr": "WUSTL", "aff_campus_unique_index": "0;0", "aff_campus_unique": "St. Louis", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "5BBEFotHkJ", "title": "Symmetry-preserving graph attention network to solve routing problems at multiple resolutions", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Travelling Salesperson Problems (TSPs) and Vehicle Routing Problems (VRPs) have achieved reasonable improvement in accuracy and computation time with the adaptation of Machine Learning (ML) methods. However, none of the previous works completely respects the symmetries arising from TSPs and VRPs including rotation, translation, permutation, and scaling. In this work, we introduce the first-ever completely equivariant model and training to solve combinatorial problems. Furthermore, it is essential to capture the multiscale structure (i.e. from local to global information) of the input graph, especially for the cases of large and long-range graphs, while previous methods are limited to extracting only local information that can lead to a local or sub-optimal solution. To tackle the above limitation, we propose a Multiresolution scheme in combination with Equivariant Graph Attention network (mEGAT) architecture, which can learn the optimal route based on low-level and high-level graph resolutions in an efficient way. In particular, our approach constructs a hierarchy of coarse-graining graphs from the input graph, in which we try to solve the routing problems on simple low-level graphs first, then utilize that knowledge for the more complex high-level graphs. 
Experimentally, we have shown that our model outperforms existing baselines and proved that symmetry preservation and multiresolution are important recipes for solving combinatorial problems in a data-driven manner. Our source code is publicly available at [anonymous url].", "keywords": "Symmetry;group equivariant;graph neural networks;multiresolution;multiscale;routing problems;NP-hard;combinatorics;TSP;VRP", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/a3e732803f2cc15aa40c6da99c5389f6f1ee3c57.pdf", "author": "Cong Dao Tran;Thong Bach;Truong Son Hy", "authorids": "~Cong_Dao_Tran1;~Thong_Bach1;~Truong_Son_Hy1", "gender": "M;M;M", "homepage": ";https://hytruongson.github.io/HySonLab/;https://daotranbk.github.io", "dblp": "318/9182.html;213/7552;", "google_scholar": "yFLbTtkAAAAJ;JiKBo6UAAAAJ;", "orcid": ";0000-0002-5092-3757;0009-0001-1920-7568", "linkedin": ";truong-son-h-4a9185b6/;", "or_profile": "~Thong_Bach1;~Truong_Son_Hy1;~Dao_Cong_Tran1", "aff": "Deakin University;Indiana State University;", "aff_domain": "deakin.edu.au;indstate.edu;", "position": "PhD student;Assistant Professor;", "bibtex": "@misc{\ntran2024symmetrypreserving,\ntitle={Symmetry-preserving graph attention network to solve routing problems at multiple resolutions},\nauthor={Cong Dao Tran and Thong Bach and Truong Son Hy},\nyear={2024},\nurl={https://openreview.net/forum?id=5BBEFotHkJ}\n}", "github": "", "project": "", "reviewers": "JHgM;aD1V;UYvA;sqmm", "site": "https://openreview.net/forum?id=5BBEFotHkJ", "pdf_size": 1345581, "rating": "3;3;3;5", "confidence": "5;4;3;4", "soundness": "3;3;3;3", "contribution": "2;2;2;2", "presentation": "3;4;3;3", "wc_summary": "86;102;51;64", "wc_strengths": "18;45;54;85", "wc_weaknesses": "115;185;101;317", "wc_questions": "5;286;104;6", "wc_review": "224;618;310;472", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 75.75, 19.651653874419832 ], "wc_strengths_avg": [ 50.5, 23.921747427811372 ], "wc_weaknesses_avg": [ 179.5, 85.52631174089059 ], "wc_questions_avg": [ 100.25, 114.53465632724446 ], "wc_review_avg": [ 406.0, 151.36049682793725 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13152145194466231369&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Deakin University;Indiana State University", "aff_unique_dep": ";", "aff_unique_url": "https://www.deakin.edu.au;https://www.indstate.edu", "aff_unique_abbr": "Deakin;ISU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Australia;United States" }, { "title": "Demystifying CLIP Data", "status": "Spotlight", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19438", "id": "5BCFlnfE1g", "author_site": "Hu Xu, Saining Xie, Xiaoqing Tan, Po-Yao Huang, Russell Howes, Vasu Sharma, Shang-Wen Li, Gargi Ghosh, Luke Zettlemoyer, Christoph Feichtenhofer", "tldr": "", "abstract": "Contrastive Language-Image Pre-training (CLIP) is an approach 
that has advanced research and applications in computer vision, fueling modern recognition systems and generative models. We believe that the main ingredient to the success of CLIP is its \\textit{data} and \\textit{not} the \\textit{model} architecture or pre-training {objective}. However, CLIP only provides very limited information about its data and how it has been collected, leading to works that aim to reproduce CLIP's data by filtering with its model parameters. In this work, we intend to reveal CLIP's data curation approach and in our pursuit of making it open to the community introduce Metadata-Curated Language-Image Pre-training (MetaCLIP). MetaCLIP takes a raw data pool and metadata (derived from CLIP's concepts) and yields a balanced subset over the metadata distribution. Our experimental study rigorously isolates the model and training settings, concentrating solely on data. MetaCLIP applied to CommonCrawl with 400M image-text data pairs outperforms CLIP's data on multiple standard benchmarks. In zero-shot ImageNet classification, MetaCLIP achieves 70.8\\% accuracy, surpassing CLIP's 68.3\\% on \\mbox{ViT-B} models. Scaling to 1B data, while maintaining the same training budget, attains \\textbf{72.4\\%}. Our observations hold across various model sizes, exemplified by ViT-H achieving \\textbf{80.5\\%}, without any bells-and-whistles. Curation code and training data distribution over metadata will be made available.", "keywords": "multi-modal pretraining;CLIP;image;text", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Hu Xu;Saining Xie;Xiaoqing Tan;Po-Yao Huang;Russell Howes;Vasu Sharma;Shang-Wen Li;Gargi Ghosh;Luke Zettlemoyer;Christoph Feichtenhofer", "authorids": "~Hu_Xu1;~Saining_Xie2;~Xiaoqing_Tan1;~Po-Yao_Huang2;~Russell_Howes1;~Vasu_Sharma1;~Shang-Wen_Li1;~Gargi_Ghosh3;~Luke_Zettlemoyer1;~Christoph_Feichtenhofer4", "gender": "M;F;M;M;M;F;M;M;M;M", "homepage": "https://howardhsu.github.io/;http://ellenxtan.github.io/;;http://vasusharma.github.io;https://swdanielli.github.io/;https://www.linkedin.com/in/gargi-ghosh-5b1087b;https://www.cs.washington.edu/people/faculty/lsz/;http://feichtenhofer.github.io/;https://berniebear.github.io/;", "dblp": ";;;165/0762;35/9232-1.html;;21/6793;127/1937;154/3943-1;126/0960", "google_scholar": "SaH2yWMAAAAJ;_zvwtKAAAAAJ;76IWQk8AAAAJ;PLUB4dIAAAAJ;wFI97HUAAAAJ;k5akwCcAAAAJ;https://scholar.google.com.tw/citations?user=UjpbO6IAAAAJ;UxuqG1EAAAAJ;E8K25LIAAAAJ;https://scholar.google.co.uk/citations?user=Y2GtJkAAAAAJ", "orcid": ";;;;;;;;;", "linkedin": ";xiaoqing-tan/;;vasu-sharma-6b460592?utm_source=share&utm_campaign=share_via&utm_content=profile&utm_medium=ios_app;shang-wen-daniel-li-0109b579/;gargi-ghosh-5b1087b;luke-zettlemoyer-a0109b226/;christoph-feichtenhofer-549433a1;;", "or_profile": "~Hu_Xu1;~Xiaoqing_Tan1;~Russell_Howes1;~Vasu_Sharma1;~Shang-Wen_Li1;~Gargi_Ghosh3;~Luke_Zettlemoyer1;~Christoph_Feichtenhofer4;~Po-Yao_Huang1;~Saining_Xie1", "aff": "FAIR, AMI Foundation;Meta AI;Meta AI;Meta Facebook;Meta Facebook;Meta AI;Meta;Meta FAIR;Meta;New York University", "aff_domain": "meta.com;meta.com;fb.com;fb.com;fb.com;meta.com;meta.com;meta.com;meta.com;nyu.edu", "position": "Research Scientist;Researcher;Researcher;Researcher;Research Manager;Researcher;Researcher;Principal Researcher;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nxu2024demystifying,\ntitle={Demystifying {CLIP} Data},\nauthor={Hu Xu and Saining Xie and Xiaoqing Tan 
and Po-Yao Huang and Russell Howes and Vasu Sharma and Shang-Wen Li and Gargi Ghosh and Luke Zettlemoyer and Christoph Feichtenhofer},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=5BCFlnfE1g}\n}", "github": "", "project": "", "reviewers": "7LK5;f2FV;Smdn;33mj", "pdf_size": 452854, "rating": "5;6;8;8", "confidence": "3;5;4;4", "soundness": "3;3;3;3", "contribution": "3;2;3;4", "presentation": "3;3;2;4", "wc_summary": "55;52;55;95", "wc_strengths": "67;71;45;144", "wc_weaknesses": "68;139;71;298", "wc_questions": "91;51;39;271", "wc_review": "281;313;210;808", "wc_reply_reviewers": "0;19;34;172", "wc_reply_authors": "631;542;882;1226", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;2;3", "rating_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 64.25, 17.795715776557007 ], "wc_strengths_avg": [ 81.75, 37.278512577623054 ], "wc_weaknesses_avg": [ 144.0, 93.3354166434157 ], "wc_questions_avg": [ 113.0, 93.23089616645332 ], "wc_review_avg": [ 403.0, 236.77943322847955 ], "wc_reply_reviewers_avg": [ 56.25, 67.9057250900099 ], "wc_reply_authors_avg": [ 820.25, 265.3699069223939 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.2721655269759087, "gs_citation": 166, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=448420599387582073&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 6, "openreview": "https://openreview.net/forum?id=5BCFlnfE1g", "pdf": "https://openreview.net/pdf?id=5BCFlnfE1g", "email": "meta.com;meta.com;fb.com;fb.com;fb.com;meta.com;meta.com;meta.com;meta.com;nyu.edu", "author_num": 10, "aff_unique_index": "0;1;1;1;1;1;1;1;1;2", "aff_unique_norm": "FAIR;Meta;New York University", "aff_unique_dep": "AMI Foundation;Meta AI;", "aff_unique_url": "https://www.fair.iai.uni-sb.de/;https://meta.com;https://www.nyu.edu", "aff_unique_abbr": "FAIR;Meta;NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;1;1;1;1;1", "aff_country_unique": "France;United States" }, { "title": "Image2Sentence based Asymmetrical Zero-shot Composed Image Retrieval", "status": "Spotlight", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19437", "id": "5BXAXOpaWu", "author_site": "Yongchao Du, Min Wang, Wengang Zhou, Shuping Hui, Houqiang Li", "tldr": "", "abstract": "The task of composed image retrieval (CIR) aims to retrieve images based on the query image and the text describing the users' intent. \nExisting methods have made great progress with the advanced large vision-language (VL) model in CIR task, however, they generally suffer from two main issues: lack of labeled triplets for model training and difficulty of deployment on resource-restricted environments when deploying the large vision-language model. To tackle the above problems, we propose Image2Sentence based Asymmetric zero-shot composed image retrieval (ISA), which takes advantage of the VL model and only relies on unlabeled images for composition learning. In the framework, we propose a new adaptive token learner that maps an image to a sentence in the word embedding space of VL model. 
The sentence adaptively captures discriminative visual information and is further integrated with the text modifier. An asymmetric structure is devised for flexible deployment, in which the lightweight model is adopted for the query side while the large VL model is deployed on the gallery side. The global contrastive distillation and the local alignment regularization are adopted for the alignment between the light model and the VL model for CIR task. Our experiments demonstrate that the proposed ISA could better cope with the real retrieval scenarios and further improve retrieval accuracy and efficiency.", "keywords": "zero-shot;composed image retrieval;asymmetrical", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Yongchao Du;Min Wang;Wengang Zhou;Shuping Hui;Houqiang Li", "authorids": "~Yongchao_Du1;~Min_Wang9;~Wengang_Zhou1;~Shuping_Hui1;~Houqiang_Li1", "gender": "M;F;M;;M", "homepage": "https://github.com/duyc168;;http://staff.ustc.edu.cn/~zhwg/index.html;https://github.com/huisp;https://staff.ustc.edu.cn/~lihq/", "dblp": ";181/2695-19;22/4544-1;;59/7017.html", "google_scholar": ";FFDionEAAAAJ;8s1JF8YAAAAJ;;7sFMIKoAAAAJ", "orcid": ";;0000-0003-1690-9836;;0000-0003-2188-3028", "linkedin": ";;;;", "or_profile": "~Yongchao_Du1;~Min_Wang9;~Wengang_Zhou1;~Shuping_Hui1;~Houqiang_Li1", "aff": "University of Science and Technology of China;Institute of Artificial Intelligence, Hefei Comprehensive National Science Center;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;iai.ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn", "position": "PhD student;Researcher;Full Professor;MS student;Professor", "bibtex": "@inproceedings{\ndu2024imagesentence,\ntitle={Image2Sentence based Asymmetrical Zero-shot Composed Image Retrieval},\nauthor={Yongchao Du and Min Wang and Wengang Zhou and Shuping Hui and Houqiang Li},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=5BXAXOpaWu}\n}", "github": "", "project": "", "reviewers": "ZKF9;zR31;K4T5;WHDg", "pdf_size": 3457534, "rating": "6;8;8;8", "confidence": "4;5;5;2", "soundness": "3;3;3;3", "contribution": "3;3;3;3", "presentation": "3;3;2;2", "wc_summary": "102;143;96;168", "wc_strengths": "75;83;32;116", "wc_weaknesses": "163;86;42;144", "wc_questions": "5;93;26;78", "wc_review": "345;405;196;506", "wc_reply_reviewers": "0;35;13;67", "wc_reply_authors": "733;1719;1078;1723", "reply_reviewers": "0;1;1;1", "reply_authors": "1;3;3;4", "rating_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 1.224744871391589 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 127.25, 29.67637949615822 ], "wc_strengths_avg": [ 76.5, 29.937434759845406 ], "wc_weaknesses_avg": [ 108.75, 47.8506792010312 ], "wc_questions_avg": [ 50.5, 36.16973873281365 ], "wc_review_avg": [ 363.0, 112.27867117133155 ], "wc_reply_reviewers_avg": [ 28.75, 25.380849079571785 ], "wc_reply_authors_avg": [ 1313.25, 425.60567136728804 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 1.0897247358851685 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 12, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=10372206563830069030&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=5BXAXOpaWu", "pdf": "https://openreview.net/pdf?id=5BXAXOpaWu", "email": "ustc.edu.cn;iai.ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn", "author_num": 5, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "University of Science and Technology of China;Hefei Comprehensive National Science Center", "aff_unique_dep": ";Institute of Artificial Intelligence", "aff_unique_url": "http://www.ustc.edu.cn;http://www.hfcn.edu.cn", "aff_unique_abbr": "USTC;", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hefei", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "5BoXZXTJvL", "title": "Beyond Size: How Gradients Shape Pruning Decisions in Large Language Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "Large Language Models (LLMs) with a billion or more parameters are prime targets for network pruning, which aims to reduce a portion of the network weights without compromising performance. Prior approaches such as Weights Magnitude, SparseGPT, and Wanda, either concentrated solely on weights or integrated weights with activations for sparsity. However, they overlooked the informative gradients derived from pretrained large language models. In this paper, we present a novel sparsity-centric pruning method for pretrained LLMs, termed **G**radient-**b**ased **L**anguage **M**odel **P**runer (**GBLM-Pruner**). Distinctively, GBLM-Pruner operates in a training-free manner by harnessing normalized gradients, and substantially outperforms competitive counterparts like SparseGPT and Wanda in multiple benchmarks. Intriguing, after incorporating gradients, the unstructured pruning method tends to reveal some structural patterns post-pruning, which mirrors the geometric interdependence inherent in the LLMs' parameter structure. Additionally, GBLM-Pruner functions without any subsequent retraining or weight updates to maintain its simplicity as other counterparts. Extensive evaluations on LLaMA-1 and LLaMA-2 across various language benchmarks and perplexity show that GBLM-Pruner surpasses magnitude pruning, Wanda (*weights+activations*), and SparseGPT (*weights+activations+weight update*) by significant margins. 
Our code and models will be publicly available.", "keywords": "Large Language Models;Gradient-based Language Model Pruner;Sparsity-centric Pruning", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/7003005c708248982f147f0891366e9f1720f860.zip", "author": "Rocktim Jyoti Das;Liqun Ma;Zhiqiang Shen", "authorids": "~Rocktim_Jyoti_Das2;~Liqun_Ma1;~Zhiqiang_Shen1", "gender": ";M;", "homepage": ";;", "dblp": ";18/2859;", "google_scholar": ";;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Rocktim_Jyoti_Das2;~Liqun_Ma1;~Zhiqiang_Shen1", "aff": ";Mohamed bin Zayed University of Artificial Intelligence;", "aff_domain": ";mbzuai.ac.ae;", "position": ";PhD student;", "bibtex": "@misc{\ndas2024beyond,\ntitle={Beyond Size: How Gradients Shape Pruning Decisions in Large Language Models},\nauthor={Rocktim Jyoti Das and Liqun Ma and Zhiqiang Shen},\nyear={2024},\nurl={https://openreview.net/forum?id=5BoXZXTJvL}\n}", "github": "", "project": "", "reviewers": "B7C2;dK5u;gsUn;k6s5", "site": "https://openreview.net/forum?id=5BoXZXTJvL", "pdf_size": 2882889, "rating": "3;5;5;5", "confidence": "4;4;5;4", "soundness": "2;3;3;2", "contribution": "2;2;2;2", "presentation": "2;3;3;4", "wc_summary": "30;74;25;39", "wc_strengths": "37;56;64;63", "wc_weaknesses": "232;96;149;164", "wc_questions": "9;5;45;1", "wc_review": "308;231;283;267", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1511;484;1100;735", "reply_reviewers": "0;0;0;0", "reply_authors": "4;1;3;1", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 42.0, 19.144189719076646 ], "wc_strengths_avg": [ 55.0, 10.8397416943394 ], "wc_weaknesses_avg": [ 160.25, 48.51997011540712 ], "wc_questions_avg": [ 15.0, 17.549928774784245 ], "wc_review_avg": [ 272.25, 27.94078560098123 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 957.5, 387.41999174023016 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.25, 1.299038105676658 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1523552247744558102&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0", "aff_unique_norm": "Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": "", "aff_unique_url": "https://mbzuai.ac.ae", "aff_unique_abbr": "MBZUAI", "aff_country_unique_index": "0", "aff_country_unique": "United Arab Emirates" }, { "id": "5CBxA1l5RO", "title": "TimewarpVAE: Simultaneous Time-Warping and Representation Learning of Trajectories", "track": "main", "status": "Reject", "tldr": "", "abstract": "Human demonstrations of trajectories are an important source of training data for many machine learning problems. However, the difficulty of collecting human demonstration data for complex tasks makes learning efficient representations of those trajectories challenging. For many problems, such as for handwriting or for quasistatic dexterous manipulation, the exact timings of the trajectories should be factored from their spatial path characteristics. 
In this work, we propose TimewarpVAE, a fully differentiable manifold-learning algorithm that incorporates Dynamic Time Warping (DTW) to simultaneously learn both timing variations and latent factors of spatial variation. We show how the TimewarpVAE algorithm learns appropriate time alignments and meaningful representations of spatial variations in small handwriting and fork manipulation datasets. Our results have lower spatial reconstruction test error than baseline approaches and the learned low-dimensional representations can be used to efficiently generate semantically meaningful novel trajectories.", "keywords": "Representation Learning;Variational Auto-Encoder;Trajectory Data;Dynamic Time Warping", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Travers Rhodes;Daniel Lee", "authorids": "~Travers_Rhodes1;~Daniel_Lee1", "gender": "M;M", "homepage": "https://www.traversrhodes.com/;", "dblp": "223/0175;", "google_scholar": "Bf66PJEAAAAJ;J0l7wWwAAAAJ", "orcid": "0000-0002-2142-5388;", "linkedin": "travers-rhodes/;", "or_profile": "~Travers_Rhodes1;~Daniel_Lee1", "aff": "Cornell University;Cornell University", "aff_domain": "cornell.edu;cornell.edu", "position": "PhD student;Full Professor", "bibtex": "@misc{\nrhodes2024timewarpvae,\ntitle={Timewarp{VAE}: Simultaneous Time-Warping and Representation Learning of Trajectories},\nauthor={Travers Rhodes and Daniel Lee},\nyear={2024},\nurl={https://openreview.net/forum?id=5CBxA1l5RO}\n}", "github": "", "project": "", "reviewers": "qQMc;sdX1;joWW;ui6D", "site": "https://openreview.net/forum?id=5CBxA1l5RO", "pdf_size": 1662677, "rating": "3;5;6;6", "confidence": "4;3;3;4", "soundness": "4;3;3;2", "contribution": "2;2;3;3", "presentation": "4;4;4;3", "wc_summary": "93;169;68;127", "wc_strengths": "23;36;37;81", "wc_weaknesses": "193;134;77;121", "wc_questions": "3;31;94;150", "wc_review": "312;370;276;479", "wc_reply_reviewers": "76;0;40;115", "wc_reply_authors": "694;357;735;792", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 114.25, 37.91684981640748 ], "wc_strengths_avg": [ 44.25, 21.924586655168667 ], "wc_weaknesses_avg": [ 131.25, 41.43896113562694 ], "wc_questions_avg": [ 69.5, 56.97587208634897 ], "wc_review_avg": [ 359.25, 76.84196444651842 ], "wc_reply_reviewers_avg": [ 57.75, 42.60501731017135 ], "wc_reply_authors_avg": [ 644.5, 169.59731719576226 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.40824829046386296, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4503611427123033463&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Cornell University", "aff_unique_dep": "", "aff_unique_url": "https://www.cornell.edu", "aff_unique_abbr": "Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "5COCYDObes", "title": "Ask more, know better: Reinforce-Learned Prompt Questions for Decision Making with Large Language Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "Large language models (LLMs) 
demonstrate their promise in tackling complicated\npractical challenges by combining action-based policies with chain of thought\n(CoT) reasoning. Having high-quality prompts on hand, however, is vital to the\nframework\u2019s effectiveness. Currently, these prompts are handcrafted utilising\nextensive human labor, resulting in CoT policies that frequently fail to generalise.\nHuman intervention is also required in order to develop grounding functions that\nensure low-level controllers appropriately process CoT reasoning. In this paper, we\ntake the first step towards a fully integrated end-to-end framework for task-solving\nin real settings employing complicated reasoning. To that purpose, we offer a new\nleader-follower bilevel framework capable of learning to ask relevant questions\n(prompts) and subsequently undertaking reasoning to guide the learning of actions\nto be performed in an environment. A good prompt should make introspective\nrevisions based on historical findings, leading the CoT to consider the anticipated\ngoals. A prompt-generator policy has its own aim in our system, allowing it to\nadapt to the action policy and automatically root the CoT process towards outputs\nthat lead to decisive, high-performing actions. Meanwhile, the action policy is\nlearning how to use the CoT outputs to take specific actions. Our empirical data\nreveal that our system outperforms leading methods in agent learning benchmarks\nsuch as Overcooked and FourRoom.", "keywords": "Large language models;reinforcement learning;machine learning", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/3b7a587091358a68219ffb7923459f90d1251bd7.pdf", "author": "Xue Yan;Yan Song;Xinyu Cui;Filippos Christianos;Haifeng Zhang;David Henry Mguni;Jun Wang", "authorids": "~Xue_Yan2;~Yan_Song5;~Xinyu_Cui3;~Filippos_Christianos1;~Haifeng_Zhang3;~David_Henry_Mguni1;~Jun_Wang2", "gender": "F;M;M;M;;M;M", "homepage": ";;;;https://pkuzhf.github.io;;http://www0.cs.ucl.ac.uk/staff/jun.wang/", "dblp": ";;;184/8334;93/7133-2;217/2369;w/JunWang12", "google_scholar": "5d0Upv8AAAAJ;;;q09VRMkAAAAJ;;K-_yzBsAAAAJ;https://scholar.google.co.uk/citations?user=wIE1tY4AAAAJ", "orcid": ";;;;;;", "linkedin": "\u96ea-\u95eb-48926a188/;yan-song-079477173/;%E6%96%B0%E5%AE%87-%E5%B4%94-a8506b293/;;;;", "or_profile": "~Xue_Yan2;~Yan_Song5;~Xinyu_Cui3;~Filippos_Christianos1;~Haifeng_Zhang3;~David_Henry_Mguni1;~Jun_Wang2", "aff": "Institute of Automation, Chinese Academy of Sciences;University College London, University of London;Institute of Automation, Chinese Academy of Sciences;Huawei Technologies Ltd.;Institute of Automation, Chinese Academy of Sciences;Queen Mary University, London;University College London", "aff_domain": "ia.ac.cn;ucl.ac.uk;ia.ac.cn;huawei.com;ia.ac.cn;qmul.ac.uk;ucl.ac.uk", "position": "PhD student;PhD student;PhD student;Researcher;Associate Professor;Lecturer;Professor", "bibtex": "@misc{\nyan2024ask,\ntitle={Ask more, know better: Reinforce-Learned Prompt Questions for Decision Making with Large Language Models},\nauthor={Xue Yan and Yan Song and Xinyu Cui and Filippos Christianos and Haifeng Zhang and David Henry Mguni and Jun Wang},\nyear={2024},\nurl={https://openreview.net/forum?id=5COCYDObes}\n}", "github": "", "project": "", "reviewers": "UBnq;5KxL;g6Ja;WqYj", "site": "https://openreview.net/forum?id=5COCYDObes", "pdf_size": 1083443, "rating": "3;5;6;6", "confidence": "5;4;3;4", "soundness": "1;3;3;2", "contribution": "1;2;3;2", "presentation": "1;2;4;3", "wc_summary": "31;63;63;76", 
"wc_strengths": "17;34;88;36", "wc_weaknesses": "226;144;124;136", "wc_questions": "368;16;57;186", "wc_review": "642;257;332;434", "wc_reply_reviewers": "260;76;0;160", "wc_reply_authors": "1878;752;507;1527", "reply_reviewers": "1;1;0;1", "reply_authors": "5;2;2;5", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 58.25, 16.60383991732033 ], "wc_strengths_avg": [ 43.75, 26.592997198510737 ], "wc_weaknesses_avg": [ 157.5, 40.18395202067612 ], "wc_questions_avg": [ 156.75, 137.15205977308543 ], "wc_review_avg": [ 416.25, 144.686514575478 ], "wc_reply_reviewers_avg": [ 124.0, 96.78842906050289 ], "wc_reply_authors_avg": [ 1166.0, 557.4365434737841 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.5, 1.5 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.8660254037844386, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10287892105655605990&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;0;2;0;3;1", "aff_unique_norm": "Chinese Academy of Sciences;University College London;Huawei;Queen Mary University of London", "aff_unique_dep": "Institute of Automation;;Huawei Technologies;", "aff_unique_url": "http://www.ia.cas.cn;https://www.ucl.ac.uk;https://www.huawei.com;https://www.qmul.ac.uk", "aff_unique_abbr": "CAS;UCL;Huawei;QMUL", "aff_campus_unique_index": "1", "aff_campus_unique": ";London", "aff_country_unique_index": "0;1;0;0;0;1;1", "aff_country_unique": "China;United Kingdom" }, { "title": "Interpreting CLIP's Image Representation via Text-Based Decomposition", "status": "Oral", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19436", "id": "5Ca9sSzuDp", "author_site": "Yossi Gandelsman, Alexei Efros, Jacob Steinhardt", "tldr": "", "abstract": "We investigate the CLIP image encoder by analyzing how individual model components affect the final representation. We decompose the image representation as a sum across individual image patches, model layers, and attention heads, and use CLIP's text representation to interpret the summands. Interpreting the attention heads, we characterize each head's role by automatically finding text representations that span its output space, which reveals property-specific roles for many heads (e.g. location or shape). Next, interpreting the image patches, we uncover an emergent spatial localization within CLIP. Finally, we use this understanding to remove spurious features from CLIP and to create a strong zero-shot image segmenter. 
Our results indicate that scalable understanding of transformer models is attainable and can be used to repair and improve models.", "keywords": "CLIP;interpretability;explainability", "primary_area": "visualization or interpretation of learned representations", "supplementary_material": "", "author": "Yossi Gandelsman;Alexei A Efros;Jacob Steinhardt", "authorids": "~Yossi_Gandelsman2;~Alexei_A_Efros1;~Jacob_Steinhardt1", "gender": ";M;M", "homepage": ";http://www.eecs.berkeley.edu/~efros/;https://yossi.gandelsman.com", "dblp": "35/10625;40/6158;232/1765", "google_scholar": ";https://scholar.google.com.tw/citations?user=d97bGd8AAAAJ;https://scholar.google.co.il/citations?user=71L4yYMAAAAJ", "orcid": ";0000-0001-5720-8070;0000-0003-1259-3387", "linkedin": ";alexei-efros-890736a3/;yossi-gandelsman-26582981/", "or_profile": "~Jacob_Steinhardt1;~Alyosha_Efros1;~Yossi_Gandelsman1", "aff": "University of California, Berkeley;University of California, Berkeley;University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;berkeley.edu", "position": "Assistant Professor;Professor;PhD student", "bibtex": "@inproceedings{\ngandelsman2024interpreting,\ntitle={Interpreting {CLIP}'s Image Representation via Text-Based Decomposition},\nauthor={Yossi Gandelsman and Alexei A Efros and Jacob Steinhardt},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=5Ca9sSzuDp}\n}", "github": "", "project": "", "reviewers": "bDCR;eYDd;uT2r;ETim", "pdf_size": 24318917, "rating": "8;8;8;8", "confidence": "4;4;2;4", "soundness": "4;3;3;3", "contribution": "4;3;3;3", "presentation": "4;4;3;4", "wc_summary": "70;97;121;131", "wc_strengths": "109;71;28;149", "wc_weaknesses": "68;209;23;104", "wc_questions": "85;51;84;8", "wc_review": "332;428;256;392", "wc_reply_reviewers": "27;20;0;70", "wc_reply_authors": "408;561;453;279", "reply_reviewers": "1;1;0;2", "reply_authors": "1;1;2;2", "rating_avg": [ 8.0, 0.0 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 104.75, 23.562417108607512 ], "wc_strengths_avg": [ 89.25, 44.846265173367556 ], "wc_weaknesses_avg": [ 101.0, 68.6403671318853 ], "wc_questions_avg": [ 57.0, 31.424512724941337 ], "wc_review_avg": [ 352.0, 65.1766829472013 ], "wc_reply_reviewers_avg": [ 29.25, 25.52817071393875 ], "wc_reply_authors_avg": [ 425.25, 101.09988872397437 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 91, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6302865657616014104&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=5Ca9sSzuDp", "pdf": "https://openreview.net/pdf?id=5Ca9sSzuDp", "email": "berkeley.edu;berkeley.edu;berkeley.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Physics-Regulated Deep Reinforcement Learning: Invariant Embeddings", "status": "Spotlight", "track": "main", "site": 
"https://iclr.cc/virtual/2024/poster/19435", "id": "5Dwqu5urzs", "author_site": "Hongpeng Cao, Yanbing Mao, Lui Sha, Marco Caccamo", "tldr": "", "abstract": "This paper proposes the Phy-DRL: a physics-regulated deep reinforcement learning (DRL) framework for safety-critical autonomous systems. The Phy-DRL has three distinguished invariant-embedding designs: i) residual action policy (i.e., integrating data-driven-DRL action policy and physics-model-based action policy), ii) automatically constructed safety-embedded reward, and iii) physics-model-guided neural network (NN) editing, including link editing and activation editing. Theoretically, the Phy-DRL exhibits 1) a mathematically provable safety guarantee and 2) strict compliance of critic and actor networks with physics knowledge about the action-value function and action policy. Finally, we evaluate the Phy-DRL on a cart-pole system and a quadruped robot. The experiments validate our theoretical results and demonstrate that Phy-DRL features guaranteed safety compared to purely data-driven DRL and solely model-based design while offering remarkably fewer learning parameters and fast training towards safety guarantee.", "keywords": "Physics-informed deep reinforcement learning;Safety-critical autonomous systems", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/289020ab57bf742ddf172c907d97beacd14306b7.zip", "author": "Hongpeng Cao;Yanbing Mao;Lui Sha;Marco Caccamo", "authorids": "~Hongpeng_Cao1;~Yanbing_Mao1;~Lui_Sha1;~Marco_Caccamo2", "gender": "M;M;M;", "homepage": "https://rtsl.cps.mw.tum.de/view_member?id=15;https://ymao578.github.io/index.html;https://ece.illinois.edu/directory/profile/lrs;https://rtsl.cps.mw.tum.de/personal_page/mcaccamo/", "dblp": "285/4627;141/4975;67/5282.html;86/450", "google_scholar": "47WX07UAAAAJ;kN1IRpsAAAAJ;https://scholar.google.com.tw/citations?user=SlXqNooAAAAJ;", "orcid": "0000-0003-4717-8714;;;", "linkedin": "hongpeng-cao-195299206/?originalSubdomain=de;;;", "or_profile": "~Hongpeng_Cao1;~Yanbing_Mao1;~Lui_Sha1;~Marco_Caccamo2", "aff": "Technische Universit\u00e4t M\u00fcnchen;Wayne State University;Department of Computer Science;Technische Universit\u00e4t M\u00fcnchen", "aff_domain": "tum.de;wayne.edu;cs.illinois.edu;tum.de", "position": "PhD student;Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\ncao2024physicsregulated,\ntitle={Physics-Regulated Deep Reinforcement Learning: Invariant Embeddings},\nauthor={Hongpeng Cao and Yanbing Mao and Lui Sha and Marco Caccamo},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=5Dwqu5urzs}\n}", "github": "", "project": "", "reviewers": "GsDG;bsXP;BXs3;4Lie", "pdf_size": 8326933, "rating": "6;8;8;8", "confidence": "3;2;3;2", "soundness": "2;3;3;2", "contribution": "3;3;3;2", "presentation": "2;3;3;1", "wc_summary": "106;63;49;109", "wc_strengths": "31;45;69;81", "wc_weaknesses": "121;51;137;103", "wc_questions": "44;4;47;56", "wc_review": "302;163;302;349", "wc_reply_reviewers": "299;73;83;15", "wc_reply_authors": "3313;2024;1690;1374", "reply_reviewers": "2;2;2;1", "reply_authors": "8;8;6;4", "rating_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 81.75, 26.242856170775315 ], "wc_strengths_avg": [ 56.5, 19.615045245933032 ], "wc_weaknesses_avg": [ 103.0, 
32.341923257592455 ], "wc_questions_avg": [ 37.75, 19.97967717456916 ], "wc_review_avg": [ 279.0, 69.6670653896086 ], "wc_reply_reviewers_avg": [ 117.5, 107.9571674322738 ], "wc_reply_authors_avg": [ 2100.25, 736.9397448231435 ], "reply_reviewers_avg": [ 1.75, 0.4330127018922193 ], "reply_authors_avg": [ 6.5, 1.6583123951777 ], "replies_avg": [ 41, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14670243720961949725&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "openreview": "https://openreview.net/forum?id=5Dwqu5urzs", "pdf": "https://openreview.net/pdf?id=5Dwqu5urzs", "email": "tum.de;wayne.edu;cs.illinois.edu;tum.de", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Technische Universit\u00e4t M\u00fcnchen;Wayne State University;Unknown Institution", "aff_unique_dep": ";;Department of Computer Science", "aff_unique_url": "https://www.tum.de;https://wayne.edu;", "aff_unique_abbr": "TUM;WSU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Germany;United States;" }, { "id": "5E1HnzEBSf", "title": "Local Superior Soups: A Catalyst for Reducing Communication Rounds in Federated Learning with Pre-trained Model", "track": "main", "status": "Reject", "tldr": "", "abstract": "Federated learning (FL) is a learning paradigm that enables collaborative training of models using decentralized data. \nRecently, the utilization of pre-trained weight initialization in FL has been demonstrated to effectively improve model performance. \nHowever, the current pre-trained models have become increasingly parameter-rich. \nThe sheer scale of model parameters introduces substantial communication rounds challenges during their adaptation to FL.\nTo address these communication cost issues and elevate the performance of pre-trained model adaptation in FL, we propose an innovative model interpolation-based local training technique called ``Local Superior Soups.''\nOur method promotes local training across different clients, encouraging the exploration of a connected low-loss basin within a few communication rounds through regularized model interpolation. 
\nThis approach serves as a facilitator for pre-trained model adaptation in FL.\nWe demonstrated its effectiveness and efficiency across diverse widely-used FL datasets.", "keywords": "Federated Learning; Model Soup; Pre-Trained Model Fine-Tuning", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/17a08d39903392bcd1e2b91833dc56cabe3d9c3c.pdf", "author": "Minghui Chen;Meirui Jiang;Qi Dou;Zehua Wang;Xiaoxiao Li", "authorids": "~Minghui_Chen1;~Meirui_Jiang2;~Qi_Dou2;~Zehua_Wang1;~Xiaoxiao_Li1", "gender": "M;F;M;Unspecified;M", "homepage": "https://chenminghui.com/;https://www.cse.cuhk.edu.hk/~qdou;https://people.ece.ubc.ca/zwang/;https://xxlya.github.io/;https://meiruijiang.github.io/MeiruiJiang/", "dblp": ";165/7846;90/10799;71/8042;285/5480", "google_scholar": "aDKyh4cAAAAJ;https://scholar.google.com.hk/citations?user=iHh7IJQAAAAJ;https://scholar.google.ca/citations?user=pquTtPYAAAAJ;sdENOQ4AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0002-3416-9950;;;0000-0003-4228-8420", "linkedin": "minghui-chen-75a046210/;;wangzehua/;;", "or_profile": "~Minghui_Chen1;~Qi_Dou2;~Zehua_Wang1;~Xiaoxiao_Li1;~Meirui_JIANG1", "aff": "University of British Columbia;The Chinese University of Hong Kong;University of British Columbia;University of British Columbia;Department of Computer Science and Engineering, The Chinese University of Hong Kong", "aff_domain": "ubc.ca;cuhk.edu.hk;ubc.ca;ece.ubc.ca;cse.cuhk.edu.hk", "position": "PhD student;Assistant Professor;Researcher;Assistant Professor;PhD student", "bibtex": "@misc{\nchen2024local,\ntitle={Local Superior Soups: A Catalyst for Reducing Communication Rounds in Federated Learning with Pre-trained Model},\nauthor={Minghui Chen and Meirui Jiang and Qi Dou and Zehua Wang and Xiaoxiao Li},\nyear={2024},\nurl={https://openreview.net/forum?id=5E1HnzEBSf}\n}", "github": "", "project": "", "reviewers": "gxja;w5Zc;w8kd;qnQ5", "site": "https://openreview.net/forum?id=5E1HnzEBSf", "pdf_size": 1010806, "rating": "3;3;5;6", "confidence": "4;5;4;4", "soundness": "3;3;2;3", "contribution": "2;2;2;3", "presentation": "4;3;2;2", "wc_summary": "46;117;165;51", "wc_strengths": "24;21;105;32", "wc_weaknesses": "130;80;411;274", "wc_questions": "4;11;282;3", "wc_review": "204;229;963;360", "wc_reply_reviewers": "0;43;0;0", "wc_reply_authors": "306;400;586;297", "reply_reviewers": "0;1;0;0", "reply_authors": "1;2;1;1", "rating_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 94.75, 49.29693195321591 ], "wc_strengths_avg": [ 45.5, 34.586847211042524 ], "wc_weaknesses_avg": [ 223.75, 129.46114281899415 ], "wc_questions_avg": [ 75.0, 119.55124424279322 ], "wc_review_avg": [ 439.0, 308.27828337396716 ], "wc_reply_reviewers_avg": [ 10.75, 18.619546181365433 ], "wc_reply_authors_avg": [ 397.25, 116.20106496930224 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5555555555555555, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:yMmn_TLjQpYJ:scholar.google.com/&scioq=Local+Superior+Soups:+A+Catalyst+for+Reducing+Communication+Rounds+in+Federated+Learning+with+Pre-trained+Model&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": 
"0;1;0;0;1", "aff_unique_norm": "University of British Columbia;Chinese University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "https://www.ubc.ca;https://www.cuhk.edu.hk", "aff_unique_abbr": "UBC;CUHK", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;0;0;1", "aff_country_unique": "Canada;China" }, { "title": "The Effective Horizon Explains Deep RL Performance in Stochastic Environments", "status": "Spotlight", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19434", "id": "5ES5Hdlbxw", "author_site": "Cassidy Laidlaw, Banghua Zhu, Stuart Russell, Anca Dragan", "tldr": "", "abstract": "Reinforcement learning (RL) theory has largely focused on proving minimax sample complexity bounds. These require strategic exploration algorithms that use relatively limited function classes for representing the policy or value function. Our goal is to explain why deep RL algorithms often perform well in practice, despite using random exploration and much more expressive function classes like neural networks. Our work arrives at an explanation by showing that many stochastic MDPs can be solved by performing only a few steps of value iteration on the random policy\u2019s Q function and then acting greedily. When this is true, we find that it is possible to separate the exploration and learning components of RL, making it much easier to analyze. We introduce a new RL algorithm, SQIRL, that iteratively learns a near-optimal policy by exploring randomly to collect rollouts and then performing a limited number of steps of fitted-Q iteration over those roll- outs. We find that any regression algorithm that satisfies basic in-distribution generalization properties can be used in SQIRL to efficiently solve common MDPs. This can explain why deep RL works with complex function approximators like neural networks, since it is empirically established that neural networks generalize well in-distribution. Furthermore, SQIRL explains why random exploration works well in practice, since we show many environments can be solved by effectively estimating the random policy\u2019s Q-function and then applying zero or a few steps of value iteration. We leverage SQIRL to derive instance-dependent sample complexity bounds for RL that are exponential only in an \u201ceffective horizon\u201d of lookahead\u2014which is typically much smaller than the full horizon\u2014and on the complexity of the class used for function approximation. Empirically, we also find that SQIRL performance strongly correlates with PPO and DQN performance in a variety of stochastic environments, supporting that our theoretical analysis is predictive of practical performance. 
Our code and data are available at https://github.com/cassidylaidlaw/effective-horizon.", "keywords": "reinforcement learning;effective horizon;RL theory;theory of reinforcement learning;instance-dependent bounds;empirical validation of theory", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Cassidy Laidlaw;Banghua Zhu;Stuart Russell;Anca Dragan", "authorids": "~Cassidy_Laidlaw1;~Banghua_Zhu1;~Stuart_Russell1;~Anca_Dragan1", "gender": "M;M;M;F", "homepage": "https://cassidylaidlaw.com;https://people.eecs.berkeley.edu/~banghua/;https://people.eecs.berkeley.edu/~russell/;http://www.ancadragan.com/", "dblp": "241/5375;204/5394;;", "google_scholar": "DzeJ67UAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=KJGrjCAAAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Cassidy_Laidlaw1;~Banghua_Zhu1;~Stuart_Russell1;~Anca_Dragan1", "aff": "University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu", "position": "PhD student;PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nlaidlaw2024the,\ntitle={The Effective Horizon Explains Deep {RL} Performance in Stochastic Environments},\nauthor={Cassidy Laidlaw and Banghua Zhu and Stuart Russell and Anca Dragan},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=5ES5Hdlbxw}\n}", "github": "", "project": "", "reviewers": "kbuB;UBNV;1dWJ;yWoc", "pdf_size": 1650168, "rating": "5;6;6;6", "confidence": "2;4;3;3", "soundness": "3;3;3;3", "contribution": "2;3;2;2", "presentation": "3;4;3;3", "wc_summary": "42;120;219;91", "wc_strengths": "13;122;171;154", "wc_weaknesses": "35;196;508;254", "wc_questions": "67;92;4;1", "wc_review": "157;530;902;500", "wc_reply_reviewers": "0;13;225;207", "wc_reply_authors": "438;799;1126;1168", "reply_reviewers": "0;1;2;2", "reply_authors": "1;1;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 118.0, 64.6335826022355 ], "wc_strengths_avg": [ 115.0, 61.461369981476984 ], "wc_weaknesses_avg": [ 248.25, 170.0799444379025 ], "wc_questions_avg": [ 41.0, 39.515819616958474 ], "wc_review_avg": [ 522.25, 263.71042357100714 ], "wc_reply_reviewers_avg": [ 111.25, 105.04374088921243 ], "wc_reply_authors_avg": [ 882.75, 293.8344559441591 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8388709054180809828&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "openreview": "https://openreview.net/forum?id=5ES5Hdlbxw", "pdf": "https://openreview.net/pdf?id=5ES5Hdlbxw", "email": "berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": 
"Weatherproofing Retrieval for Localization with Generative AI and Geometric Consistency", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19433", "id": "5EniAcsO7f", "author_site": "Yannis Kalantidis, Mert Bulent SARIYILDIZ, Rafael Rezende, Philippe Weinzaepfel, Diane Larlus, Gabriela Csurka", "tldr": "", "abstract": "State-of-the-art visual localization approaches generally rely on a first image retrieval step whose role is crucial. Yet, retrieval often struggles when facing varying conditions, due to e.g. weather or time of day, with dramatic consequences on the visual localization accuracy. In this paper, we improve this retrieval step and tailor it to the final localization task. Among the several changes we advocate for, we propose to synthesize variants of the training set images, obtained from generative text-to-image models, in order to automatically expand the training set towards a number of nameable variations that particularly hurt visual localization. After expanding the training set, we propose a training approach that leverages the specificities and the underlying geometry of this mix of real and synthetic images. We experimentally show that those changes translate into large improvements for the most challenging visual localization datasets.", "keywords": "visual localization;image retrieval;synthetic data;domain shift;geometric consistency;long-term visual localization;ret4loc;image alteration", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Yannis Kalantidis;Mert B\u00fclent Sar\u0131y\u0131ld\u0131z;Rafael S. Rezende;Philippe Weinzaepfel;Diane Larlus;Gabriela Csurka", "authorids": "~Yannis_Kalantidis2;~Mert_B\u00fclent_Sar\u0131y\u0131ld\u0131z1;~Rafael_S._Rezende1;~Philippe_Weinzaepfel1;~Diane_Larlus1;~Gabriela_Csurka2", "gender": "M;M;M;M;F;F", "homepage": "https://www.skamalas.com/;https://mbsariyildiz.github.io;https://europe.naverlabs.com/people_user/rafael-sampaio-de-rezende/;https://europe.naverlabs.com/people_user/Philippe-Weinzaepfel/;https://dlarlus.github.io/;https://europe.naverlabs.com/people_user/gabriela-csurka-khedari", "dblp": "33/8693;247/9362;200/8093;29/9989;48/4033;c/GabrielaCsurka", "google_scholar": "QJZQgN8AAAAJ;9vpQ9tIAAAAJ;https://scholar.google.com/citations?hl=fr;https://scholar.google.fr/citations?user=LSxIJ5cAAAAJ;https://scholar.google.fr/citations?user=nI2oJqkAAAAJ;https://scholar.google.fr/citations?user=PXm1lPAAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;gabriela-csurka-0387bb2a/", "or_profile": "~Yannis_Kalantidis2;~Mert_B\u00fclent_Sar\u0131y\u0131ld\u0131z1;~Rafael_S._Rezende1;~Philippe_Weinzaepfel1;~Diane_Larlus1;~Gabriela_Csurka2", "aff": "Naver Labs Europe;Naver Labs Europe;Naver Labs Europe;Naver Labs Europe;NAVER LABS Europe;Naver Labs Europe", "aff_domain": "naverlabs.com;naverlabs.com;naverlabs.com;naverlabs.com;naverlabs.com;naverlabs.com", "position": "Research Scientist;Researcher;Research Scientist;Research Scientist;Principal Researcher;Principal Researcher", "bibtex": "@inproceedings{\nkalantidis2024weatherproofing,\ntitle={Weatherproofing Retrieval for Localization with Generative {AI} and Geometric Consistency},\nauthor={Yannis Kalantidis and Mert B{\\\"u}lent Sar{\\i}y{\\i}ld{\\i}z and Rafael S. 
Rezende and Philippe Weinzaepfel and Diane Larlus and Gabriela Csurka},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=5EniAcsO7f}\n}", "github": "", "project": "", "reviewers": "dtiA;oYk5;Hu6J", "pdf_size": 20924613, "rating": "6;6;8", "confidence": "5;3;3", "soundness": "2;3;3", "contribution": "2;3;3", "presentation": "3;3;3", "wc_summary": "87;69;69", "wc_strengths": "242;28;80", "wc_weaknesses": "412;145;62", "wc_questions": "5;8;62", "wc_review": "746;250;273", "wc_reply_reviewers": "178;21;0", "wc_reply_authors": "1749;806;565", "reply_reviewers": "3;1;0", "reply_authors": "3;2;1", "rating_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 75.0, 8.48528137423857 ], "wc_strengths_avg": [ 116.66666666666667, 91.13116310510301 ], "wc_weaknesses_avg": [ 206.33333333333334, 149.32366040100797 ], "wc_questions_avg": [ 25.0, 26.19160170741759 ], "wc_review_avg": [ 423.0, 228.5884219873497 ], "wc_reply_reviewers_avg": [ 66.33333333333333, 79.4243175747971 ], "wc_reply_authors_avg": [ 1040.0, 510.9018170516392 ], "reply_reviewers_avg": [ 1.3333333333333333, 1.247219128924647 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:7THObuIPnnsJ:scholar.google.com/&scioq=Weatherproofing+Retrieval+for+Localization+with+Generative+AI+and+Geometric+Consistency&hl=en&as_sdt=0,44", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=5EniAcsO7f", "pdf": "https://openreview.net/pdf?id=5EniAcsO7f", "email": "naverlabs.com;naverlabs.com;naverlabs.com;naverlabs.com;naverlabs.com;naverlabs.com", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "NAVER LABS", "aff_unique_dep": "", "aff_unique_url": "https://labs.naver.com", "aff_unique_abbr": "NLE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;0", "aff_country_unique": "Unknown;France" }, { "id": "5EtSvYUU0v", "title": "Connecting NTK and NNGP: A Unified Theoretical Framework for Neural Network Learning Dynamics in the Kernel Regime", "track": "main", "status": "Reject", "tldr": "", "abstract": "Artificial neural networks (ANNs) have revolutionized machine learning in recent years, but a complete theoretical framework for their learning process is still lacking. Substantial theoretical advances have been achieved for infinitely wide networks. In this regime, two disparate theoretical frameworks have been used, in which the network\u2019s output is described using kernels: one framework is based on the Neural Tangent Kernel (NTK), which assumes linearized gradient descent dynamics, while the Neural Network Gaussian Process (NNGP) kernel assumes a Bayesian framework. However, the relation between these two frameworks and between their underlying sets of assumptions has remained elusive. This work unifies these two distinct theories using gradient descent learning dynamics with an additional small noise in an ensemble of randomly initialized infinitely wide deep networks. 
We derive an exact analytical expression for the network input-output function during and after learning and introduce a new time-dependent Neural Dynamical Kernel (NDK) from which both NTK and NNGP kernels can be derived. We identify two important learning phases characterized by different time scales: gradient-driven and diffusive learning. In the initial gradient-driven learning phase, the dynamics is dominated by deterministic gradient descent, and is adequately described by the NTK theory. This phase is followed by the slow diffusive learning stage, during which the network parameters sample the solution space, ultimately approaching the equilibrium posterior distribution corresponding to NNGP. Combined with numerical evaluations on synthetic and benchmark datasets, we provide novel insights into the different roles of initialization, regularization, and network depth, as well as phenomena such as early stopping and representational drift. This work closes the gap between the NTK and NNGP theories, providing a comprehensive framework for understanding the learning process of deep neural networks in the infinite width limit.", "keywords": "Learning dynamics;Neural tangent kernel;Neural network Gaussian process;Infinite width limit;Representational drift;Statistical mechanics", "primary_area": "learning theory", "supplementary_material": "/attachment/78a56dc0d1eb798f652b92220e09b57b7dd3f4c8.pdf", "author": "Yehonatan Avidan;Qianyi Li;Haim Sompolinsky", "authorids": "~Yehonatan_Avidan1;~Qianyi_Li1;~Haim_Sompolinsky1", "gender": ";F;M", "homepage": ";;", "dblp": "274/0837;280/1128;33/5545", "google_scholar": "-pnXrNwAAAAJ;LbzGoc8AAAAJ;", "orcid": ";0000-0002-1448-4566;", "linkedin": ";;", "or_profile": "~Yehonatan_Avidan1;~Qianyi_Li1;~Haim_Sompolinsky1", "aff": ";Harvard University, Harvard University;", "aff_domain": ";g.harvard.edu;", "position": ";PhD student;", "bibtex": "@misc{\navidan2024connecting,\ntitle={Connecting {NTK} and {NNGP}: A Unified Theoretical Framework for Neural Network Learning Dynamics in the Kernel Regime},\nauthor={Yehonatan Avidan and Qianyi Li and Haim Sompolinsky},\nyear={2024},\nurl={https://openreview.net/forum?id=5EtSvYUU0v}\n}", "github": "", "project": "", "reviewers": "2yMn;2CVF;Bpa7;88ey", "site": "https://openreview.net/forum?id=5EtSvYUU0v", "pdf_size": 4473456, "rating": "3;5;8;8", "confidence": "3;5;4;3", "soundness": "3;3;4;3", "contribution": "4;3;3;4", "presentation": "1;2;3;4", "wc_summary": "174;67;113;161", "wc_strengths": "105;54;42;150", "wc_weaknesses": "150;108;202;189", "wc_questions": "152;376;132;124", "wc_review": "581;605;489;624", "wc_reply_reviewers": "262;24;155;108", "wc_reply_authors": "627;717;683;407", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 6.0, 2.1213203435596424 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 3.5, 0.5 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 128.75, 42.2751404492049 ], "wc_strengths_avg": [ 87.75, 43.0254285277904 ], "wc_weaknesses_avg": [ 162.25, 36.70405290972647 ], "wc_questions_avg": [ 196.0, 104.4222198576529 ], "wc_review_avg": [ 574.75, 51.799493240764434 ], "wc_reply_reviewers_avg": [ 137.25, 85.96329158425705 ], "wc_reply_authors_avg": [ 608.5, 120.69279183116114 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 11, 
"gs_cited_by_link": "https://scholar.google.com/scholar?cites=10431141825192819348&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "aff_unique_index": "0", "aff_unique_norm": "Harvard University", "aff_unique_dep": "", "aff_unique_url": "https://www.harvard.edu", "aff_unique_abbr": "Harvard", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "5F0WDt9CjA", "title": "PIANO PERFORMANCE EVALUATION DATASET WITH MULTI-LEVEL PERCEPTUAL FEATURES", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "This study aims to build a comprehensive dataset that enables the automatic evaluation of piano performances. In real-world piano performance, especially within the realm of classical piano music, we encounter a vast spectrum of performance variations. The challenge lies in how to effectively evaluate these performances. We must consider three critical aspects: 1) It is essential to gauge how performers\nperceive and express, and listeners perceive the music, not just the compositional characteristics of music such as beat stability measured from a metronome. 2) Beyond fundamental elements like pitch and duration, we must also embrace higher-level features such as interpretation. 3) Such evaluation should be done by experts to discern the nuanced performances. Regrettably, there exists no dataset\nthat addresses these challenging evaluation tasks. Therefore, we introduce a pioneering dataset PercePiano, annotated by music experts, with more extensive features capable of representing these nuanced aspects effectively. It encapsulates piano performance with a wide range of perceptual features that are recognized by musicians. Our evaluation benchmark includes a novel metric designed to\naccommodate the inherent subjectivity of perception. For the baseline models, we pinpoint a significant issue in current transformer-based models. We in response introduce a new baseline that leverages hierarchical levels of performance, which shows results comparable to that of large pre-trained models. 
In conclusion, our research opens new possibilities for comprehensive piano performance evaluation.", "keywords": "dataset;music;perception;piano performance evaluation;hierarchical attention network", "primary_area": "datasets and benchmarks", "supplementary_material": "", "author": "Jisoo Park;Jongho Kim;Jeong Mi Park;Ahyeon Choi;Wen-Syan Li;Jonghwa Park;seung-won hwang", "authorids": "~Jisoo_Park2;~Jongho_Kim2;~Jeong_Mi_Park1;~Ahyeon_Choi1;~Wen-Syan_Li2;~Jonghwa_Park1;~seung-won_hwang2", "gender": "F;;F;F;M;;", "homepage": ";;https://www.researchgate.net/profile/Jeong-Mi-Park;;https://aml.gsds.snu.ac.kr/home;http/joapkcom;http://seungwonh.github.io", "dblp": ";;;;60/5978.html;;h/SeungwonHwang", "google_scholar": ";;;https://scholar.google.co.kr/citations?user=KEXGGRMAAAAJ;;;63bBmc3mYrAC", "orcid": "0009-0009-0857-8927;;;;;;", "linkedin": ";;;;;;", "or_profile": "~Jisoo_Park2;~Jongho_Kim2;~Jeong_Mi_Park1;~Ahyeon_Choi1;~Wen-Syan_Li2;~Jonghwa_Park1;~seung-won_hwang2", "aff": "Seoul National University;;Seoul National University;Seoul National University;Seoul National University;Seoul National University;Seoul National University", "aff_domain": "snu.ac.kr;;snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr", "position": "MS student;;Lecturer;PhD student;Full Professor;Full Professor;Full Professor", "bibtex": "@misc{\npark2024piano,\ntitle={{PIANO} {PERFORMANCE} {EVALUATION} {DATASET} {WITH} {MULTI}-{LEVEL} {PERCEPTUAL} {FEATURES}},\nauthor={Jisoo Park and Jongho Kim and Jeong Mi Park and Ahyeon Choi and Wen-Syan Li and Jonghwa Park and seung-won hwang},\nyear={2024},\nurl={https://openreview.net/forum?id=5F0WDt9CjA}\n}", "github": "", "project": "", "reviewers": "iYeg;vNEe;Pd1L;zHsN;ZEzP;sviL", "site": "https://openreview.net/forum?id=5F0WDt9CjA", "pdf_size": 746270, "rating": "3;3;3;3;6;6", "confidence": "4;3;4;4;3;3", "soundness": "3;3;2;2;3;2", "contribution": "2;2;2;2;2;2", "presentation": "2;2;3;2;4;3", "wc_summary": "162;73;31;57;29;9", "wc_strengths": "125;79;34;50;22;9", "wc_weaknesses": "260;243;319;509;51;9", "wc_questions": "3;24;90;27;1;9", "wc_review": "550;419;474;643;103;36", "wc_reply_reviewers": "0;0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0;0", "reply_reviewers": "0;0;0;0;0;0", "reply_authors": "0;0;0;0;0;0", "rating_avg": [ 4.0, 1.4142135623730951 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.7453559924999299 ], "wc_summary_avg": [ 60.166666666666664, 49.974715829329355 ], "wc_strengths_avg": [ 53.166666666666664, 39.001780586133364 ], "wc_weaknesses_avg": [ 231.83333333333334, 167.1789626584504 ], "wc_questions_avg": [ 25.666666666666668, 30.39554060859732 ], "wc_review_avg": [ 370.8333333333333, 224.7171679145934 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.7071067811865476, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:F2v9CO1z7x0J:scholar.google.com/&scioq=PIANO+PERFORMANCE+EVALUATION+DATASET+WITH+MULTI-LEVEL+PERCEPTUAL+FEATURES&hl=en&as_sdt=0,44", "gs_version_total": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": 
"South Korea" }, { "id": "5GX6s5TpmV", "title": "The Certification Paradox: Certifications Admit Better Evasion Attacks", "track": "main", "status": "Reject", "tldr": "", "abstract": "In guaranteeing the absence of adversarial examples in bounded spaces, certification mechanisms play an important role in demonstrating neural network robustness. Within this work we ask if certifications themselves can potentially compromise the very models they help to protect? By demonstrating a new attack surface that exploits certified guarantees to construct norm minimising evasion attacks, we demonstrate the heretofore unexplored risks inherent in releasing certifications. Our new *Certification Aware Attack* produces smaller, more difficult to detect adversarial examples more than $74$% of the time than comparable attacks, while reducing the median perturbation norm by more than $10$%. That this is achievable in significantly less computational time highlights an apparent paradox---that releasing certifications can reduce security.", "keywords": "certified robustness;adversarial attacks;risk;randomised smoothing", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "", "author": "Andrew Craig Cullen;Shijie Liu;Paul Montague;Sarah Monazam Erfani;Benjamin I. P. Rubinstein", "authorids": "~Andrew_Craig_Cullen1;~Shijie_Liu4;~Paul_Montague1;~Sarah_Monazam_Erfani1;~Benjamin_I._P._Rubinstein1", "gender": "M;M;M;;M", "homepage": "https://www.andrewcraigcullen.com;https://github.com/shijiel2;;https://people.eng.unimelb.edu.au/smonazam/;http://www.bipr.net/", "dblp": "238/6828;;50/805;136/0170;90/1092", "google_scholar": "BeXBviIAAAAJ;https://scholar.google.com.au/citations?user=lH5nxwMAAAAJ;;https://scholar.google.com.au/citations?user=Jq9ocx4AAAAJ;https://scholar.google.com.au/citations?user=hMG_gR4AAAAJ", "orcid": "0000-0001-8243-6470;0009-0008-2980-6266;0000-0001-9461-7471;;0000-0002-2947-6980", "linkedin": ";;;;benjaminrubinstein/", "or_profile": "~Andrew_Craig_Cullen1;~Shijie_Liu4;~Paul_Montague1;~Sarah_Monazam_Erfani1;~Benjamin_I._P._Rubinstein1", "aff": "The University of Melbourne;The University of Melbourne;Defence Science and Technology Group;The University of Melbourne;The University of Melbourne", "aff_domain": "unimelb.edu.au;unimelb.edu.au;dst.defence.gov.au;unimelb.edu.au;unimelb.edu.au", "position": "Postdoc;PhD student;Researcher;Associate Professor;Associate Professor", "bibtex": "@misc{\ncullen2024the,\ntitle={The Certification Paradox: Certifications Admit Better Evasion Attacks},\nauthor={Andrew Craig Cullen and Shijie Liu and Paul Montague and Sarah Monazam Erfani and Benjamin I. P. 
Rubinstein},\nyear={2024},\nurl={https://openreview.net/forum?id=5GX6s5TpmV}\n}", "github": "", "project": "", "reviewers": "Kwhk;xpHV;Z9pA;VDGn", "site": "https://openreview.net/forum?id=5GX6s5TpmV", "pdf_size": 1206878, "rating": "1;3;5;6", "confidence": "5;3;3;2", "soundness": "2;2;2;3", "contribution": "3;3;3;3", "presentation": "2;2;3;2", "wc_summary": "524;49;75;207", "wc_strengths": "98;20;37;58", "wc_weaknesses": "213;240;447;34", "wc_questions": "108;68;30;5", "wc_review": "943;377;589;304", "wc_reply_reviewers": "412;174;502;18", "wc_reply_authors": "1345;915;1251;231", "reply_reviewers": "2;1;2;1", "reply_authors": "4;4;3;2", "rating_avg": [ 3.75, 1.920286436967152 ], "confidence_avg": [ 3.25, 1.0897247358851685 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 213.75, 188.87479318320908 ], "wc_strengths_avg": [ 53.25, 29.13224158900238 ], "wc_weaknesses_avg": [ 233.5, 146.4965869909603 ], "wc_questions_avg": [ 52.75, 38.99599338393625 ], "wc_review_avg": [ 553.25, 248.17974836799235 ], "wc_reply_reviewers_avg": [ 276.5, 191.40206372973097 ], "wc_reply_authors_avg": [ 935.5, 437.0271730682201 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 3.25, 0.82915619758885 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.9258889211887232, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:IaUUpoRxR7UJ:scholar.google.com/&scioq=The+Certification+Paradox:+Certifications+Admit+Better+Evasion+Attacks&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "University of Melbourne;Defence Science and Technology Group", "aff_unique_dep": ";", "aff_unique_url": "https://www.unimelb.edu.au;https://www.dst.defence.gov.au/", "aff_unique_abbr": "UniMelb;DST Group", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Australia" }, { "id": "5Gt68fnttu", "title": "Dynamic Electroencephalography Representation Learning for Improved Epileptic Seizure Detection", "track": "main", "status": "Reject", "tldr": "", "abstract": "Epileptic seizure is an abnormal brain activity that affects millions of people worldwide. Effectively detecting seizures from electroencephalography (EEG) signals with automated algorithms is essential for seizure diagnosis and treatment. Although much research has been proposed to learn EEG representations, most of them neglect the detection latency when it comes to real-world clinical scenarios where the inputs are streaming. Moreover, they fail to either capture the underlying dynamics of brain activities or localize seizure regions. To this end, we propose an improved seizure detection task named onset detection, which identifies both the presence and the specific timestamps of seizure events, and two new metrics to quantify the timeliness of detection methods. We further introduce the Dynamic Seizure Network, a framework for EEG representation learning, which models the evolutionary brain states and dynamic brain connectivity efficiently. Theoretical analysis and experimental results on three real-world seizure datasets demonstrate that our method outperforms baselines with low time and space complexity. 
Our method can also provide visualizations to assist clinicians in localizing abnormal brain activities for further diagnosis.", "keywords": "Electroencephalography;Epileptic;Seizure;Efficient;Neuroscience", "primary_area": "applications to neuroscience & cognitive science", "supplementary_material": "/attachment/f87e31012249c8580aceb7f4d3a9fc3e054f7790.zip", "author": "Zihang Liu;Haishuai Wang", "authorids": "~Zihang_Liu3;~Haishuai_Wang2", "gender": "M;M", "homepage": "https://github.com/lzhmarkk;https://www.linkedin.com/in/haishuai-wang-b5241775/", "dblp": "193/1470;163/0767", "google_scholar": "https://scholar.google.com.hk/citations?user=05jAWL4AAAAJ;", "orcid": "0000-0002-4114-7960;0000-0003-1617-0920", "linkedin": ";", "or_profile": "~Zihang_Liu3;~Haishuai_Wang2", "aff": "Beihang University;Zhejiang University", "aff_domain": "buaa.edu.cn;zju.edu.cn", "position": "MS student;Research Professor", "bibtex": "@misc{\nliu2024dynamic,\ntitle={Dynamic Electroencephalography Representation Learning for Improved Epileptic Seizure Detection},\nauthor={Zihang Liu and Haishuai Wang},\nyear={2024},\nurl={https://openreview.net/forum?id=5Gt68fnttu}\n}", "github": "", "project": "", "reviewers": "P4gj;Rq1a;UeqT", "site": "https://openreview.net/forum?id=5Gt68fnttu", "pdf_size": 11349966, "rating": "3;5;6", "confidence": "4;4;4", "soundness": "2;3;3", "contribution": "3;2;3", "presentation": "3;3;3", "wc_summary": "81;66;77", "wc_strengths": "26;13;92", "wc_weaknesses": "108;295;25", "wc_questions": "12;161;31", "wc_review": "227;535;225", "wc_reply_reviewers": "0;113;6", "wc_reply_authors": "905;3011;330", "reply_reviewers": "0;2;1", "reply_authors": "3;6;2", "rating_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 74.66666666666667, 6.342099196813483 ], "wc_strengths_avg": [ 43.666666666666664, 34.58644564308715 ], "wc_weaknesses_avg": [ 142.66666666666666, 112.91983390392005 ], "wc_questions_avg": [ 68.0, 66.21681558838863 ], "wc_review_avg": [ 329.0, 145.66628527791414 ], "wc_reply_reviewers_avg": [ 39.666666666666664, 51.91231924012728 ], "wc_reply_authors_avg": [ 1415.3333333333333, 1152.467015676467 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 3.6666666666666665, 1.699673171197595 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:fWvqcbh53SkJ:scholar.google.com/&scioq=Dynamic+Electroencephalography+Representation+Learning+for+Improved+Epileptic+Seizure+Detection&hl=en&as_sdt=0,23", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Beihang University;Zhejiang University", "aff_unique_dep": ";", "aff_unique_url": "http://www.buaa.edu.cn/;https://www.zju.edu.cn", "aff_unique_abbr": "BUAA;ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "When Scaling Meets LLM Finetuning: The Effect of Data, Model and Finetuning Method", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19432", "id": "5HCnKDeTws", "author_site": "Biao Zhang, Zhongtao Liu, Colin Cherry, Orhan Firat", "tldr": "", "abstract": "While large language models (LLMs) often adopt finetuning to unlock their capabilities for downstream 
applications, our understanding on the inductive biases (especially the scaling properties) of different finetuning methods is still limited. To fill this gap, we conduct systematic experiments studying whether and how different scaling factors, including LLM model size, pretraining data size, new finetuning parameter size and finetuning data size, affect the finetuning performance. We consider two types of finetuning \u2013 full-model tuning (FMT) and parameter efficient tuning (PET, including prompt tuning and LoRA), and explore their scaling behaviors in the data-limited regime where the LLM model size substantially outweighs the finetuning data size. Based on two sets of pretrained bilingual LLMs from 1B to 16B and experiments on bilingual machine translation and multilingual summarization benchmarks, we find that 1) LLM finetuning follows a powerbased multiplicative joint scaling law between finetuning data size and each other scaling factor; 2) LLM finetuning benefits more from LLM model scaling than pretraining data scaling, and PET parameter scaling is generally ineffective; and 3) the optimal finetuning method is highly task- and finetuning data-dependent. We hope our findings could shed light on understanding, selecting and developing LLM finetuning methods.", "keywords": "LLM finetuning;Scaling Laws;Full-model finetuning;Parameter efficient tuning;Machine Translation;Multilingual Summarization", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Biao Zhang;Zhongtao Liu;Colin Cherry;Orhan Firat", "authorids": "~Biao_Zhang2;zhongtao@google.com;~Colin_Cherry1;~Orhan_Firat1", "gender": "M;;M;M", "homepage": ";;https://sites.google.com/site/colinacherry/;", "dblp": "https://dblp.uni-trier.de/pers/hd/z/Zhang_0002:Biao;;99/6601;120/2225", "google_scholar": "gqPKjaIAAAAJ;;TNr_OWMAAAAJ;https://scholar.google.com.tr/citations?user=dLaR9lgAAAAJ", "orcid": ";;;", "linkedin": ";;colincherry/;", "or_profile": "~Biao_Zhang2;zhongtao@google.com;~Colin_Cherry1;~Orhan_Firat1", "aff": "Google DeepMind;;Google;Google", "aff_domain": "google.com;;google.com;google.com", "position": "Researcher;;Researcher;Research Scientist", "bibtex": "@inproceedings{\nzhang2024when,\ntitle={When Scaling Meets {LLM} Finetuning: The Effect of Data, Model and Finetuning Method},\nauthor={Biao Zhang and Zhongtao Liu and Colin Cherry and Orhan Firat},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=5HCnKDeTws}\n}", "github": "", "project": "", "reviewers": "wU43;gFLk;oDvP;R4DF", "pdf_size": 1266512, "rating": "5;6;8;8", "confidence": "3;3;3;4", "soundness": "3;2;4;3", "contribution": "3;3;3;3", "presentation": "3;3;4;3", "wc_summary": "30;68;71;25", "wc_strengths": "60;34;115;31", "wc_weaknesses": "121;56;181;59", "wc_questions": "4;245;46;30", "wc_review": "215;403;413;145", "wc_reply_reviewers": "13;126;192;0", "wc_reply_authors": "376;849;393;197", "reply_reviewers": "1;1;1;0", "reply_authors": "1;2;1;1", "rating_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 48.5, 21.10094784600919 ], "wc_strengths_avg": [ 60.0, 33.69718089098849 ], "wc_weaknesses_avg": [ 104.25, 51.34868547489799 ], "wc_questions_avg": [ 81.25, 95.72192799980577 ], "wc_review_avg": [ 294.0, 116.70903992407786 ], 
"wc_reply_reviewers_avg": [ 82.75, 79.87294598297974 ], "wc_reply_authors_avg": [ 453.75, 240.76895044834995 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5555555555555555, "gs_citation": 135, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8034468019146020698&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=5HCnKDeTws", "pdf": "https://openreview.net/pdf?id=5HCnKDeTws", "email": "google.com;;google.com;google.com", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google DeepMind", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United Kingdom;United States" }, { "id": "5HGPR6fg2S", "title": "Normalized Space Alignment: A Versatile Metric for Representation Space Discrepancy Minimization", "track": "main", "status": "Reject", "tldr": "", "abstract": "We introduce a manifold analysis technique for quantifying the discrepancy between two representation spaces. Normalized Space Alignment (NSA) aims to compare pairwise distance between two point clouds. Our technique provides a robust means of comparing representations across different layers and models, with a particular focus on Graph Neural Networks (GNNs) to explore their unique capabilities. We show that our technique acts as a pseudometric, satisfies the properties of a similarity metric, is continuous and differentiable. We also demonstrate that NSA can serve as an effective loss function by utilizing it in autoencoders to preserve representation structure for dimensionality reduction. Furthermore, our empirical analysis showcases that NSA consistently outperforms or matches the results of previous techniques while offering computational efficiency. 
Its versatility extends to robustness analysis and various neural network training and representation learning applications, highlighting its wide applicability and potential to enhance the performance of neural networks.", "keywords": "Deep Learning;Representation Learning;Dimensionality Reduction;Metric Learning;Autoencoders;Similarity Metric;Graph Neural Networks", "primary_area": "visualization or interpretation of learned representations", "supplementary_material": "/attachment/39c7dfd5c7dbd4d6fd9ce095b8b23dec9272d136.zip", "author": "Danish Ebadulla;Ambuj Singh", "authorids": "~Danish_Ebadulla1;~Ambuj_Singh1", "gender": "M;", "homepage": ";", "dblp": "311/5773.html;", "google_scholar": "LNzVTfcAAAAJ;", "orcid": "0000-0002-6635-6080;", "linkedin": "danish-ebadulla-7627a4156/;", "or_profile": "~Danish_Ebadulla1;~Ambuj_Singh1", "aff": "UC Santa Barbara;", "aff_domain": "ucsb.edu;", "position": "PhD student;", "bibtex": "@misc{\nebadulla2024normalized,\ntitle={Normalized Space Alignment: A Versatile Metric for Representation Space Discrepancy Minimization},\nauthor={Danish Ebadulla and Ambuj Singh},\nyear={2024},\nurl={https://openreview.net/forum?id=5HGPR6fg2S}\n}", "github": "", "project": "", "reviewers": "WXtW;wTyL;BYXr;odB3", "site": "https://openreview.net/forum?id=5HGPR6fg2S", "pdf_size": 12475731, "rating": "3;3;3;6", "confidence": "5;2;4;4", "soundness": "2;2;2;3", "contribution": "2;1;2;2", "presentation": "3;1;2;2", "wc_summary": "77;29;92;65", "wc_strengths": "90;17;61;38", "wc_weaknesses": "354;184;319;144", "wc_questions": "191;8;57;2", "wc_review": "712;238;529;249", "wc_reply_reviewers": "129;0;0;16", "wc_reply_authors": "1909;1245;968;822", "reply_reviewers": "1;0;0;1", "reply_authors": "4;2;2;2", "rating_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 65.75, 23.27418097377435 ], "wc_strengths_avg": [ 51.5, 27.13392710243027 ], "wc_weaknesses_avg": [ 250.25, 88.2733680109692 ], "wc_questions_avg": [ 64.5, 76.08712111783439 ], "wc_review_avg": [ 432.0, 199.33263656511446 ], "wc_reply_reviewers_avg": [ 36.25, 53.94615370904584 ], "wc_reply_authors_avg": [ 1236.0, 417.2019894487561 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.13245323570650439, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:MiamZ8MNfdgJ:scholar.google.com/&scioq=Normalized+Space+Alignment:+A+Versatile+Metric+for+Representation+Space+Discrepancy+Minimization&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "University of California, Santa Barbara", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsb.edu", "aff_unique_abbr": "UCSB", "aff_campus_unique_index": "0", "aff_campus_unique": "Santa Barbara", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "5HpZZbgdeK", "title": "Efficient calibration as a binary top-versus-all problem for classifiers with many classes", "track": "main", "status": "Reject", "tldr": "", "abstract": "Most classifiers based on deep neural networks associate their class prediction with a probability known as the confidence score. 
This score is often a by-product of the learning step and may not correctly estimate the classification accuracy, which impacts real-world usage. To be reliably used, the confidence score requires a post-processing calibration step. Data-driven methods have been proposed to calibrate the confidence score of already-trained classifiers. Still, many methods fail when the number of classes is high and per-class calibration data is scarce. To deal with a large number of classes, we propose to reformulate the confidence calibration of multiclass classifiers as a single binary classification problem. Our top-versus-all reformulation allows the use of the binary cross-entropy loss for scaling calibration methods. Contrary to the standard one-versus-all reformulation, it also allows the application of binary calibration methods to multiclass classifiers with efficient use of scarce per-class calibration data and without degradation of the accuracy. Additionally, we solve the problem of scaling methods overfitting the calibration set by introducing a regularization loss term during optimization. We evaluate our approach on an extensive list of deep networks and standard image classification datasets (CIFAR-10, CIFAR-100, and ImageNet). We show that it significantly improves the performance of existing calibration methods.", "keywords": "Calibration;Image Classification;Deep Learning;Neural Networks", "primary_area": "probabilistic methods (Bayesian methods, variational inference, sampling, UQ, etc.)", "supplementary_material": "", "author": "Adrien Le Coz;St\u00e9phane Herbin;Faouzi Adjed", "authorids": "~Adrien_Le_Coz1;~St\u00e9phane_Herbin1;~Faouzi_Adjed1", "gender": "M;M;M", "homepage": ";https://stepherbin.github.io/;", "dblp": "281/7439;49/247;", "google_scholar": "z-0Z-AwAAAAJ;https://scholar.google.fr/citations?user=xap7jEQAAAAJ;XikGL6wAAAAJ", "orcid": ";0000-0002-3341-3018;", "linkedin": ";;", "or_profile": "~Adrien_Le_Coz1;~St\u00e9phane_Herbin1;~Faouzi_Adjed1", "aff": "IRT SystemX;ONERA;SystemX", "aff_domain": "irt-systemx.fr;onera.fr;irt-systemx.fr", "position": "PhD student;Research Scientist;Researcher", "bibtex": "@misc{\ncoz2024efficient,\ntitle={Efficient calibration as a binary top-versus-all problem for classifiers with many classes},\nauthor={Adrien Le Coz and St{\\'e}phane Herbin and Faouzi Adjed},\nyear={2024},\nurl={https://openreview.net/forum?id=5HpZZbgdeK}\n}", "github": "", "project": "", "reviewers": "JkUP;75D8;TXd7;fZjD", "site": "https://openreview.net/forum?id=5HpZZbgdeK", "pdf_size": 312845, "rating": "3;5;6;6", "confidence": "3;4;4;3", "soundness": "3;3;3;3", "contribution": "2;2;2;3", "presentation": "2;3;3;2", "wc_summary": "75;43;71;60", "wc_strengths": "81;21;32;70", "wc_weaknesses": "140;2;145;106", "wc_questions": "812;71;325;49", "wc_review": "1108;137;573;285", "wc_reply_reviewers": "232;19;69;193", "wc_reply_authors": "1339;284;1543;596", "reply_reviewers": "1;1;1;2", "reply_authors": "2;1;2;2", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 62.25, 12.397076268217438 ], "wc_strengths_avg": [ 51.0, 25.10975905897944 ], "wc_weaknesses_avg": [ 98.25, 57.56029447457683 ], "wc_questions_avg": [ 314.25, 307.1639423825655 ], "wc_review_avg": [ 525.75, 370.922751391715 ], "wc_reply_reviewers_avg": [ 128.25, 87.18192186457007 ], "wc_reply_authors_avg": [ 940.5, 517.5618320548763 ], "reply_reviewers_avg": [ 
1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.40824829046386296, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:JIRXmv3-g9EJ:scholar.google.com/&scioq=Efficient+calibration+as+a+binary+top-versus-all+problem+for+classifiers+with+many+classes&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "IRT SystemX;ONERA;SystemX", "aff_unique_dep": ";;", "aff_unique_url": "https://www.irt-systemx.fr;https://www.onera.fr;", "aff_unique_abbr": ";ONERA;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "France;" }, { "id": "5IOKw3AQe4", "title": "On the Theoretical Analysis of Dense Contrastive Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Contrastive learning has achieved outstanding performance in self-supervised learning. However, the canonical image-level matching pretext is unsuitable for multi-object dense prediction tasks like segmentation and detection. Recently, numerous studies have focused on dense contrastive learning (DCL) that adopts patch-level contrast to learning representations aware of local information. Although empirical evidence has validated its superiority, to date, there has not been any theoretical work that could formally explain and guarantee the effectiveness of DCL methods, which hinders their principled development. To bridge this gap, using the language of spectral graph theory, we establish the first theoretical framework for modeling and analyzing DCL by dissecting the corresponding patch-level positive-pair graph. Specifically, by decoupling the image-level and patch-level supervision, we theoretically characterize how different positive pair selection strategies affect the performance of DCL, and verify these insights on both synthetic and real-world datasets. 
Furthermore, drawing inspiration from the theory, we design two unsupervised metrics to guide the selection of positive pairs.", "keywords": "Contrastive Learning;Dense Contrastive Learning", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Lizhe Fang;Yifei Wang;Yisen Wang", "authorids": "~Lizhe_Fang1;~Yifei_Wang1;~Yisen_Wang1", "gender": "M;M;M", "homepage": "https://zero-lab-pku.github.io/;https://yifeiwang77.com;https://yisenwang.github.io/", "dblp": ";00/555-1;172/1346-1", "google_scholar": ";-CLy6YsAAAAJ;uMWPDboAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Lizhe_Fang1;~Yifei_Wang1;~Yisen_Wang1", "aff": "Peking University;Massachusetts Institute of Technology;Peking University", "aff_domain": "pku.edu.cn;mit.edu;pku.edu.cn", "position": "PhD student;Postdoc;Assistant Professor", "bibtex": "@misc{\nfang2024on,\ntitle={On the Theoretical Analysis of Dense Contrastive Learning},\nauthor={Lizhe Fang and Yifei Wang and Yisen Wang},\nyear={2024},\nurl={https://openreview.net/forum?id=5IOKw3AQe4}\n}", "github": "", "project": "", "reviewers": "hTKu;MfsU;vGDf", "site": "https://openreview.net/forum?id=5IOKw3AQe4", "pdf_size": 1259978, "rating": "6;6;6", "confidence": "3;4;2", "soundness": "2;2;3", "contribution": "2;2;3", "presentation": "2;1;3", "wc_summary": "60;103;142", "wc_strengths": "27;59;41", "wc_weaknesses": "71;837;51", "wc_questions": "5;105;43", "wc_review": "163;1104;277", "wc_reply_reviewers": "9;670;96", "wc_reply_authors": "216;3548;1101", "reply_reviewers": "1;3;1", "reply_authors": "1;7;4", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.0, 0.816496580927726 ], "wc_summary_avg": [ 101.66666666666667, 33.48963355361709 ], "wc_strengths_avg": [ 42.333333333333336, 13.097921802925667 ], "wc_weaknesses_avg": [ 319.6666666666667, 365.90101879545995 ], "wc_questions_avg": [ 51.0, 41.21488404286329 ], "wc_review_avg": [ 514.6666666666666, 419.31240011343436 ], "wc_reply_reviewers_avg": [ 258.3333333333333, 293.2511248439164 ], "wc_reply_authors_avg": [ 1621.6666666666667, 1409.225871021234 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.9428090415820634 ], "reply_authors_avg": [ 4.0, 2.449489742783178 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:G_pGcKMqjH4J:scholar.google.com/&scioq=On+the+Theoretical+Analysis+of+Dense+Contrastive+Learning&hl=en&as_sdt=0,47", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "Peking University;Massachusetts Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;https://web.mit.edu", "aff_unique_abbr": "Peking U;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;United States" }, { "id": "5Itc7v0pnA", "title": "Quantile-Free Regression: A Flexible Alternative to Quantile Regression", "track": "main", "status": "Reject", "tldr": "", "abstract": "Constructing valid prediction intervals rather than point estimates is a well-established method for uncertainty quantification in the regression setting. 
Models equipped with this capacity output an interval of values in which the ground truth target will fall with some prespecified probability. This is an essential requirement in many real-world applications in which simple point predictions' inability to convey the magnitude and frequency of errors renders them insufficient for high-stakes decisions. Quantile regression is well-established as a leading approach for obtaining such intervals via the empirical estimation of quantiles in the (non-parametric) distribution of outputs. This method is simple, computationally inexpensive, interpretable, assumption-free, and highly effective. However, it does require that the quantiles being learned are chosen a priori. This results in either (a) intervals that are arbitrarily symmetric around the median which is sub-optimal for real-world skewed distributions or (b) learning an excessive number of intervals. In this work, we propose Quantile-Free Regression (QFR), a direct replacement for quantile regression which liberates it from this limitation whilst maintaining its strengths. We demonstrate that this added flexibility results in intervals with an improvement in desirable qualities (e.g. sharpness) whilst maintaining the essential coverage guarantees of quantile regression.", "keywords": "Quantile regression;interval regression;pinball loss;neural networks", "primary_area": "probabilistic methods (Bayesian methods, variational inference, sampling, UQ, etc.)", "supplementary_material": "/attachment/5443f7f0d63a9813f6460c3ff9fd19c362a3ffd1.zip", "author": "Thomas Pouplin;Alan Jeffares;Nabeel Seedat;Mihaela van der Schaar", "authorids": "~Thomas_Pouplin1;~Alan_Jeffares1;~Nabeel_Seedat1;~Mihaela_van_der_Schaar2", "gender": "M;;;F", "homepage": ";https://alanjeffares.com;;https://www.vanderschaar-lab.com", "dblp": "339/7726;304/1985;227/8368;", "google_scholar": "VVCoRhgAAAAJ;e65kJ08AAAAJ;https://scholar.google.com/citations?hl=en;DZ3S--MAAAAJ", "orcid": ";;;", "linkedin": ";alanjeffares;nabeel-seedat/;", "or_profile": "~Thomas_Pouplin1;~Alan_Jeffares1;~Nabeel_Seedat1;~Mihaela_van_der_Schaar2", "aff": "University of Cambridge;University of Cambridge;AstraZeneca;University of California, Los Angeles", "aff_domain": "cam.ac.uk;cam.ac.uk;astrazeneca.com;ucla.edu", "position": "PhD student;PhD student;Intern;Full Professor", "bibtex": "@misc{\npouplin2024quantilefree,\ntitle={Quantile-Free Regression: A Flexible Alternative to Quantile Regression},\nauthor={Thomas Pouplin and Alan Jeffares and Nabeel Seedat and Mihaela van der Schaar},\nyear={2024},\nurl={https://openreview.net/forum?id=5Itc7v0pnA}\n}", "github": "", "project": "", "reviewers": "eWa2;iRaW;LRPB;ccij;UxpT", "site": "https://openreview.net/forum?id=5Itc7v0pnA", "pdf_size": 685134, "rating": "3;3;5;5;5", "confidence": "4;4;3;4;5", "soundness": "2;2;3;3;2", "contribution": "2;2;3;4;2", "presentation": "3;3;3;2;3", "wc_summary": "85;96;90;53;60", "wc_strengths": "65;106;42;20;58", "wc_weaknesses": "196;182;137;25;35", "wc_questions": "2;101;169;308;508", "wc_review": "348;485;438;406;661", "wc_reply_reviewers": "0;0;69;0;0", "wc_reply_authors": "861;2270;1072;1370;1628", "reply_reviewers": "0;0;1;0;0", "reply_authors": "2;4;3;2;3", "rating_avg": [ 4.2, 0.9797958971132712 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "contribution_avg": [ 2.6, 0.8 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 76.8, 17.08098357823694 ], "wc_strengths_avg": [ 58.2, 28.47033543883879 ], 
"wc_weaknesses_avg": [ 115.0, 72.15815962176418 ], "wc_questions_avg": [ 217.6, 176.03704155659966 ], "wc_review_avg": [ 467.6, 106.47178029881908 ], "wc_reply_reviewers_avg": [ 13.8, 27.6 ], "wc_reply_authors_avg": [ 1440.2, 489.8589184653067 ], "reply_reviewers_avg": [ 0.2, 0.4000000000000001 ], "reply_authors_avg": [ 2.8, 0.7483314773547882 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:R78nC4QmoI0J:scholar.google.com/&scioq=Quantile-Free+Regression:+A+Flexible+Alternative+to+Quantile+Regression&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "University of Cambridge;AstraZeneca;University of California, Los Angeles", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cam.ac.uk;https://www.astrazeneca.com;https://www.ucla.edu", "aff_unique_abbr": "Cambridge;AZ;UCLA", "aff_campus_unique_index": "0;0;2", "aff_campus_unique": "Cambridge;;Los Angeles", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "An Intuitive Multi-Frequency Feature Representation for SO(3)-Equivariant Networks", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19431", "id": "5JWAOLBxwp", "author_site": "Dongwon Son, Jaehyung Kim, Sanghyeon Son, Beomjoon Kim", "tldr": "", "abstract": "The usage of 3D vision algorithms, such as shape reconstruction, remains limited because they require inputs to be at a fixed canonical rotation. Recently, a simple equivariant network, Vector Neuron (VN) has been proposed that can be easily used with the state-of-the-art 3D neural network (NN) architectures. However, its performance is limited because it is designed to use only three-dimensional features, which is insufficient to capture the details present in 3D data. In this paper, we introduce an equivariant feature representation for mapping a 3D point to a high-dimensional feature space. Our feature can discern multiple frequencies present in 3D data, which, as shown by Tancik et al. (2020), is the key to designing an expressive feature for 3D vision tasks. 
Our representation can be used as an input to VNs, and the results demonstrate that with our feature representation, VN captures more details, overcoming the limitation raised in its original paper.", "keywords": "Equivariant networks;SO(3) Equivariance;Fourier features", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Dongwon Son;Jaehyung Kim;Sanghyeon Son;Beomjoon Kim", "authorids": "~Dongwon_Son1;~Jaehyung_Kim2;~Sanghyeon_Son1;~Beomjoon_Kim2", "gender": "M;;;M", "homepage": "https://dongwon-son.github.io/;;;https://beomjoonkim.github.io/", "dblp": "226/6343;;;88/1505", "google_scholar": "https://scholar.google.co.kr/citations?user=oaUQsWgAAAAJ;;;https://scholar.google.ca/citations?user=dw3rEwgAAAAJ", "orcid": "0000-0003-1446-8125;;;", "linkedin": ";jaehyung-kim-a666551ba/;;", "or_profile": "~Dongwon_Son1;~Jaehyung_Kim2;~Sanghyeon_Son1;~Beomjoon_Kim2", "aff": "KAIST;KAIST;;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;;kaist.ac.kr", "position": "PhD student;MS student;;Assistant Professor", "bibtex": "@inproceedings{\nson2024an,\ntitle={An Intuitive Multi-Frequency Feature Representation for {SO}(3)-Equivariant Networks},\nauthor={Dongwon Son and Jaehyung Kim and Sanghyeon Son and Beomjoon Kim},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=5JWAOLBxwp}\n}", "github": "", "project": "", "reviewers": "Jvrz;HjVh;XaCw;79hQ;MkmG", "pdf_size": 3314025, "rating": "5;6;6;6;6", "confidence": "2;2;4;3;3", "soundness": "2;3;3;3;3", "contribution": "3;3;3;3;3", "presentation": "2;2;3;3;3", "wc_summary": "147;82;62;61;48", "wc_strengths": "135;79;76;61;81", "wc_weaknesses": "156;510;183;100;22", "wc_questions": "184;92;3;8;2", "wc_review": "622;763;324;230;153", "wc_reply_reviewers": "0;0;12;26;24", "wc_reply_authors": "1090;2404;636;437;306", "reply_reviewers": "0;0;1;1;1", "reply_authors": "2;5;1;1;1", "rating_avg": [ 5.8, 0.39999999999999997 ], "confidence_avg": [ 2.8, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 80.0, 35.21931288370062 ], "wc_strengths_avg": [ 86.4, 25.295058806019803 ], "wc_weaknesses_avg": [ 194.2, 167.23683804712405 ], "wc_questions_avg": [ 57.8, 71.68374990191292 ], "wc_review_avg": [ 418.4, 234.54006054403584 ], "wc_reply_reviewers_avg": [ 12.4, 11.200000000000001 ], "wc_reply_authors_avg": [ 974.6, 762.5209767606397 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 2.0, 1.5491933384829668 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5345224838248488, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6998817567924967616&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "openreview": "https://openreview.net/forum?id=5JWAOLBxwp", "pdf": "https://openreview.net/pdf?id=5JWAOLBxwp", "email": "kaist.ac.kr;kaist.ac.kr;;kaist.ac.kr", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "id": "5KF3Q79t8B", "title": "Learning An Efficient-And-Rigorous 
Neural Multigrid Solver", "track": "main", "status": "Reject", "tldr": "", "abstract": "Partial Differential Equations (PDEs) and their efficient\nnumerical solutions are of fundamental significance to science and\nengineering involving heavy computation. To date, the historical\nreliance on legacy generic numerical solvers has circumscribed\npossible integration of big data knowledge and exhibits sub-optimal\nefficiency for certain PDE formulations. In contrast, AI-inspired\nneural methods have the potential to learn such knowledge from big data\nand endow numerical solvers with compact structures and high\nefficiency, but still with unconquered challenges including, a lack of\nsound mathematical backbone, no guarantee of correctness or\nconvergence, and low accuracy, thus unable to handle complex, unseen\nscenarios. This paper articulates a mathematically rigorous neural PDE\nsolver by integrating iterative solvers and the Multigrid Method\nwith Convolutional Neural Networks (CNNs). \nOur novel UGrid neural solver, built upon the principled integration of \nU-Net and MultiGrid, manifests\na mathematically rigorous proof of both convergence and correctness,\nand showcases high numerical accuracy and strong generalization power\nto complicated cases not observed during the training phase. In\naddition, we devise a new residual loss metric, which enables\nunsupervised training and affords more stability and a larger solution\nspace over the legacy losses. We conduct extensive experiments on\nPoisson's equations, and our comprehensive evaluations have confirmed\nall of the aforementioned theoretical and numerical advantages.\nFinally, a mathematically-sound proof affords our new method to\ngeneralize to other types of linear PDEs.", "keywords": "Partial Differential Equations;Numerical Solver;Neural Solver;Multigrid Method", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/5d473dd1db48b355b88380959e0f201c7228715f.pdf", "author": "Xi Han;Fei Hou;Hong Qin", "authorids": "~Xi_Han1;~Fei_Hou1;~Hong_Qin1", "gender": "M;M;M", "homepage": ";https://lcs.ios.ac.cn/~houf/;http://www.cs.stonybrook.edu/~qin", "dblp": ";24/3702;79/627-1", "google_scholar": "paSti1kAAAAJ;NWoYRf8AAAAJ;NOcejj8AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Xi_Han1;~Fei_Hou1;~Hong_Qin1", "aff": "State University of New York at Stony Brook;Institute of Software, Chinese Academy of Sciences;Stony Brook University (State University of New York, Stony Brook)", "aff_domain": "stonybrook.edu;ios.ac.cn;cs.stonybrook.edu", "position": "PhD student;Associate Professor;Full Professor", "bibtex": "@misc{\nhan2024learning,\ntitle={Learning An Efficient-And-Rigorous Neural Multigrid Solver},\nauthor={Xi Han and Fei Hou and Hong Qin},\nyear={2024},\nurl={https://openreview.net/forum?id=5KF3Q79t8B}\n}", "github": "", "project": "", "reviewers": "pjtp;Ejzh;BHtd;APYK", "site": "https://openreview.net/forum?id=5KF3Q79t8B", "pdf_size": 25176367, "rating": "3;3;5;6", "confidence": "3;4;4;3", "soundness": "4;3;3;3", "contribution": "2;1;3;3", "presentation": "1;3;2;3", "wc_summary": "26;202;62;106", "wc_strengths": "51;147;46;140", "wc_weaknesses": "249;121;451;136", "wc_questions": "31;23;338;165", "wc_review": "357;493;897;547", "wc_reply_reviewers": "0;0;38;0", "wc_reply_authors": "827;1695;1274;379", "reply_reviewers": "0;0;1;0", "reply_authors": "2;2;3;1", "rating_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], 
"soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 99.0, 65.87108622149782 ], "wc_strengths_avg": [ 96.0, 47.5972688292091 ], "wc_weaknesses_avg": [ 239.25, 131.88702551805466 ], "wc_questions_avg": [ 139.25, 127.8639413595561 ], "wc_review_avg": [ 573.5, 199.1902357044642 ], "wc_reply_reviewers_avg": [ 9.5, 16.454482671904334 ], "wc_reply_authors_avg": [ 1043.75, 491.43024683061583 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.19245008972987526, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:I15nbenFRvcJ:scholar.google.com/&scioq=Learning+An+Efficient-And-Rigorous+Neural+Multigrid+Solver&hl=en&as_sdt=0,14", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "State University of New York at Stony Brook;Chinese Academy of Sciences;Stony Brook University", "aff_unique_dep": ";Institute of Software;", "aff_unique_url": "https://www.stonybrook.edu;http://www.ios.ac.cn;https://www.stonybrook.edu", "aff_unique_abbr": "SUNY Stony Brook;CAS;SBU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stony Brook;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;China" }, { "title": "Implicit Neural Representation Inference for Low-Dimensional Bayesian Deep Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19430", "id": "5KUiMKRebi", "author_site": "Panagiotis Dimitrakopoulos, Giorgos Sfikas, Christophoros Nikou", "tldr": "", "abstract": "Bayesian inference is the standard for providing full predictive distributions with well calibrated uncertainty estimates.\n\tHowever, scaling to a modern, overparameterized deep learning setting \n\ttypically comes at the cost of severe and restrictive approximations, sacrificing model predictive strength.\n\tWith our approach, we factor model parameters as a function of deterministic and probabilistic components;\n\tthe model is solved by combining maximum a posteriori estimation of the former,\n\twith inference over a low-dimensional, Implicit Neural Representation of the latter.\n\tThis results in a solution that combines both predictive accuracy and good calibration,\n\tas it entails inducing stochasticity over the full set of model weights while being comparatively cheap to compute.\n\tExperimentally, our approach compares favorably to the state of the art,\n\tincluding much more expensive methods as well as less expressive posterior approximations over full network parameters.", "keywords": "Bayesian Deep Learning;Implicit neural representations;Probabilistic machine learning;Hypernetworks", "primary_area": "probabilistic methods (Bayesian methods, variational inference, sampling, UQ, etc.)", "supplementary_material": "", "author": "Panagiotis Dimitrakopoulos;Giorgos Sfikas;Christophoros Nikou", "authorids": "~Panagiotis_Dimitrakopoulos2;~Giorgos_Sfikas1;~Christophoros_Nikou1", "gender": ";M;M", "homepage": ";http://www.cs.uoi.gr/~sfikas;https://www.cse.uoi.gr/~cnikou/", "dblp": ";01/747;26/429", "google_scholar": "https://scholar.google.gr/citations?user=Xz0qnGoAAAAJ;X73G9lYAAAAJ;", "orcid": ";0000-0002-7305-2886;", "linkedin": ";giorgos-sfikas-15a30484/;", "or_profile": "~Panagiotis_Dimitrakopoulos2;~Giorgos_Sfikas1;~Christophoros_Nikou1", "aff": "University of Ioannina;University 
of West Attica;University of Ioannina", "aff_domain": "uoi.gr;uniwa.gr;uoi.gr", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\ndimitrakopoulos2024implicit,\ntitle={Implicit Neural Representation Inference for Low-Dimensional Bayesian Deep Learning},\nauthor={Panagiotis Dimitrakopoulos and Giorgos Sfikas and Christophoros Nikou},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=5KUiMKRebi}\n}", "github": "", "project": "", "reviewers": "Ji8X;VEKs;tKmG;wVmX", "pdf_size": 1392743, "rating": "5;5;5;8", "confidence": "4;3;5;4", "soundness": "2;2;3;3", "contribution": "2;3;2;3", "presentation": "2;2;3;4", "wc_summary": "126;108;80;109", "wc_strengths": "43;87;118;20", "wc_weaknesses": "293;275;146;76", "wc_questions": "194;160;198;187", "wc_review": "656;630;542;392", "wc_reply_reviewers": "0;897;19;23", "wc_reply_authors": "1918;3648;2710;1530", "reply_reviewers": "0;3;1;1", "reply_authors": "8;14;9;8", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 105.75, 16.498105951896417 ], "wc_strengths_avg": [ 67.0, 38.03288051147323 ], "wc_weaknesses_avg": [ 197.5, 90.19562073626413 ], "wc_questions_avg": [ 184.75, 14.821858857781638 ], "wc_review_avg": [ 555.0, 103.15522284402279 ], "wc_reply_reviewers_avg": [ 234.75, 382.44893449975774 ], "wc_reply_authors_avg": [ 2451.5, 811.2057383919322 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 9.75, 2.48746859276655 ], "replies_avg": [ 50, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6565143739902807954&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 2, "openreview": "https://openreview.net/forum?id=5KUiMKRebi", "pdf": "https://openreview.net/pdf?id=5KUiMKRebi", "email": "uoi.gr;uniwa.gr;uoi.gr", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Ioannina;University of West Attica", "aff_unique_dep": ";", "aff_unique_url": "https://www.uoi.gr;https://www.uoa.gr", "aff_unique_abbr": "UOI;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Greece" }, { "id": "5KcFkhEj4x", "title": "In Search of the Long-Tail: Systematic Generation of Long-Tail Knowledge via Logical Rule Induced Search", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Since large language models (LLMs) have approached human-level performance on many tasks, it has become increasingly harder for researchers to find tasks that are still challenging to the models. Failure cases usually come from the long-tail distribution -- data to which an oracle language model could assign a probability on the lower end of its distribution. Systematically finding evaluation data in the long-tail distribution is important, but current methodology such as prompt engineering or crowdsourcing are insufficient because coming up with long-tail examples is also hard for humans due to our cognitive bias. In this paper, we propose a Logic-Induced-Knowledge-Search (LINK) framework for systematically generating long-tail knowledge statements. 
Grounded by a symbolic logic rule, we search for long-tail values for each variable of the rule by first prompting a large language model, then verifying the correctness of the values with a critic, and lastly pushing for the long-tail distribution with a reranker. Using this framework we construct a dataset, Logic-Induced-Long-Tail (LINT [https://doi.org/10.5281/zenodo.8384878]), consisting of 200 symbolic rules and 40K knowledge statements spanning across four different domains. Human annotations find that 89% of the statements in LINT are factually correct. In contrast, ChatGPT and GPT4 struggle with directly generating long-tail statements under the guidance of logic rules, each only getting 61% and 79% of their statements correct. Moreover, their ``long-tail\" generations in fact fall into the higher likelihood range, and thus are not really long-tail. Our findings suggest that LINK is effective for generating data in the long-tail distribution while enforcing quality. To demonstrate how the community can utilize LINT for systematically evaluating LLMs' capabilities in the long-tail distribution, we challenge the models with a simple entailment classification task using samples from LINT. We find that ChatGPT and GPT4 performances drop by 2% and 4% when reasoning on long-tail knowledge statements compared to on head distribution statements. We hope our work can inspire future research on generating evaluation data in the long-tail distribution.", "keywords": "long-tail;evaluation;generation;large language model;symbolic rule;reasoning", "primary_area": "datasets and benchmarks", "supplementary_material": "", "author": "Huihan Li;Zeyi Liao;Yuting Ning;Siyuan Wang;Xiang Lorraine Li;Ximing Lu;Faeze Brahman;Wenting Zhao;Yejin Choi;Xiang Ren", "authorids": "~Huihan_Li1;~Zeyi_Liao1;~Yuting_Ning1;~Siyuan_Wang1;~Xiang_Lorraine_Li1;~Ximing_Lu1;~Faeze_Brahman1;~Wenting_Zhao1;~Yejin_Choi1;~Xiang_Ren1", "gender": "F;M;;F;F;F;;F;M;F", "homepage": "https://huihanlhh.github.io/;https://lzy37ld.github.io/;https://nnnyt.github.io;https://siyuanwangw.github.io;https://gloriaximinglu.github.io/;https://fabrahman.github.io;;https://yejinc.github.io/;https://shanzhenren.github.io/;https://people.cs.pitt.edu/~xianglli/", "dblp": "309/5929-1;;;12/9626;24/10879;276/6005;41/10049-2.html;89/579-1;36/360-1;40/1491-69", "google_scholar": "NxdcyroAAAAJ;vpv_bHEAAAAJ;HYmzF-QAAAAJ;t_tryJ0AAAAJ;https://scholar.google.com/citations?hl=en;viCG2ikAAAAJ;sycHskQAAAAJ;vhP-tlcAAAAJ;_moJlrIAAAAJ;SRgRwSoAAAAJ", "orcid": ";;;;;;;;;", "linkedin": ";;;;;;;;xren7;", "or_profile": "~Huihan_Li1;~Zeyi_Liao1;~Yuting_Ning1;~Siyuan_Wang1;~Ximing_Lu1;~Faeze_Brahman1;~Wenting_Zhao1;~Yejin_Choi1;~Xiang_Ren1;~Xiang_Li2", "aff": "University of Southern California;Ohio State University, Columbus;University of Science and Technology of China;University of Southern California;University of Washington;Allen Institute for AI;Cornell University;Department of Computer Science, University of Washington;University of Southern California;University of Pittsburgh", "aff_domain": "usc.edu;osu.edu;ustc.edu.cn;usc.edu;cs.washington.edu;allenai.org;cornell.edu;cs.washington.edu;usc.edu;pitt.edu", "position": "PhD student;PhD student;MS student;Postdoc;PhD student;Postdoc;PhD student;Full Professor;Associate Professor;Assistant Professor", "bibtex": "@misc{\nli2024in,\ntitle={In Search of the Long-Tail: Systematic Generation of Long-Tail Knowledge via Logical Rule Induced Search},\nauthor={Huihan Li and Zeyi Liao and Yuting Ning and Siyuan Wang and Xiang 
Lorraine Li and Ximing Lu and Faeze Brahman and Wenting Zhao and Yejin Choi and Xiang Ren},\nyear={2024},\nurl={https://openreview.net/forum?id=5KcFkhEj4x}\n}", "github": "", "project": "", "reviewers": "zFAF;TKVD;tVDU;pztA", "site": "https://openreview.net/forum?id=5KcFkhEj4x", "pdf_size": 4201363, "rating": "3;3;5;8", "confidence": "3;4;4;4", "soundness": "2;2;2;3", "contribution": "2;2;2;4", "presentation": "3;2;3;4", "wc_summary": "146;74;323;171", "wc_strengths": "32;8;7;81", "wc_weaknesses": "211;240;12;49", "wc_questions": "1;2;5;114", "wc_review": "390;324;347;415", "wc_reply_reviewers": "0;0;0;77", "wc_reply_authors": "1161;1047;956;987", "reply_reviewers": "0;0;0;1", "reply_authors": "4;4;4;3", "rating_avg": [ 4.75, 2.0463381929681126 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 178.5, 90.70970179644513 ], "wc_strengths_avg": [ 32.0, 30.008332176247315 ], "wc_weaknesses_avg": [ 128.0, 98.90652152411387 ], "wc_questions_avg": [ 30.5, 48.23121395942673 ], "wc_review_avg": [ 369.0, 35.58791929854849 ], "wc_reply_reviewers_avg": [ 19.25, 33.34197804570089 ], "wc_reply_authors_avg": [ 1037.75, 78.31786194732335 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.75, 0.4330127018922193 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.49374193110101877, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:x3qfXfiSQ_sJ:scholar.google.com/&scioq=In+Search+of+the+Long-Tail:+Systematic+Generation+of+Long-Tail+Knowledge+via+Logical+Rule+Induced+Search&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;2;0;3;4;5;3;0;6", "aff_unique_norm": "University of Southern California;Ohio State University;University of Science and Technology of China;University of Washington;Allen Institute for AI;Cornell University;University of Pittsburgh", "aff_unique_dep": ";;;;;;", "aff_unique_url": "https://www.usc.edu;https://www.osu.edu;http://www.ustc.edu.cn;https://www.washington.edu;https://allenai.org;https://www.cornell.edu;https://www.pitt.edu", "aff_unique_abbr": "USC;OSU;USTC;UW;AI2;Cornell;Pitt", "aff_campus_unique_index": "0;1;0;3;0", "aff_campus_unique": "Los Angeles;Columbus;;Seattle", "aff_country_unique_index": "0;0;1;0;0;0;0;0;0;0", "aff_country_unique": "United States;China" }, { "id": "5KckEwghKo", "title": "From Cluster Assumption to Graph Convolution: Graph-based Semi-Supervised Learning Revisited", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Graph-based semi-supervised learning (GSSL) has long been a research focus. Traditional methods are generally shallow learners, based on the cluster assumption. Recently, graph convolutional networks (GCNs) have become the predominant techniques for their promising performance. In this paper, we theoretically discuss the relationship between these two types of methods in a unified optimization framework. One of the most intriguing findings is that, unlike traditional ones, typical GCNs may not effectively incorporate both graph structure and label information at each layer. Motivated by this, we propose three simple but powerful graph convolution methods. The first is a supervised method OGC which guides the graph convolution process with labels. 
The others are two \u201cno-learning\u201d unsupervised methods: GGC and its multi-scale version GGCM, both aiming to preserve the graph structure information during the convolution process. Finally, we conduct extensive experiments to show the effectiveness of our methods.", "keywords": "Graph Convolution Networks;Graph Embedding;Semi-Supervised Learning", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/3fbea7a973a7eefb4404071301d5f3e8ebdb258d.zip", "author": "Zheng Wang;Hongming Ding;Li Pan;Jianhua Li;Zhiguo Gong;Philip S. Yu", "authorids": "~Zheng_Wang24;~Hongming_Ding1;~Li_Pan1;~Jianhua_Li3;~Zhiguo_Gong1;~Philip_S._Yu1", "gender": "M;M;M;M;M;M", "homepage": "https://zhengwang100.github.io/;https://www.nowcoder.com/profile/292263636;https://icst.sjtu.edu.cn/DirectoryDetail.aspx?id=12;https://infosec.sjtu.edu.cn/TeamDetail.aspx?id=9;https://www.fst.um.edu.mo/people/fstzgg/;https://cs.uic.edu/profiles/philip-yu/", "dblp": "181/2834-45;;26/4737;93/3389-1.html;95/6295;y/PhilipSYu", "google_scholar": "Znbe2RgAAAAJ;;;;i75MaGYAAAAJ;D0lL1r0AAAAJ", "orcid": ";;0000-0002-0424-9845;;;0000-0002-3491-5968", "linkedin": ";;;;;", "or_profile": "~Zheng_Wang24;~Hongming_Ding1;~Li_Pan1;~Jianhua_Li3;~Zhiguo_Gong1;~Philip_S._Yu1", "aff": "Shanghai Jiaotong University;NIO;Shanghai Jiaotong University;Shanghai Jiaotong University;University of Macau;University of Illinois Chicago", "aff_domain": "sjtu.edu.cn;nio.com;sjtu.edu.cn;sjtu.edu.cn;um.edu.mo;uic.edu", "position": "Associate Professor;Researcher;Full Professor;Full Professor;Full Professor;Full Professor", "bibtex": "@misc{\nwang2024from,\ntitle={From Cluster Assumption to Graph Convolution: Graph-based Semi-Supervised Learning Revisited},\nauthor={Zheng Wang and Hongming Ding and Li Pan and Jianhua Li and Zhiguo Gong and Philip S. 
Yu},\nyear={2024},\nurl={https://openreview.net/forum?id=5KckEwghKo}\n}", "github": "", "project": "", "reviewers": "9Ae7;2zfL;seo4;WdoQ", "site": "https://openreview.net/forum?id=5KckEwghKo", "pdf_size": 898400, "rating": "3;3;5;5", "confidence": "4;4;3;4", "soundness": "2;2;2;2", "contribution": "1;1;2;2", "presentation": "2;1;2;3", "wc_summary": "46;63;84;91", "wc_strengths": "18;11;42;68", "wc_weaknesses": "162;166;345;90", "wc_questions": "34;3;67;2", "wc_review": "260;243;538;251", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.0, 0.0 ], "contribution_avg": [ 1.5, 0.5 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 71.0, 17.7341478509682 ], "wc_strengths_avg": [ 34.75, 22.37604746151563 ], "wc_weaknesses_avg": [ 190.75, 94.05151513931075 ], "wc_questions_avg": [ 26.5, 26.688012290165037 ], "wc_review_avg": [ 323.0, 124.27590273258932 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8502318163900352528&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;0;0;2;3", "aff_unique_norm": "Shanghai Jiao Tong University;NIO;University of Macau;University of Illinois at Chicago", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.sjtu.edu.cn;;https://www.um.edu.mo;https://www.uic.edu", "aff_unique_abbr": "SJTU;;UM;UIC", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Macau SAR;Chicago", "aff_country_unique_index": "0;0;0;0;2", "aff_country_unique": "China;;United States" }, { "title": "MMICL: Empowering Vision-language Model with Multi-Modal In-Context Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19429", "id": "5KojubHBr8", "author_site": "Haozhe Zhao, Zefan Cai, Shuzheng Si, Xiaojian Ma, Kaikai An, Liang Chen, Zixuan Liu, Sheng Wang, Wenjuan Han, Baobao Chang", "tldr": "", "abstract": "Since the resurgence of deep learning, vision-language models (VLMs) enhanced by large language models (LLMs) have grown exponentially in popularity. \nHowever, while LLMs can utilize extensive background knowledge and task information with in-context learning, most VLMs still struggle with understanding complex multi-modal prompts with multiple images, making VLMs less effective in downstream vision-language tasks.\nIn this paper, we address the limitation above by 1) introducing vision-language Model with **M**ulti-**M**odal **I**n-**C**ontext **L**earning(MMICL), a new approach to allow the VLM to deal with multi-modal inputs efficiently; 2) proposing a novel context scheme to augment the in-context learning ability of the VLM; 3) constructing the Multi-modal In-Context Learning (MIC) dataset, designed to enhance the VLM's ability to understand complex multi-modal prompts.\nOur experiments confirm that MMICL achieves new state-of-the-art zero-shot performance on a wide range of general vision-language tasks, especially for complex benchmarks, including MME and MMBench. Our analysis demonstrates that MMICL effectively tackles the challenge of complex multi-modal prompt understanding and emerges the impressive ICL ability. 
Furthermore, we observe that MMICL successfully alleviates language bias in VLMs, a common issue for VLMs that often leads to hallucination when faced with extensive textual context.\nOur code, dataset, dataset tool, and model are available at https://github.com/PKUnlp-icler/MIC.", "keywords": "multi-modal in-context learning; multi-modal instruction tuning; vision-language model", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Haozhe Zhao;Zefan Cai;Shuzheng Si;Xiaojian Ma;Kaikai An;Liang Chen;Zixuan Liu;Sheng Wang;Wenjuan Han;Baobao Chang", "authorids": "~Haozhe_Zhao1;~Zefan_Cai1;~Shuzheng_Si1;~Xiaojian_Ma1;~Kaikai_An1;~Liang_Chen10;~Zixuan_Liu1;~Sheng_Wang7;~Wenjuan_Han1;~Baobao_Chang1", "gender": "M;;M;;M;M;M;;F;M", "homepage": ";;;;https://github.com/kkk-an;https://chenllliang.github.io;;http://homes.cs.washington.edu/~swang;https://scholar.google.com/citations?user=rfVLLfAAAAAJ;http://eecs.pku.edu.cn/EN/People/Faculty/Detail/?ID=6027", "dblp": "299/7199;;324/3680;;;01/5394-24;;;188/9071;91/6051", "google_scholar": "skIXywUAAAAJ;;https://scholar.google.com.hk/citations?user=zO2XyZUAAAAJ;;6TrBRiEAAAAJ;lMKPaTYAAAAJ;yjKOHbEAAAAJ;;rfVLLfAAAAAJ;LaKNyhQAAAAJ", "orcid": "0000-0003-0502-4426;;;;;;;;0000-0002-2327-0842;0000-0003-2824-6750", "linkedin": ";;;;;;;;;", "or_profile": "~Haozhe_Zhao1;~Zefan_Cai1;~Shuzheng_Si1;~Xiaojian_Ma1;~Kaikai_An1;~Liang_Chen10;~Zixuan_Liu1;~Sheng_Wang7;~Wenjuan_Han1;~Baobao_Chang1", "aff": "Peking University;;Peking University;;Peking University;Peking University;University of Washington;University of Washington, Seattle;Beijing Jiaotong University;Peking University", "aff_domain": "pku.edu.cn;;pku.edu.cn;;pku.edu.cn;pku.edu.cn;uw.edu;uw.edu;bjtu.edu.cn;pku.edu.cn", "position": "MS student;;MS student;;MS student;PhD student;PhD student;Assistant Professor;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nzhao2024mmicl,\ntitle={{MMICL}: Empowering Vision-language Model with Multi-Modal In-Context Learning},\nauthor={Haozhe Zhao and Zefan Cai and Shuzheng Si and Xiaojian Ma and Kaikai An and Liang Chen and Zixuan Liu and Sheng Wang and Wenjuan Han and Baobao Chang},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=5KojubHBr8}\n}", "github": "", "project": "", "reviewers": "eWon;7yvn;d1dY;iVLP;QUmJ", "pdf_size": 8910437, "rating": "3;5;6;6;8", "confidence": "4;5;5;4;4", "soundness": "2;3;3;4;3", "contribution": "2;3;2;3;3", "presentation": "1;3;2;2;3", "wc_summary": "106;155;49;117;48", "wc_strengths": "34;109;35;97;20", "wc_weaknesses": "474;404;81;91;77", "wc_questions": "53;83;2;146;4", "wc_review": "667;751;167;451;149", "wc_reply_reviewers": "501;292;0;187;0", "wc_reply_authors": "6009;2965;1415;3120;707", "reply_reviewers": "1;2;0;1;0", "reply_authors": "10;8;4;6;2", "rating_avg": [ 5.6, 1.624807680927192 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "contribution_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.2, 0.7483314773547882 ], "wc_summary_avg": [ 95.0, 41.30375285612676 ], "wc_strengths_avg": [ 59.0, 36.51301137950689 ], "wc_weaknesses_avg": [ 225.4, 175.8619913454866 ], "wc_questions_avg": [ 57.6, 53.7497906972669 ], "wc_review_avg": [ 437.0, 248.00645152898744 ], "wc_reply_reviewers_avg": [ 196.0, 189.2902533148498 ], "wc_reply_authors_avg": [ 2843.2, 1828.4883811498503 ], 
"reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 6.0, 2.8284271247461903 ], "replies_avg": [ 42, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.05025189076296061, "gs_citation": 56, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18220756047687730171&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=5KojubHBr8", "pdf": "https://openreview.net/pdf?id=5KojubHBr8", "email": "pku.edu.cn;;pku.edu.cn;;pku.edu.cn;pku.edu.cn;uw.edu;uw.edu;bjtu.edu.cn;pku.edu.cn", "author_num": 10, "aff_unique_index": "0;0;0;0;1;1;2;0", "aff_unique_norm": "Peking University;University of Washington;Beijing Jiao Tong University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.pku.edu.cn;https://www.washington.edu;http://www.njtu.edu.cn/en", "aff_unique_abbr": "Peking U;UW;BJTU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;0;0;0;1;1;0;0", "aff_country_unique": "China;United States" }, { "id": "5LhYYajlqV", "title": "In-Context Unlearning: Language Models as Few Shot Unlearners", "track": "main", "status": "Reject", "tldr": "", "abstract": "Machine unlearning has garnered increased attention within regulatory contexts, driven by the need to comply with the \"Right to be Forgotten''. However, achieving precise unlearning is computationally infeasible for large models, particularly when dealing with large language models (LLMs). To this end, several algorithms which approximate the removal of training data without retraining the model have been proposed which rely on gradient ascent based model updates. In this work, we propose a new class of unlearning methods called \"In-Context Unlearning'' suitable for LLMs by providing inputs in context and without having to update model parameters. To unlearn a particular training instance, we provide the instance alongside a different label and additional correctly labelled instances as inputs to the LLM at inference time. 
Our experimental results across various text classification tasks demonstrate that these contexts effectively remove specific information from the training set while maintaining performance levels that are competitive with state-of-the-art unlearning methods that require access to the LLM parameters.", "keywords": "Machine unlearning;In-context unlearning;Right to be forgotten;Approximate data deletion", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/02cc08f0314c30511359ffc5f1219cd39d017f78.zip", "author": "Martin Pawelczyk;Seth Neel;Himabindu Lakkaraju", "authorids": "~Martin_Pawelczyk1;~Seth_Neel2;~Himabindu_Lakkaraju1", "gender": "M;F;M", "homepage": "https://sites.google.com/view/martinpawelczyk/;http://web.stanford.edu/~himalv;https://sethneel.com", "dblp": "251/3229;68/9376;188/6406", "google_scholar": "oYAf_hgAAAAJ;;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Martin_Pawelczyk1;~Hima_Lakkaraju1;~Seth_Neel1", "aff": "Harvard University;Harvard University;Harvard University", "aff_domain": "harvard.edu;harvard.edu;harvard.edu", "position": "Postdoc;Assistant Professor;Assistant Professor", "bibtex": "@misc{\npawelczyk2024incontext,\ntitle={In-Context Unlearning: Language Models as Few Shot Unlearners},\nauthor={Martin Pawelczyk and Seth Neel and Himabindu Lakkaraju},\nyear={2024},\nurl={https://openreview.net/forum?id=5LhYYajlqV}\n}", "github": "", "project": "", "reviewers": "fKNk;Awru;poKX", "site": "https://openreview.net/forum?id=5LhYYajlqV", "pdf_size": 15565023, "rating": "5;5;6", "confidence": "4;3;2", "soundness": "3;3;3", "contribution": "3;2;3", "presentation": "2;3;3", "wc_summary": "240;54;61", "wc_strengths": "90;81;24", "wc_weaknesses": "600;216;32", "wc_questions": "311;13;89", "wc_review": "1241;364;206", "wc_reply_reviewers": "130;0;0", "wc_reply_authors": "2149;1608;659", "reply_reviewers": "1;0;0", "reply_authors": "4;3;1", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 118.33333333333333, 86.07877529075071 ], "wc_strengths_avg": [ 65.0, 29.223278392404914 ], "wc_weaknesses_avg": [ 282.6666666666667, 236.62816588244286 ], "wc_questions_avg": [ 137.66666666666666, 126.43136038534463 ], "wc_review_avg": [ 603.6666666666666, 455.2554838280979 ], "wc_reply_reviewers_avg": [ 43.333333333333336, 61.282587702834114 ], "wc_reply_authors_avg": [ 1472.0, 615.844677387624 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 1.247219128924647 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 113, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5161391496565915695&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;0", "aff_unique_norm": "Harvard University", "aff_unique_dep": "", "aff_unique_url": "https://www.harvard.edu", "aff_unique_abbr": "Harvard", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Multiscale Positive-Unlabeled Detection of AI-Generated Texts", "status": "Spotlight", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19428", "id": "5Lp6qU9hzV", "author_site": "Yuchuan Tian, 
Hanting Chen, Xutao Wang, Zheyuan Bai, QINGHUA ZHANG, Ruifeng Li, Chao Xu, Yunhe Wang", "tldr": "", "abstract": "Recent releases of Large Language Models (LLMs), e.g. ChatGPT, are astonishing at generating human-like texts, but they may impact the authenticity of texts. Previous works proposed methods to detect these AI-generated texts, including simple ML classifiers, pretrained-model-based zero-shot methods, and finetuned language classification models. However, mainstream detectors always fail on short texts, like SMSes, Tweets, and reviews. In this paper, a Multiscale Positive-Unlabeled (MPU) training framework is proposed to address the difficulty of short-text detection without sacrificing long-texts. Firstly, we acknowledge the human-resemblance property of short machine texts, and rephrase AI text detection as a partial Positive-Unlabeled (PU) problem by regarding these short machine texts as partially \"unlabeled\". Then in this PU context, we propose the length-sensitive Multiscale PU Loss, where a recurrent model in abstraction is used to estimate positive priors of scale-variant corpora. Additionally, we introduce a Text Multiscaling module to enrich training corpora. Experiments show that our MPU method augments detection performance on long AI-generated texts, and significantly improves short-text detection of language model detectors. Language Models trained with MPU could outcompete existing detectors on various short-text and long-text detection benchmarks. The codes are available at https://github.com/mindspore-lab/mindone/tree/master/examples/detect_chatgpt and https://github.com/YuchuanTian/AIGC_text_detector.", "keywords": "Large Language Models;AI-Generated Texts;Positive-Unlabeled Learning", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/c5966f7285ade08785ad31f1ed8f09767d83fc05.pdf", "author": "Yuchuan Tian;Hanting Chen;Xutao Wang;Zheyuan Bai;QINGHUA ZHANG;Ruifeng Li;Chao Xu;Yunhe Wang", "authorids": "~Yuchuan_Tian1;~Hanting_Chen1;~Xutao_Wang1;~Zheyuan_Bai2;~QINGHUA_ZHANG1;~Ruifeng_Li3;~Chao_Xu1;~Yunhe_Wang1", "gender": "M;M;M;M;F;M;M;M", "homepage": ";;;;https://www.facebook.com/qinghua.zhang.395;https://www.zhihu.com/people/risenberg;http://www.cis.pku.edu.cn/faculty/vision/xuchao/xuchao01.htm;https://www.wangyunhe.site/", "dblp": "193/6675;232/2060;;;;;;63/8217-1", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en;;;;https://scholar.google.co.uk/citations?hl=zh-CN;https://scholar.google.com.sg/citations?user=isizOkYAAAAJ", "orcid": ";;;;;;;0000-0002-0142-509X", "linkedin": ";;;zheyuanbai/;;;;", "or_profile": "~Yuchuan_Tian1;~Hanting_Chen1;~Xutao_Wang1;~Zheyuan_Bai2;~QINGHUA_ZHANG1;~Ruifeng_Li3;~Chao_Xu1;~Yunhe_Wang1", "aff": "Peking University;Huawei Technologies Ltd.;;Huawei Technologies Ltd.;;;Peking University;Huawei Noah's Ark Lab", "aff_domain": "pku.edu.cn;huawei.com;;huawei.com;;;pku.edu;huawei.com", "position": "PhD student;Researcher;;Researcher;;;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\ntian2024multiscale,\ntitle={Multiscale Positive-Unlabeled Detection of {AI}-Generated Texts},\nauthor={Yuchuan Tian and Hanting Chen and Xutao Wang and Zheyuan Bai and QINGHUA ZHANG and Ruifeng Li and Chao Xu and Yunhe Wang},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=5Lp6qU9hzV}\n}", "github": "", 
"project": "", "reviewers": "vhmU;PQsn;scbq;o5iQ", "pdf_size": 351998, "rating": "6;6;6;8", "confidence": "4;5;4;2", "soundness": "3;3;3;3", "contribution": "4;3;3;3", "presentation": "3;2;4;3", "wc_summary": "84;157;100;83", "wc_strengths": "94;66;88;97", "wc_weaknesses": "139;169;81;120", "wc_questions": "4;18;108;99", "wc_review": "321;410;377;399", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "130;733;614;383", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 106.0, 30.20761493398643 ], "wc_strengths_avg": [ 86.25, 12.132085558550928 ], "wc_weaknesses_avg": [ 127.25, 31.90905044027478 ], "wc_questions_avg": [ 57.25, 46.622821664931436 ], "wc_review_avg": [ 376.75, 34.310166131920724 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 465.0, 230.74553083429373 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.9271726499455306, "gs_citation": 42, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11859119720528962574&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=5Lp6qU9hzV", "pdf": "https://openreview.net/pdf?id=5Lp6qU9hzV", "email": "pku.edu.cn;huawei.com;;huawei.com;;;pku.edu;huawei.com", "author_num": 8, "aff_unique_index": "0;1;1;0;1", "aff_unique_norm": "Peking University;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "http://www.pku.edu.cn;https://www.huawei.com", "aff_unique_abbr": "Peking U;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "5Lt6sBoemN", "title": "When and Why Momentum Accelerates SGD: An Empirical Study", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Momentum has become a crucial component in deep learning optimizers, necessitating a comprehensive understanding of when and why it accelerates stochastic gradient descent (SGD). To address the question of ''when'', we establish a meaningful comparison framework that examines the performance of SGD with Momentum (SGDM) under the \\emph{effective learning rates} $\\eta_{ef}$, and offers a holistic view of the momentum acceleration effect. In the comparison of SGDM and SGD with the same effective learning rate and the same batch size, we observe a consistent pattern: when $\\eta_{ef}$ is small, SGDM and SGD experience almost the same empirical training losses; when $\\eta_{ef}$ surpasses a certain threshold, SGDM begins to perform better than SGD. Furthermore, we observe that the advantage of SGDM over SGD becomes more pronounced with a larger batch size. For the question of ``why'', we find that the momentum acceleration is closely related to \\emph{edge of stability} (EoS), a recently discovered phenomenon describing that the sharpness (largest eigenvalue of the Hessian) of the training trajectory often oscillates around the stability threshold. Specifically, the misalignment between SGD and SGDM happens at the same moment that SGD enters the EoS regime and converges slower. Momentum improves the performance of SGDM by preventing or deferring the occurrence of EoS. 
Together, this study unveils the interplay between momentum, learning rates, and batch sizes, thus improving our understanding of momentum acceleration.", "keywords": "Momentum;Optimization", "primary_area": "optimization", "supplementary_material": "", "author": "Jingwen Fu;Bohan Wang;Huishuai Zhang;Zhizheng Zhang;Zhi-Ming Ma;Wei Chen;Nanning Zheng", "authorids": "~Jingwen_Fu1;~Bohan_Wang1;~Huishuai_Zhang3;~Zhizheng_Zhang1;~Zhi-Ming_Ma1;~Wei_Chen1;~Nanning_Zheng1", "gender": "M;M;M;;F;M;M", "homepage": "https://www.jw-fu.cn/;https://bhwangfy.github.io/;;http://homepage.amss.ac.cn/research/homePage/8eb59241e2e74d828fb84eec0efadba5/myHomePage.html;https://weichen-cas.github.io/;;https://huishuai-git.github.io", "dblp": "247/5290;202/1184;67/4758;;;07/256-1;144/7537", "google_scholar": ";LfkHCEUAAAAJ;X7M0I8kAAAAJ;;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=zh-CN;w1srHyIAAAAJ", "orcid": ";;;;;;", "linkedin": ";;;;;;", "or_profile": "~Jingwen_Fu1;~Bohan_Wang1;~Zhizheng_Zhang1;~Zhi-Ming_Ma1;~Wei_Chen1;~Nanning_Zheng1;~Huishuai_Zhang2", "aff": "Microsoft;Microsoft Research Asia, University of Science and Technology of China;Beijing Galbot Co., Ltd;Academy of Mathematics and Systems Science, Chinese Academy of Sciences, Chinese Academy of Sciences; Chinese Academy of Sciences;Xi'an Jiaotong University;Peking University", "aff_domain": "microsoft.com;ustc.edu.cn;galbot.com;amss.ac.cn;ict.ac.cn;xjtu.edu.cn;pku.edu.cn", "position": "Intern;PhD student;Principal Researcher;Full Professor;Full Professor;Full Professor;Assistant Professor", "bibtex": "@misc{\nfu2024when,\ntitle={When and Why Momentum Accelerates {SGD}: An Empirical Study},\nauthor={Jingwen Fu and Bohan Wang and Huishuai Zhang and Zhizheng Zhang and Zhi-Ming Ma and Wei Chen and Nanning Zheng},\nyear={2024},\nurl={https://openreview.net/forum?id=5Lt6sBoemN}\n}", "github": "", "project": "", "reviewers": "GteF;PhKt;Dpus;euxD", "site": "https://openreview.net/forum?id=5Lt6sBoemN", "pdf_size": 1628764, "rating": "3;3;3;6", "confidence": "4;3;4;3", "soundness": "2;2;1;3", "contribution": "2;2;2;3", "presentation": "1;2;3;3", "wc_summary": "81;10;82;112", "wc_strengths": "42;44;21;61", "wc_weaknesses": "767;230;248;145", "wc_questions": "86;73;5;39", "wc_review": "976;357;356;357", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.0, 0.7071067811865476 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 71.25, 37.49249924984996 ], "wc_strengths_avg": [ 42.0, 14.19506956657839 ], "wc_weaknesses_avg": [ 347.5, 245.30236444029643 ], "wc_questions_avg": [ 50.75, 31.499007920885383 ], "wc_review_avg": [ 511.5, 268.179510775898 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7255542862177271988&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1;2;2;3;4", "aff_unique_norm": "Microsoft;Galbot;Chinese Academy of Sciences;Xi'an Jiao Tong University;Peking University", "aff_unique_dep": "Microsoft Corporation;;Academy of Mathematics and Systems Science;;", "aff_unique_url": 
"https://www.microsoft.com;;http://www.cas.cn;https://www.xjtu.edu.cn;http://www.pku.edu.cn", "aff_unique_abbr": "Microsoft;;CAS;XJTU;Peking U", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;1;1;1;1;1;1", "aff_country_unique": "United States;China" }, { "id": "5M2MjyNR2w", "title": "Adaptive Expansion for Hypergraph Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Hypergraph, with its powerful ability to capture higher-order complex relationships, has attracted substantial attention recently. Consequently, an increasing number of hypergraph neural networks (HyGNNs) have emerged to model the high-order relationships among nodes and hyperedges. In general, most HyGNNs leverage typical expansion methods, such as clique expansion (CE), to convert hypergraphs into graphs for representation learning. However, they still face the following limitations in hypergraph expansion: (i) Some expansion methods expand hypergraphs in a straightforward manner, resulting in information loss and redundancy; (ii) Most expansion methods often employ fixed edge weights while ignoring the fact that nodes having similar attribute features within the same hyperedge are more likely to be connected compared with nodes with dissimilar features. In light of these challenges, we design a novel CE-based \\textbf{Ad}aptive \\textbf{E}xpansion method called \\textbf{AdE} to expand hypergraphs into weighted graphs that preserve the higher-order hypergraph structure information. Specifically, we first introduce a Global Simulation Network to pick two representative nodes for symbolizing each hyperedge in an adaptive manner. We then connect the rest of the nodes within the same hyperedge to the corresponding selected nodes. Instead of leveraging the fixed edge weights, we further design a distance-aware kernel function to dynamically adjust the edge weights to make sure that node pairs having similar attribute features within the corresponding hyperedge are more likely to be connected with large weights. After obtaining the adaptive weighted graphs, we employ graph neural networks to model the rich relationships among nodes for downstream tasks. 
Extensive theoretical justifications and empirical experiments over five benchmark hypergraph datasets demonstrate that AdE has excellent rationality, generalization, and effectiveness compared to classic expansion models.", "keywords": "Hypergraph;Hypergraph Expansion.", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/1533602913af5d7fadac0d73ba5241d98959bb5d.zip", "author": "Yiyue Qian;Tianyi Ma;Chuxu Zhang;Yanfang Ye", "authorids": "~Yiyue_Qian2;~Tianyi_Ma3;~Chuxu_Zhang2;~Yanfang_Ye1", "gender": ";M;;", "homepage": "https://yiyueqian.github.io/;https://tianyi-billy-ma.github.io/;;http://yes-lab.org/", "dblp": "261/9059;;;", "google_scholar": "c6c81_kAAAAJ;https://scholar.google.com/citations?hl=en;;egjr888AAAAJ", "orcid": "0000-0001-7924-5438;0009-0003-3790-8469;;", "linkedin": "yiyue-qian-224655212/;Tianyi-Ma-03BILLY;;", "or_profile": "~Yiyue_Qian2;~Tianyi_Ma3;~Chuxu_Zhang2;~Yanfang_Ye1", "aff": "University of Notre Dame;University of Notre Dame;;University of Notre Dame", "aff_domain": "nd.edu;nd.edu;;nd.edu", "position": "PhD student;PhD student;;Associate Professor", "bibtex": "@misc{\nqian2024adaptive,\ntitle={Adaptive Expansion for Hypergraph Learning},\nauthor={Yiyue Qian and Tianyi Ma and Chuxu Zhang and Yanfang Ye},\nyear={2024},\nurl={https://openreview.net/forum?id=5M2MjyNR2w}\n}", "github": "", "project": "", "reviewers": "qSDB;7Lez;1ZMR;UEpe", "site": "https://openreview.net/forum?id=5M2MjyNR2w", "pdf_size": 3900555, "rating": "3;5;6;8", "confidence": "5;3;3;5", "soundness": "2;3;2;3", "contribution": "2;2;2;3", "presentation": "2;3;2;3", "wc_summary": "44;184;55;104", "wc_strengths": "70;54;47;112", "wc_weaknesses": "223;229;132;175", "wc_questions": "2;2;113;73", "wc_review": "339;469;347;464", "wc_reply_reviewers": "669;359;34;23", "wc_reply_authors": "4815;3715;3357;1729", "reply_reviewers": "2;2;1;1", "reply_authors": "12;8;6;4", "rating_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 4.0, 1.0 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 96.75, 55.20586472468301 ], "wc_strengths_avg": [ 70.75, 25.232667318379164 ], "wc_weaknesses_avg": [ 189.75, 39.36607041603213 ], "wc_questions_avg": [ 47.5, 47.64714052280577 ], "wc_review_avg": [ 404.75, 61.84001536222319 ], "wc_reply_reviewers_avg": [ 271.25, 266.37414945898934 ], "wc_reply_authors_avg": [ 3404.0, 1106.2861293535232 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 7.5, 2.958039891549808 ], "replies_avg": [ 43, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11623038287928215160&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Notre Dame", "aff_unique_dep": "", "aff_unique_url": "https://www.nd.edu", "aff_unique_abbr": "Notre Dame", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "5MlPrLO52d", "title": "Neural Tangent Kernels for Axis-Aligned Tree Ensembles", "track": "main", "status": "Reject", "tldr": "", "abstract": "While axis-aligned rules are known to induce an important inductive bias in machine learning models such as typical hard decision tree ensembles, theoretical understanding of the learning behavior is largely unrevealed due to the discrete 
nature of rules. To address this issue, we impose the axis-aligned constraint on soft trees, which relax the splitting process of decision trees and are trained using a gradient method, and present their Neural Tangent Kernel (NTK) that enables us to analytically describe the training behavior. We study two cases: imposing the axis-aligned constraint throughout the entire training process, or only at the initial state. Moreover, we extend the NTK framework to handle various tree architectures simultaneously, and prove that any axis-aligned non-oblivious tree ensemble can be transformed into an axis-aligned oblivious tree ensemble with the same NTK. \nOne can search for suitable tree architecture via Multiple Kernel Learning (MKL), and our numerical experiments show a variety of suitable features depending on the type of constraints, which supports not only the theoretical but also the practical impact of the axis-aligned constraint in tree ensemble learning.", "keywords": "Neural Tangent Kernel;Tree Ensemble;Soft Tree", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/f53cb91411a9db9a0dce7d9535b183654b58b6be.zip", "author": "Ryuichi Kanoh;Mahito Sugiyama", "authorids": "~Ryuichi_Kanoh1;~Mahito_Sugiyama1", "gender": "M;M", "homepage": ";https://mahito.nii.ac.jp/", "dblp": "287/4416;05/8421", "google_scholar": ";qLlRvTkAAAAJ", "orcid": ";0000-0001-5907-9831", "linkedin": "ryuichi-kanoh-43ab4316b/;", "or_profile": "~Ryuichi_Kanoh1;~Mahito_Sugiyama1", "aff": "NII, the Graduate University for Advanced Studies;National Institute of Informatics", "aff_domain": "nii.ac.jp;nii.ac.jp", "position": "PhD student;Associate Professor", "bibtex": "@misc{\nkanoh2024neural,\ntitle={Neural Tangent Kernels for Axis-Aligned Tree Ensembles},\nauthor={Ryuichi Kanoh and Mahito Sugiyama},\nyear={2024},\nurl={https://openreview.net/forum?id=5MlPrLO52d}\n}", "github": "", "project": "", "reviewers": "QWBa;PQpi;Nxzp;w3rN", "site": "https://openreview.net/forum?id=5MlPrLO52d", "pdf_size": 2158884, "rating": "5;5;6;6", "confidence": "4;3;3;2", "soundness": "3;4;3;3", "contribution": "2;3;3;3", "presentation": "3;2;4;3", "wc_summary": "23;40;133;116", "wc_strengths": "18;61;76;77", "wc_weaknesses": "97;450;103;7", "wc_questions": "59;146;65;29", "wc_review": "197;697;377;229", "wc_reply_reviewers": "421;449;0;0", "wc_reply_authors": "1655;2032;631;93", "reply_reviewers": "2;2;0;0", "reply_authors": "3;3;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 78.0, 47.270498199193966 ], "wc_strengths_avg": [ 58.0, 23.947860029656095 ], "wc_weaknesses_avg": [ 164.25, 169.3035365844435 ], "wc_questions_avg": [ 74.75, 43.338060639581 ], "wc_review_avg": [ 375.0, 197.9191754226962 ], "wc_reply_reviewers_avg": [ 217.5, 217.72517080025452 ], "wc_reply_authors_avg": [ 1102.75, 776.3099815795234 ], "reply_reviewers_avg": [ 1.0, 1.0 ], "reply_authors_avg": [ 2.0, 1.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.7071067811865475, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:a8X-VfUeUuEJ:scholar.google.com/&scioq=Neural+Tangent+Kernels+for+Axis-Aligned+Tree+Ensembles&hl=en&as_sdt=0,5", "gs_version_total": 4, "aff_unique_index": "0;0", "aff_unique_norm": "National Institute of Informatics", "aff_unique_dep": 
"", "aff_unique_url": "https://www.nii.ac.jp", "aff_unique_abbr": "NII", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Japan" }, { "id": "5NJzNAXAmx", "title": "Informed POMDP: Leveraging Additional Information in Model-Based RL", "track": "main", "status": "Reject", "tldr": "", "abstract": "In this work, we generalize the problem of learning through interaction in a POMDP by accounting for eventual additional information available at training time. First, we introduce the informed POMDP, a new learning paradigm offering a clear distinction between the training information and the execution observation. Next, we propose an objective that leverages this information for learning a sufficient statistic of the history for the optimal control. We then adapt this informed objective to learn a world model able to sample latent trajectories. Finally, we empirically show a significant learning speed improvement in most environments using this informed world model in the Dreamer algorithm. These results and the simplicity of the proposed adaptation advocate for a systematic consideration of eventual additional information when learning in a POMDP using model-based RL.", "keywords": "POMDP;RNN;Sufficient Statistic;Model-Based;Privileged Information;Asymmetric Learning", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Gaspard Lambrechts;Adrien Bolland;Damien Ernst", "authorids": "~Gaspard_Lambrechts1;~Adrien_Bolland1;~Damien_Ernst1", "gender": "M;;M", "homepage": "https://gsprd.be;;http://www.damien-ernst.be", "dblp": ";;", "google_scholar": "LRcgg2wAAAAJ;8m9bl5oAAAAJ;https://scholar.google.be/citations?user=91ZxYSsAAAAJ", "orcid": ";;", "linkedin": ";adrien-bolland-48497218a/;", "or_profile": "~Gaspard_Lambrechts1;~Adrien_Bolland1;~Damien_Ernst1", "aff": "Universit\u00e9 de Li\u00e8ge;University of Liege;University of Li\u00e8ge", "aff_domain": "ulg.ac.be;ulg.ac.be;uliege.be", "position": "PhD student;PhD student;Full Professor", "bibtex": "@misc{\nlambrechts2024informed,\ntitle={Informed {POMDP}: Leveraging Additional Information in Model-Based {RL}},\nauthor={Gaspard Lambrechts and Adrien Bolland and Damien Ernst},\nyear={2024},\nurl={https://openreview.net/forum?id=5NJzNAXAmx}\n}", "github": "", "project": "", "reviewers": "EdBW;1Mpb;cAA6;SwZJ", "site": "https://openreview.net/forum?id=5NJzNAXAmx", "pdf_size": 868987, "rating": "5;6;6;6", "confidence": "3;4;3;3", "soundness": "3;3;3;3", "contribution": "2;2;3;2", "presentation": "3;2;3;3", "wc_summary": "170;227;101;90", "wc_strengths": "21;89;133;80", "wc_weaknesses": "456;206;138;213", "wc_questions": "101;1;104;97", "wc_review": "748;523;476;480", "wc_reply_reviewers": "54;33;0;17", "wc_reply_authors": "1782;678;838;1079", "reply_reviewers": "1;1;0;1", "reply_authors": "4;2;2;3", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 147.0, 55.43915583772899 ], "wc_strengths_avg": [ 80.75, 39.90222424878092 ], "wc_weaknesses_avg": [ 253.25, 120.66767379874364 ], "wc_questions_avg": [ 75.75, 43.22831826476714 ], "wc_review_avg": [ 556.75, 111.94501998749207 ], "wc_reply_reviewers_avg": [ 26.0, 19.937402037376884 ], "wc_reply_authors_avg": [ 1094.25, 421.94808626180543 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 
0.82915619758885 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13697808195694192667&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;1", "aff_unique_norm": "Universit\u00e9 de Li\u00e8ge;University of Li\u00e8ge", "aff_unique_dep": ";", "aff_unique_url": "https://www.ulg.ac.be;https://www.ulg.ac.be", "aff_unique_abbr": "ULi\u00e8ge;ULi\u00e8ge", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Belgium" }, { "title": "PandaLM: An Automatic Evaluation Benchmark for LLM Instruction Tuning Optimization", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19427", "id": "5Nn2BLV7SB", "author_site": "Yidong Wang, Zhuohao Yu, Zhengran Zeng, Linyi Yang, Wenjin Yao, Cunxiang Wang, Hao Chen, Chaoya Jiang, Rui Xie, Jindong Wang, Xing Xie, Wei Ye, Shikun Zhang, Yue Zhang", "tldr": "", "abstract": "Instruction tuning large language models (LLMs) remains a challenging task, owing to the complexity of hyperparameter selection and the difficulty involved in evaluating the tuned models. To determine the optimal hyperparameters, an automatic, robust, and reliable evaluation benchmark is essential. However, establishing such a benchmark is not a trivial task due to the challenges associated with evaluation accuracy and privacy protection. In response to these challenges, we introduce a judge large language model, named PandaLM, which is trained to distinguish the superior model given several LLMs. PandaLM's focus extends beyond just the objective correctness of responses, which is the main focus of traditional evaluation datasets. It addresses vital subjective factors such as relative conciseness, clarity, adherence to instructions, comprehensiveness, and formality. To ensure the reliability of PandaLM, we collect a diverse human-annotated test dataset, where all contexts are generated by humans and labels are aligned with human preferences. Our findings reveal that PandaLM-7B offers a performance comparable to both GPT-3.5 and GPT-4. Impressively, PandaLM-70B surpasses their performance. PandaLM enables the evaluation of LLM to be fairer but with less cost, evidenced by significant improvements achieved by models tuned through PandaLM compared to their counterparts trained with default Alpaca's hyperparameters. 
In addition, PandaLM does not depend on API-based evaluations, thus avoiding potential data leakage.", "keywords": "LLM evaluation", "primary_area": "datasets and benchmarks", "supplementary_material": "/attachment/4400e4f0f67603849e18132665f6c3cb9cd1ce73.pdf", "author": "Yidong Wang;Zhuohao Yu;Wenjin Yao;Zhengran Zeng;Linyi Yang;Cunxiang Wang;Hao Chen;Chaoya Jiang;Rui Xie;Jindong Wang;Xing Xie;Wei Ye;Shikun Zhang;Yue Zhang", "authorids": "~Yidong_Wang1;~Zhuohao_Yu1;~Wenjin_Yao1;~Zhengran_Zeng2;~Linyi_Yang1;~Cunxiang_Wang1;~Hao_Chen15;~Chaoya_Jiang1;~Rui_Xie2;~Jindong_Wang1;~Xing_Xie3;~Wei_Ye2;~Shikun_Zhang2;~Yue_Zhang7", "gender": "M;;M;;;Not Specified;M;;M;;M;M;M;M", "homepage": "https://qianlanwyd.github.io/;;;;https://yanglinyi.github.io/;https://wangcunxiang.github.io/;https://hhhhhhao.github.io/;;;;http://research.microsoft.com/en-us/people/xingx/;https://se.pku.edu.cn/kcl/weiye/;;http://frcchang.github.io", "dblp": "59/6759.html;;;;218/8007;213/1862.html;;;86/2228-3.html;;08/6809-1;09/5394-4;83/3715.html;47/722-4", "google_scholar": ";;FdntfpkAAAAJ;;go3sFxcAAAAJ;https://scholar.google.com.sg/citations?hl=en;tktqkhwAAAAJ;;6PcaSxgAAAAJ;;5EQfAFIAAAAJ;RgLGFMIAAAAJ;uiklLscAAAAJ;", "orcid": ";;;;;;;;;;0000-0002-8608-8482;;;0000-0002-5214-2268", "linkedin": ";;;;;;haochen97/;;;;xingx/;;;", "or_profile": "~Yidong_Wang1;~Zhuohao_Yu1;~Wenjin_Yao1;~Zhengran_Zeng2;~Linyi_Yang1;~Cunxiang_Wang1;~Hao_Chen15;~Chaoya_Jiang1;~Rui_Xie2;~Jindong_Wang1;~Xing_Xie3;~Wei_Ye2;~Shikun_Zhang2;~Yue_Zhang7", "aff": "Peking University;;Peking University;;Westlake University;Westlake University;Carnegie Mellon University;;Peking University;;Microsoft Research Asia;Peking University;Peking University;Westlake University", "aff_domain": "pku.edu.cn;;pku.edu.cn;;westlake.edu.cn;westlake.edu.cn;andrew.cmu.edu;;pku.edu.cn;;microsoft.com;pku.edu.cn;pku.edu.cn;westlake.edu.cn", "position": "PhD student;;MS student;;Researcher;PhD student;PhD student;;Postdoc;;Senior Principal Researcher;Associate Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nwang2024pandalm,\ntitle={Panda{LM}: An Automatic Evaluation Benchmark for {LLM} Instruction Tuning Optimization},\nauthor={Yidong Wang and Zhuohao Yu and Wenjin Yao and Zhengran Zeng and Linyi Yang and Cunxiang Wang and Hao Chen and Chaoya Jiang and Rui Xie and Jindong Wang and Xing Xie and Wei Ye and Shikun Zhang and Yue Zhang},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=5Nn2BLV7SB}\n}", "github": "", "project": "", "reviewers": "8MGB;FMz2;Vuch", "pdf_size": 3818880, "rating": "5;8;8", "confidence": "4;3;3", "soundness": "2;3;3", "contribution": "2;3;3", "presentation": "2;3;3", "wc_summary": "99;182;86", "wc_strengths": "77;186;122", "wc_weaknesses": "162;165;137", "wc_questions": "283;126;6", "wc_review": "621;659;351", "wc_reply_reviewers": "0;17;0", "wc_reply_authors": "1942;654;510", "reply_reviewers": "0;1;0", "reply_authors": "3;1;1", "rating_avg": [ 7.0, 1.4142135623730951 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 122.33333333333333, 42.52319628417203 ], "wc_strengths_avg": [ 128.33333333333334, 44.723844000960185 ], "wc_weaknesses_avg": [ 154.66666666666666, 12.552113589175153 ], "wc_questions_avg": [ 138.33333333333334, 
113.42055467252055 ], "wc_review_avg": [ 543.6666666666666, 137.11633341396237 ], "wc_reply_reviewers_avg": [ 5.666666666666667, 8.013876853447538 ], "wc_reply_authors_avg": [ 1035.3333333333333, 643.7998308653259 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 14, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 227, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13482765834651698548&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=5Nn2BLV7SB", "pdf": "https://openreview.net/pdf?id=5Nn2BLV7SB", "email": "pku.edu.cn;;pku.edu.cn;;westlake.edu.cn;westlake.edu.cn;andrew.cmu.edu;;pku.edu.cn;;microsoft.com;pku.edu.cn;pku.edu.cn;westlake.edu.cn", "author_num": 14, "aff_unique_index": "0;0;1;1;2;0;3;0;0;1", "aff_unique_norm": "Peking University;Westlake University;Carnegie Mellon University;Microsoft", "aff_unique_dep": ";;;Research", "aff_unique_url": "http://www.pku.edu.cn;https://www.westlake.edu.cn;https://www.cmu.edu;https://www.microsoft.com/en-us/research/group/asia", "aff_unique_abbr": "Peking U;WU;CMU;MSR Asia", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;0;0;0;1;0;0;0;0;0", "aff_country_unique": "China;United States" }, { "id": "5Osk2F3qCn", "title": "TADIS: Steering Models for Deep-Thinking about Demonstration Examples", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Instruction tuning has been demonstrated that could significantly improve the zero-shot generalization capability to unseen tasks by an apparent margin. By incorporating additional context (e.g., task definition, examples) during the fine-tuning process, Large Language Models (LLMs) achieved much higher performance than before. However, recent work reported that delusive task examples can achieve almost the same performance as correct task examples, indicating the input-label correspondence is less important than previously thought. Intrigued by this counter-intuitive observation, we suspect models have the same illusion of competence as humans. Therefore, we propose a novel method called TADIS that steers LLMs for \"Deep-Thinking'' about demonstration examples instead of merely seeing. To alleviate the illusion of competence of models, we first ask the model to verify the correctness of shown examples. Then, using the verification results as conditions to elicit models for a better answer. Our experimental results show that TADIS consistently outperforms competitive baselines on in-domain and out-domain tasks (improving 2.79 and 4.03 average ROUGLE-L on out-domain and in-domain datasets, respectively). \nDespite the presence of generated examples (not all of the thinking labels are accurate), TADIS can notably enhance performance in zero-shot and few-shot settings. This also suggests that our approach can be adopted on a large scale to improve the instruction following capabilities of models without any manual labor. 
Moreover, we construct three types of thinking labels with different model sizes and find that small models learn from the format of TADIS but larger models can be steered for \"Deep-Thinking''.", "keywords": "Instruction Tuning;Language Model;Few-Shot", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/6c070efd803b8a4afa978472dde5fd91f3bb1ed9.zip", "author": "Tianci Xue;Ziqi Wang;Yixia Li;Yun Chen;Guanhua Chen", "authorids": "~Tianci_Xue1;~Ziqi_Wang2;~Yixia_Li1;~Yun_Chen1;~Guanhua_Chen1", "gender": "M;;M;F;M", "homepage": "https://xuetianci.github.io/;https://www.wzq016.github.io;https://liyixia.me;https://yunc.me/;https://ghchen.me", "dblp": "347/9360;38/8097-3;257/2679;10/5680-7;85/3682-1", "google_scholar": "wVFSbzkAAAAJ;xYRZiZkAAAAJ;LrYjRNYAAAAJ;vXd0JQMAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";;0000-0002-0921-7551;0000-0002-3563-7592;0000-0002-5353-9734", "linkedin": ";;liyixia/;;", "or_profile": "~Tianci_Xue1;~Ziqi_Wang2;~Yixia_Li1;~Yun_Chen1;~Guanhua_Chen1", "aff": "Nanjing University;Meta Facebook;Southern University of Science and Technology ;Shanghai University of Finance and Economics;Southern University of Science and Technology", "aff_domain": "nju.edu.cn;meta.com;sustech.edu;sufe.edu.cn;sustech.edu.cn", "position": "MS student;Intern;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@misc{\nxue2024tadis,\ntitle={{TADIS}: Steering Models for Deep-Thinking about Demonstration Examples},\nauthor={Tianci Xue and Ziqi Wang and Yixia Li and Yun Chen and Guanhua Chen},\nyear={2024},\nurl={https://openreview.net/forum?id=5Osk2F3qCn}\n}", "github": "", "project": "", "reviewers": "B4dK;Fers;B5Pw;8SUE", "site": "https://openreview.net/forum?id=5Osk2F3qCn", "pdf_size": 525928, "rating": "3;3;3;6", "confidence": "2;4;2;3", "soundness": "2;2;2;3", "contribution": "2;2;2;3", "presentation": "3;2;1;2", "wc_summary": "116;93;66;91", "wc_strengths": "60;152;104;42", "wc_weaknesses": "755;267;242;144", "wc_questions": "169;81;112;136", "wc_review": "1100;593;524;413", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 91.5, 17.698870020427858 ], "wc_strengths_avg": [ 89.5, 42.55290824373817 ], "wc_weaknesses_avg": [ 352.0, 237.16977041773262 ], "wc_questions_avg": [ 124.5, 32.25290684574028 ], "wc_review_avg": [ 657.5, 263.42408773686583 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2471280889710494543&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1;2;3;2", "aff_unique_norm": "Nanjing University;Meta;Southern University of Science and Technology;Shanghai University of Finance and Economics", "aff_unique_dep": ";Meta Platforms, Inc.;;", "aff_unique_url": "https://www.nju.edu.cn;https://meta.com;https://www.sustech.edu.cn;http://www.sufe.edu.cn", "aff_unique_abbr": "Nanjing U;Meta;SUSTech;SUFE", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "China;United States" }, { "id": "5PkgaUwiY0", "title": "VideoDirectorGPT: Consistent Multi-Scene Video Generation via LLM-Guided Planning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Although recent text-to-video (T2V) generation methods have seen significant advancements, the majority of these works focus on producing short video clips of a single event with a single background (i.e., single-scene videos). Meanwhile, recent large language models (LLMs) have demonstrated their capability in generating layouts and programs to control downstream visual modules such as image generation models. This prompts an important question: can we leverage the knowledge embedded in these LLMs for temporally consistent long video generation? In this paper, we propose VideoDirectorGPT, a novel framework for consistent multi-scene video generation that uses the knowledge of LLMs for video content planning and grounded video generation. Specifically, given a single text prompt, we first ask our video planner LLM (GPT-4) to expand it into a \u2018video plan\u2019, which involves generating the scene descriptions, the entities with their respective layouts, the background for each scene, and consistency groupings of the entities and backgrounds. Next, guided by this output from the video planner, our video generator, named Layout2Vid, has explicit control over spatial layouts and can maintain temporal consistency of entities/backgrounds across multiple scenes, while being trained only with image-level annotations. Our experiments demonstrate that our proposed VideoDirectorGPT framework substantially improves layout and movement control in both single- and multi-scene video generation and can generate multi-scene videos with visual consistency across scenes, while achieving competitive performance with SOTAs in open-domain single-scene text-to-video generation. We also demonstrate that our framework can dynamically control the strength for layout guidance and can also generate videos with user-provided images. 
We hope our framework can inspire future work on integrating the planning ability of LLMs into consistent long video generation.", "keywords": "Text-to-Video Generation;Large Language Models;Layout-Guided Video Generation;Temporal Consistency;Multi-Scene Video Generation;Layout Control", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/2436f0253ad2ca33bd87987c580d9851b9cb317b.zip", "author": "Han Lin;Abhay Zala;Jaemin Cho;Mohit Bansal", "authorids": "~Han_Lin1;~Abhay_Zala1;~Jaemin_Cho1;~Mohit_Bansal2", "gender": "M;;M;M", "homepage": "https://hl-hanlin.github.io/;;https://j-min.io;https://www.cs.unc.edu/~mbansal/", "dblp": ";278/2061.html;130/8348-1;32/5243.html", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;8mfWxD8AAAAJ;IbQZoHQAAAAJ;DN8QtscAAAAJ", "orcid": ";;0000-0002-1558-6169;", "linkedin": "han-lin-9336981a3/;;;", "or_profile": "~Han_Lin1;~Abhay_Zala1;~Jaemin_Cho1;~Mohit_Bansal2", "aff": "Department of Computer Science, University of North Carolina at Chapel Hill;Department of Computer Science, University of North Carolina at Chapel Hill;University of North Carolina, Chapel Hill;University of North Carolina at Chapel Hill", "aff_domain": "cs.unc.edu;cs.unc.edu;unc.edu;unc.edu", "position": "PhD student;MS student;PhD student;Full Professor", "bibtex": "@misc{\nlin2024videodirectorgpt,\ntitle={VideoDirector{GPT}: Consistent Multi-Scene Video Generation via {LLM}-Guided Planning},\nauthor={Han Lin and Abhay Zala and Jaemin Cho and Mohit Bansal},\nyear={2024},\nurl={https://openreview.net/forum?id=5PkgaUwiY0}\n}", "github": "", "project": "", "reviewers": "tZEz;pdWu;dMSD;GLr8", "site": "https://openreview.net/forum?id=5PkgaUwiY0", "pdf_size": 18166678, "rating": "3;3;5;5", "confidence": "4;4;4;5", "soundness": "3;2;3;3", "contribution": "2;3;2;2", "presentation": "3;2;3;2", "wc_summary": "57;64;123;118", "wc_strengths": "21;78;137;48", "wc_weaknesses": "120;260;287;176", "wc_questions": "3;79;4;91", "wc_review": "201;481;551;433", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "350;710;0;744", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;0;1", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 90.5, 30.153772566629204 ], "wc_strengths_avg": [ 71.0, 43.110323589599744 ], "wc_weaknesses_avg": [ 210.75, 66.48825084178407 ], "wc_questions_avg": [ 44.25, 40.97178907492325 ], "wc_review_avg": [ 416.5, 131.30403649545585 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 451.0, 302.7094316337038 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0.75, 0.4330127018922193 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 73, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11199615266521370931&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;1;1", "aff_unique_norm": "University of North Carolina at Chapel Hill;University of North Carolina", "aff_unique_dep": "Department of Computer Science;", "aff_unique_url": "https://www.unc.edu;https://www.unc.edu", "aff_unique_abbr": "UNC Chapel Hill;UNC", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Chapel Hill", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "TEDDY: 
Trimming Edges with Degree-based Discrimination Strategy", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19426", "id": "5RUf9nEdyC", "author_site": "Hyunjin Seo, Jihun Yun, Eunho Yang", "tldr": "", "abstract": "Since the pioneering work on the lottery ticket hypothesis for graph neural networks (GNNs) was proposed in Chen et al. (2021), the study on finding graph lottery tickets (GLT) has become one of the pivotal focus in the GNN community, inspiring researchers to discover sparser GLT while achieving comparable performance to original dense networks. In parallel, the graph structure has gained substantial attention as a crucial factor in GNN training dynamics, also elucidated by several recent studies. Despite this, contemporary studies on GLT, in general, have not fully exploited inherent pathways in the graph structure and identified tickets in an iterative manner, which is time-consuming and inefficient. To address these limitations, we introduce **TEDDY**, a one-shot edge sparsification framework that leverages structural information by incorporating *edge-degree* statistics. Following the edge sparsification, we encourage the parameter sparsity during training via simple projected gradient descent on the $\\ell_0$ ball. Given the target sparsity levels for both the graph structure and the model parameters, our TEDDY facilitates efficient and rapid realization of GLT within a *single* training. Remarkably, our experimental results demonstrate that TEDDY significantly surpasses conventional iterative approaches in generalization, even when conducting one-shot sparsification that solely utilizes graph structures, without taking feature information into account.", "keywords": "Graph Lottery Tickets; Graph Compression; Graph Sparsification; Graph Neural Networks", "primary_area": "learning on graphs and other geometries & topologies", "supplementary_material": "/attachment/6b610ba0b8bced99d28ff3e7067ba884245d75c8.zip", "author": "Hyunjin Seo;Jihun Yun;Eunho Yang", "authorids": "~Hyunjin_Seo2;~Jihun_Yun2;~Eunho_Yang1", "gender": "F;M;M", "homepage": "https://github.com/hyunjin72;https://github.com/abcdxyzpqrst;https://sites.google.com/site/hleehome2/", "dblp": ";241/9676;96/2621", "google_scholar": "MFDOhRUAAAAJ;ELv5qfEAAAAJ;", "orcid": ";;", "linkedin": "hyunjin-seo-97525629a/?originalSubdomain=kr;;", "or_profile": "~Hyunjin_Seo2;~Jihun_Yun2;~Eunho_Yang1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "position": "MS student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nseo2024teddy,\ntitle={{TEDDY}: Trimming Edges with Degree-based Discrimination Strategy},\nauthor={Hyunjin Seo and Jihun Yun and Eunho Yang},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=5RUf9nEdyC}\n}", "github": "", "project": "", "reviewers": "QTmY;k6Ge;mYP6;vxbf;SQ4D", "pdf_size": 9177078, "rating": "5;5;6;6;8", "confidence": "5;4;3;3;2", "soundness": "3;2;3;2;3", "contribution": "2;2;3;3;3", "presentation": "2;2;3;3;4", "wc_summary": "34;86;78;55;35", "wc_strengths": "42;25;90;18;31", "wc_weaknesses": "291;157;179;296;19", "wc_questions": "53;510;118;83;19", "wc_review": "420;778;465;452;104", "wc_reply_reviewers": "0;402;0;79;0", "wc_reply_authors": "1131;4190;937;1090;1190", "reply_reviewers": "0;3;0;1;0", 
"reply_authors": "4;13;2;3;3", "rating_avg": [ 6.0, 1.0954451150103321 ], "confidence_avg": [ 3.4, 1.019803902718557 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "contribution_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 57.6, 21.434551546510132 ], "wc_strengths_avg": [ 41.2, 25.639032743065794 ], "wc_weaknesses_avg": [ 188.4, 101.85794028940502 ], "wc_questions_avg": [ 156.6, 179.70264327493905 ], "wc_review_avg": [ 443.8, 213.6524280227117 ], "wc_reply_reviewers_avg": [ 96.2, 155.93126690949447 ], "wc_reply_authors_avg": [ 1707.6, 1244.0209966073724 ], "reply_reviewers_avg": [ 0.8, 1.1661903789690602 ], "reply_authors_avg": [ 5.0, 4.049691346263317 ], "replies_avg": [ 39, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8951435925492911, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14447942716248132355&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=5RUf9nEdyC", "pdf": "https://openreview.net/pdf?id=5RUf9nEdyC", "email": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "Learning Adaptive Multiresolution Transforms via Meta-Framelet-based Graph Convolutional Network", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19425", "id": "5RielfrDkP", "author_site": "Tianze Luo, Zhanfeng Mo, Sinno Pan", "tldr": "", "abstract": "Graph Neural Networks are popular tools in graph representation learning that capture the graph structural properties. However, most GNNs employ single-resolution graph feature extraction, thereby failing to capture micro-level local patterns (high resolution) and macro-level graph cluster and community patterns (low resolution) simultaneously. Many multiresolution methods have been developed to capture graph patterns at multiple scales, but most of them depend on predefined and handcrafted multiresolution transforms that remain fixed throughout the training process once formulated. Due to variations in graph instances and distributions, fixed handcrafted transforms can not effectively tailor multiresolution representations to each graph instance. To acquire multiresolution representation suited to different graph instances and distributions, we introduce the Multiresolution Meta-Framelet-based Graph Convolutional Network (MM-FGCN), facilitating comprehensive and adaptive multiresolution analysis across diverse graphs. 
Extensive experiments demonstrate that our MM-FGCN achieves SOTA performance on various graph learning tasks.", "keywords": "Graph neural networks;graph multiresolution analysis", "primary_area": "learning on graphs and other geometries & topologies", "supplementary_material": "/attachment/69b2419b7e2027cca1feb8016c59f2e8caabceec.zip", "author": "Tianze Luo;Zhanfeng Mo;Sinno Jialin Pan", "authorids": "~Tianze_Luo1;~Zhanfeng_Mo1;~Sinno_Jialin_Pan1", "gender": "Not Specified;M;M", "homepage": "https://ltz0120.github.io/;;http://www.cse.cuhk.edu.hk/~sinnopan/", "dblp": "297/4000;246/3205;80/5412", "google_scholar": "XROXNIMAAAAJ;DhN2kNoAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0003-2254-8651;", "linkedin": ";zhanfeng-mo-5b2105198/;", "or_profile": "~Tianze_Luo1;~Zhanfeng_Mo1;~Sinno_Pan1", "aff": "Nanyang Technological University;Nanyang Technological University;Nanyang Technological University", "aff_domain": "ntu.edu.sg;ntu.edu.sg;ntu.edu.sg", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nluo2024learning,\ntitle={Learning Adaptive Multiresolution Transforms via Meta-Framelet-based Graph Convolutional Network},\nauthor={Tianze Luo and Zhanfeng Mo and Sinno Jialin Pan},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=5RielfrDkP}\n}", "github": "", "project": "", "reviewers": "PtSQ;nDyN;sJoC;oTNL", "pdf_size": 3633327, "rating": "6;6;8;8", "confidence": "3;4;3;4", "soundness": "3;3;3;3", "contribution": "3;2;3;3", "presentation": "3;3;3;3", "wc_summary": "32;21;92;151", "wc_strengths": "29;29;72;40", "wc_weaknesses": "164;77;161;29", "wc_questions": "8;74;29;37", "wc_review": "233;201;354;257", "wc_reply_reviewers": "0;17;118;103", "wc_reply_authors": "2217;3329;1404;1129", "reply_reviewers": "0;1;1;1", "reply_authors": "4;7;4;5", "rating_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 74.0, 52.02403290787826 ], "wc_strengths_avg": [ 42.5, 17.613914953808536 ], "wc_weaknesses_avg": [ 107.75, 57.32963893135906 ], "wc_questions_avg": [ 37.0, 23.843238035132728 ], "wc_review_avg": [ 261.25, 57.115562677785114 ], "wc_reply_reviewers_avg": [ 59.5, 51.62605931116571 ], "wc_reply_authors_avg": [ 2019.75, 855.2231799360914 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 5.0, 1.224744871391589 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4837618134532439165&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=5RielfrDkP", "pdf": "https://openreview.net/pdf?id=5RielfrDkP", "email": "ntu.edu.sg;ntu.edu.sg;ntu.edu.sg", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Nanyang Technological University", "aff_unique_dep": "", "aff_unique_url": "https://www.ntu.edu.sg", "aff_unique_abbr": "NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Singapore" }, { "id": "5T46w5X3Go", "title": "Theoretical Analysis on the Generalization Power of Overfitted Transfer Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Transfer learning is a useful technique for achieving improved performance and reducing training costs by leveraging the 
knowledge gained from source tasks and applying it to target tasks. Assessing the effectiveness of transfer learning relies on understanding the similarity between the ground truth of the source and target tasks. In real-world applications, tasks often exhibit partial similarity, where certain aspects are similar while others are different or irrelevant. To investigate the impact of partial similarity on transfer learning performance, we focus on a linear regression model with two distinct sets of features: a common part shared across tasks and a task-specific part. Our study explores various types of transfer learning, encompassing two options for parameter transfer. By establishing a theoretical characterization on the error of the learned model, we compare these transfer learning options, particularly examining how generalization performance changes with the number of features/parameters in both underparameterized and overparameterized regimes. Furthermore, we provide practical guidelines for determining the number of features in the common and task-specific parts for improved generalization performance. For example, when the total number of features in the source task's learning model is fixed, we show that it is more advantageous to allocate a greater number of redundant features to the task-specific part rather than the common part. Moreover, in specific scenarios, particularly those characterized by high noise levels and small true parameters, sacrificing certain true features in the common part in favor of employing more redundant features in the task-specific part can yield notable benefits.", "keywords": "transfer learning;generalization performance;overfitting;overparameterization;double descent", "primary_area": "learning theory", "supplementary_material": "", "author": "Peizhong Ju;Sen Lin;Mark S. Squillante;Yingbin Liang;Ness Shroff", "authorids": "~Peizhong_Ju1;~Sen_Lin1;~Mark_S._Squillante1;~Yingbin_Liang1;~Ness_Shroff1", "gender": "M;;M;F;M", "homepage": ";https://slin70.github.io/;https://researcher.watson.ibm.com/researcher/view.php?person=us-mss;https://sites.google.com/view/yingbinliang/home;http://newslab.ece.ohio-state.edu/", "dblp": "167/9021;70/9499-1.html;67/3865;51/332;67/1991", "google_scholar": "VDzpfOYAAAAJ;94-TbUsAAAAJ;;lGgLAiIAAAAJ;https://scholar.google.com.tw/citations?user=5kL-ZrAAAAAJ", "orcid": "0000-0002-4569-3539;;;;0000-0002-4606-6879", "linkedin": ";;;;nessshroff/", "or_profile": "~Peizhong_Ju1;~Sen_Lin1;~Mark_S._Squillante1;~Yingbin_Liang1;~Ness_Shroff1", "aff": "Ohio State University, Columbus;University of Houston;IBM Research;The Ohio State University;Ohio State University, Columbus", "aff_domain": "osu.edu;uh.edu;us.ibm.com;osu.edu;osu.edu", "position": "Postdoc;Assistant Professor;Distinguished Research Staff Member;Professor;Full Professor", "bibtex": "@misc{\nju2024theoretical,\ntitle={Theoretical Analysis on the Generalization Power of Overfitted Transfer Learning},\nauthor={Peizhong Ju and Sen Lin and Mark S. 
Squillante and Yingbin Liang and Ness Shroff},\nyear={2024},\nurl={https://openreview.net/forum?id=5T46w5X3Go}\n}", "github": "", "project": "", "reviewers": "hznS;kvri;onS3;6fkR", "site": "https://openreview.net/forum?id=5T46w5X3Go", "pdf_size": 486498, "rating": "3;3;5;8", "confidence": "4;4;3;3", "soundness": "2;3;3;4", "contribution": "1;2;2;4", "presentation": "1;2;2;4", "wc_summary": "137;115;105;107", "wc_strengths": "39;68;73;72", "wc_weaknesses": "66;261;372;59", "wc_questions": "86;77;17;64", "wc_review": "328;521;567;302", "wc_reply_reviewers": "0;0;21;0", "wc_reply_authors": "813;652;916;312", "reply_reviewers": "0;0;1;0", "reply_authors": "2;1;2;1", "rating_avg": [ 4.75, 2.0463381929681126 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.25, 1.0897247358851685 ], "presentation_avg": [ 2.25, 1.0897247358851685 ], "wc_summary_avg": [ 116.0, 12.68857754044952 ], "wc_strengths_avg": [ 63.0, 13.982131454109563 ], "wc_weaknesses_avg": [ 189.5, 132.9482982215267 ], "wc_questions_avg": [ 61.0, 26.580067720004024 ], "wc_review_avg": [ 429.5, 116.01400777492346 ], "wc_reply_reviewers_avg": [ 5.25, 9.093266739736606 ], "wc_reply_authors_avg": [ 673.25, 228.80709669938125 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8551861104941366, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:omXfKyZcMRAJ:scholar.google.com/&scioq=Theoretical+Analysis+on+the+Generalization+Power+of+Overfitted+Transfer+Learning&hl=en&as_sdt=0,10", "gs_version_total": 0, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "Ohio State University;University of Houston;IBM", "aff_unique_dep": ";;IBM Research", "aff_unique_url": "https://www.osu.edu;https://www.uh.edu;https://www.ibm.com/research", "aff_unique_abbr": "OSU;UH;IBM", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Columbus;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "5TlHjMVrNG", "title": "Evaluating Robustness to Unforeseen Adversarial Attacks", "track": "main", "status": "Reject", "tldr": "", "abstract": "When considering real-world adversarial settings, defenders are unlikely to have access to the full range of deployment-time adversaries during training, and adversaries are likely to use realistic adversarial distortions that will not be limited to small $L_p$-constrained perturbations. To narrow in on this discrepancy between research and reality we introduce eighteen novel adversarial attacks, which we use to create ImageNet-UA, a new benchmark for evaluating model robustness against a wide range of unforeseen adversaries. We make use of our benchmark to identify a range of defense strategies which can help overcome this generalization gap, finding a rich space of techniques which can improve unforeseen robustness. 
We hope the greater variety and realism of ImageNetUA will make it a useful tool for those working on real-world worst-case robustness, enabling development of more robust defenses which can generalize beyond attacks seen during training.", "keywords": "ML safety;adversarial robustness;distribution shift;unforeseen adversaries", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/c91af6fcdaedf9a37c1218d1d8f5a8de3396e91c.zip", "author": "Maximilian Kaufmann;Daniel Kang;Yi Sun;Xuwang Yin;Steven Basart;Mantas Mazeika;Adam Dziedzic;Akul Arora;Franziska Boenisch;Tom B Brown;Abhinav Kommula;Oliver Zhang;Jacob Steinhardt;Dan Hendrycks", "authorids": "~Maximilian_Kaufmann1;~Daniel_Kang1;~Yi_Sun3;~Xuwang_Yin2;~Steven_Basart1;~Mantas_Mazeika3;~Adam_Dziedzic1;~Akul_Arora1;~Franziska_Boenisch2;~Tom_B_Brown1;~Abhinav_Kommula1;~Oliver_Zhang1;~Jacob_Steinhardt1;~Dan_Hendrycks1", "gender": ";;;;;;;;;;;;;", "homepage": ";;;;;;;;;;;;;", "dblp": ";;;;;;;;;;;;;", "google_scholar": ";;;;;;;;;;;;;", "orcid": ";;;;;;;;;;;;;", "linkedin": ";;;;;;;;;;;;;", "or_profile": ";;;;;;;;;;;;;", "aff": ";;;;;;;;;;;;;", "aff_domain": ";;;;;;;;;;;;;", "position": ";;;;;;;;;;;;;", "bibtex": "@misc{\nkaufmann2024evaluating,\ntitle={Evaluating Robustness to Unforeseen Adversarial Attacks},\nauthor={Maximilian Kaufmann and Daniel Kang and Yi Sun and Xuwang Yin and Steven Basart and Mantas Mazeika and Adam Dziedzic and Akul Arora and Franziska Boenisch and Tom B Brown and Abhinav Kommula and Oliver Zhang and Jacob Steinhardt and Dan Hendrycks},\nyear={2024},\nurl={https://openreview.net/forum?id=5TlHjMVrNG}\n}", "github": "", "project": "", "reviewers": "PXCR;Sze7;aca9;hiUg", "site": "https://openreview.net/forum?id=5TlHjMVrNG", "pdf_size": 10796350, "rating": "3;3;6;8", "confidence": "4;4;4;4", "soundness": "3;1;3;4", "contribution": "2;1;3;3", "presentation": "3;2;2;2", "wc_summary": "75;67;109;64", "wc_strengths": "88;64;101;78", "wc_weaknesses": "567;71;121;343", "wc_questions": "381;716;58;27", "wc_review": "1111;918;389;512", "wc_reply_reviewers": "265;464;0;20", "wc_reply_authors": "1312;2877;430;385", "reply_reviewers": "1;1;0;1", "reply_authors": "2;5;1;2", "rating_avg": [ 5.0, 2.1213203435596424 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 1.0897247358851685 ], "contribution_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 78.75, 17.92170471802278 ], "wc_strengths_avg": [ 82.75, 13.5531361684298 ], "wc_weaknesses_avg": [ 275.5, 196.98921290263587 ], "wc_questions_avg": [ 295.5, 279.56618178885657 ], "wc_review_avg": [ 732.5, 293.37902106319734 ], "wc_reply_reviewers_avg": [ 187.25, 190.8341884988117 ], "wc_reply_authors_avg": [ 1251.0, 1008.9095598714486 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 1.5 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 14, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:yZsZf28pFSgJ:scholar.google.com/&scioq=Evaluating+Robustness+to+Unforeseen+Adversarial+Attacks&hl=en&as_sdt=0,5", "gs_version_total": 3 }, { "id": "5VD7dS3cZX", "title": "Rethinking the Solution to Curse of Dimensionality on Randomized Smoothing", "track": "main", "status": "Reject", "tldr": "", "abstract": "Randomized Smoothing (RS) is currently a scalable certified defense method providing robustness certification against adversarial examples. 
\nAlthough significant progress has been achieved in providing defenses against $\\ell_p$ adversaries,\nearly investigations found that RS suffers from the curse of dimensionality, indicating that the robustness guarantee offered by RS decays significantly with increasing input data dimension.\nDouble Sampling Randomized Smoothing (DSRS) is the state-of-the-art method that provides a theoretical solution to the curse of dimensionality under concentration assumptions on the base classifier.\nHowever, we speculate the solution to the curse of dimensionality can be deepened from the perspective of the smoothing distribution.\nIn this work, we further address the curse of dimensionality by theoretically showing that some Exponential General Gaussian (EGG) distributions with the exponent $\\eta$ can provide $\\Omega(\\sqrt{d})$ lower bounds for the $\\ell_2$ certified radius with tighter constant factors than DSRS.\nOur theoretical analysis shows that the lower bound improves with monotonically decreasing $\\eta \\in (0,2)$. Intriguingly, we observe a contrary phenomenon that EGG provides greater certified radii at larger $\\eta$, on real-world tasks. \nFurther investigations show these discoveries are not contradictory, which are in essence dependent on whether the assumption in DSRS absolutely holds. \nOur experiments on real-world datasets demonstrate that EGG distributions bring significant improvements for point-to-point certified accuracy, up to 4\\%-6\\% on ImageNet.\nFurthermore, we also report the performance of Exponential Standard Gaussian (ESG) distributions on DSRS.", "keywords": "Exponential Gaussian distribution;randomized smoothing;certified robustness;curse of dimensionality", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "", "author": "Youwei Shu;Xi Xiao;Derui Wang;Yuxin Cao;Siji Chen;Jason Xue;Linyi Li;Bo Li", "authorids": "~Youwei_Shu1;~Xi_Xiao1;~Derui_Wang1;~Yuxin_Cao1;~Siji_Chen2;~Jason_Xue1;~Linyi_Li1;~Bo_Li19", "gender": "M;M;;;M;;M;F", "homepage": "https://github.com/tdano1;https://www.sigs.tsinghua.edu.cn/xx_en/main.htm;;;https://github.com/LostDriver;;http://linyil.com;http://boli.cs.illinois.edu/", "dblp": ";;;151/7989;;;99/4340-1.html;50/3402-26", "google_scholar": ";;;https://scholar.google.com/citations?hl=en;;;-b0sk-YAAAAJ;K8vJkTcAAAAJ", "orcid": ";;;0009-0002-5766-0846;;;;", "linkedin": ";;;;;;;", "or_profile": "~Youwei_Shu1;~Xi_Xiao1;~Derui_Wang1;~Yuxin_Cao1;~Siji_Chen2;~Jason_Xue1;~Linyi_Li1;~Bo_Li19", "aff": "Tsinghua University;Shenzhen International Graduate School, Tsinghua University;;Tsinghua University;;;Simon Fraser University;University of Illinois, Urbana Champaign", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;;mails.tsinghua.edu.cn;;;sfu.ca;illinois.edu", "position": "MS student;Associate Professor;;MS student;;;Assistant Professor;Assistant Professor", "bibtex": "@misc{\nshu2024rethinking,\ntitle={Rethinking the Solution to Curse of Dimensionality on Randomized Smoothing},\nauthor={Youwei Shu and Xi Xiao and Derui Wang and Yuxin Cao and Siji Chen and Jason Xue and Linyi Li and Bo Li},\nyear={2024},\nurl={https://openreview.net/forum?id=5VD7dS3cZX}\n}", "github": "", "project": "", "reviewers": "FGgM;Ba1f;Dgzf;2qpb", "site": "https://openreview.net/forum?id=5VD7dS3cZX", "pdf_size": 9707100, "rating": "1;5;5;8", "confidence": "4;4;4;3", "soundness": "1;2;3;3", "contribution": "1;2;3;3", "presentation": "1;2;2;3", "wc_summary": "80;33;49;113", "wc_strengths": "1;23;63;86", "wc_weaknesses": 
"561;139;391;23", "wc_questions": "88;74;6;59", "wc_review": "730;269;509;281", "wc_reply_reviewers": "2734;28;53;0", "wc_reply_authors": "2755;781;728;111", "reply_reviewers": "4;1;1;0", "reply_authors": "4;1;1;1", "rating_avg": [ 4.75, 2.48746859276655 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "contribution_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 68.75, 30.629846555280032 ], "wc_strengths_avg": [ 43.25, 33.214266513051285 ], "wc_weaknesses_avg": [ 278.5, 210.47743346971902 ], "wc_questions_avg": [ 56.75, 31.04331651096577 ], "wc_review_avg": [ 447.25, 189.19087583707625 ], "wc_reply_reviewers_avg": [ 703.75, 1172.3153106140003 ], "wc_reply_authors_avg": [ 1093.75, 994.6274113958452 ], "reply_reviewers_avg": [ 1.5, 1.5 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.7543365091413573, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:3EAuzBPf6zkJ:scholar.google.com/&scioq=Rethinking+the+Solution+to+Curse+of+Dimensionality+on+Randomized+Smoothing&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;1;2", "aff_unique_norm": "Tsinghua University;Simon Fraser University;University of Illinois Urbana-Champaign", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.sfu.ca;https://illinois.edu", "aff_unique_abbr": "THU;SFU;UIUC", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Shenzhen;Urbana-Champaign", "aff_country_unique_index": "0;0;0;1;2", "aff_country_unique": "China;Canada;United States" }, { "id": "5Vh0XqOTGi", "title": "GAN-based Vertical Federated Learning for Label Protection", "track": "main", "status": "Reject", "tldr": "", "abstract": "Split learning (splitNN) has emerged as a popular strategy for addressing the high computational costs and low modeling efficiency in Vertical Federated Learning (VFL). However, despite its popularity, vanilla splitNN lacks encryption protection, leaving it vulnerable to privacy leakage issues, especially Label Leakage from Gradients (LLG). Motivated by the LLG issue resulting from the use of labels during training, we propose the Generative Adversarial Federated Model (GAFM), a novel method designed specifically to enhance label privacy protection by integrating splitNN with Generative Adversarial Networks (GANs). GAFM leverages GANs to indirectly utilize label information by learning the label distribution rather than relying on explicit labels, thereby mitigating LLG. GAFM also employs an additional cross-entropy loss based on the noisy labels to further improve the prediction accuracy. Our ablation experiment demonstrates that the combination of GAN and the cross-entropy loss component is necessary to enable GAFM to mitigate LLG without significantly compromising the model utility. Empirical results on various datasets show that GAFM achieves a better and more robust trade-off between model utility and privacy compared to all baselines. In addition, we provide experimental justification to substantiate GAFM's superiority over splitNN, demonstrating that it offers enhanced label protection through gradient perturbation relative to splitNN. 
Codes of GAFM are available at [https://anonymous.4open.science/r/Generative-Adversarial-Federated-Model-BFF7/](https://anonymous.4open.science/r/Generative-Adversarial-Federated-Model-BFF7/).", "keywords": "Federated learning;Split learning", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/33f7f3ca497afbaa270d8d741eb14aed957e6c1c.zip", "author": "Yujin Han;Leying Guan", "authorids": "~Yujin_Han1;~Leying_Guan1", "gender": "F;", "homepage": "https://yujinhanml.github.io/;https://campuspress.yale.edu/lguan/research/", "dblp": "317/6852;", "google_scholar": "https://scholar.google.co.kr/citations?user=SxpbS5YAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";", "linkedin": ";", "or_profile": "~Yujin_Han1;~Leying_Guan1", "aff": "the University of Hong Kong;Yale University", "aff_domain": "cs.hku.hk;yale.edu", "position": "PhD student;Assistant Professor", "bibtex": "@misc{\nhan2024ganbased,\ntitle={{GAN}-based Vertical Federated Learning for Label Protection},\nauthor={Yujin Han and Leying Guan},\nyear={2024},\nurl={https://openreview.net/forum?id=5Vh0XqOTGi}\n}", "github": "", "project": "", "reviewers": "ph6x;KHth;wbsE;vM2y", "site": "https://openreview.net/forum?id=5Vh0XqOTGi", "pdf_size": 1816895, "rating": "3;5;6;6", "confidence": "5;4;2;4", "soundness": "2;3;3;2", "contribution": "2;2;2;2", "presentation": "2;3;3;2", "wc_summary": "141;110;66;67", "wc_strengths": "15;36;15;103", "wc_weaknesses": "775;152;109;349", "wc_questions": "77;54;34;6", "wc_review": "1008;352;224;525", "wc_reply_reviewers": "836;95;0;0", "wc_reply_authors": "1582;1078;674;958", "reply_reviewers": "2;1;0;0", "reply_authors": "5;3;2;3", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 96.0, 31.47220996371243 ], "wc_strengths_avg": [ 42.25, 36.10661296770994 ], "wc_weaknesses_avg": [ 346.25, 263.55964694922477 ], "wc_questions_avg": [ 42.75, 26.10914590713377 ], "wc_review_avg": [ 527.25, 297.40492178173514 ], "wc_reply_reviewers_avg": [ 232.75, 350.4392779070291 ], "wc_reply_authors_avg": [ 1073.0, 328.45547643478255 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 3.25, 1.0897247358851685 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.7492686492653551, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:dzv9pl0OZx0J:scholar.google.com/&scioq=GAN-based+Vertical+Federated+Learning+for+Label+Protection&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "University of Hong Kong;Yale University", "aff_unique_dep": ";", "aff_unique_url": "https://www.hku.hk;https://www.yale.edu", "aff_unique_abbr": "HKU;Yale", "aff_campus_unique_index": "0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;1", "aff_country_unique": "China;United States" }, { "id": "5WDOxf519p", "title": "Bridging the Domain Gap by Clustering-based Image-Text Graph Matching", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Learning domain-invariant representations is important to train a model that can generalize well to unseen target task domains. Text descriptions inherently contain semantic structures of concepts, and such auxiliary semantic cues can be used as effective pivot embedding for domain generalization problems. 
Here, we want to use (image-text) multimodal graph representations to get domain-invariant pivot embeddings by considering the inherent semantic structure between local images and text descriptors. Specifically, we aim to learn domain invariant features by (i) representing the image and text descriptions with graphs, and by (ii) clustering and matching the graph-based image node features into textual graphs simultaneously. We experiment with large-scale public datasets, such as CUB-DG and DomainBed, and our model achieves matched or better state-of-the-art performance on these datasets. Our code will be publicly available upon publication.", "keywords": "Domain Generalization;Multimodal Learning", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/29c65570338ee3c4b7ed7cd4c7713ac190dc28f4.zip", "author": "Nokyung Park;Daewon Chae;Jeongyong Shim;Sangpil Kim;Eun-Sol Kim;Jinkyu Kim", "authorids": "~Nokyung_Park1;~Daewon_Chae2;~Jeongyong_Shim2;~Sangpil_Kim4;~Eun-Sol_Kim1;~Jinkyu_Kim1", "gender": ";M;M;M;F;M", "homepage": "https://sites.google.com/view/nokyung-park;https://github.com/daewon88;https://hanyang-mllab.notion.site/Machine-Learning-Lab-1af963d60b934aeebc17abc6ca74bc0b;https://kuaicv.com/;;https://visionai.korea.ac.kr/", "dblp": "290/8702;354/4231;;182/2231;52/10086;", "google_scholar": "kvrypn8AAAAJ;OEa4E14AAAAJ;;mzH6yYgAAAAJ;JhZBnfYAAAAJ;", "orcid": ";;;0000-0002-7349-0018;;", "linkedin": "nokyungpark/;;jeongyong-shim-30b672215;spkim921;;", "or_profile": "~Nokyung_Park1;~Daewon_Chae2;~Jeongyong_Shim2;~Sangpil_Kim4;~Eun-Sol_Kim1;~Jinkyu_Kim1", "aff": "Korea University;Korea University;Hanyang University;Korea University;Hanyang University;Korea University", "aff_domain": "korea.ac.kr;korea.ac.kr;hanyang.ac.kr;korea.ac.kr;hanyang.ac.kr;korea.ac.kr", "position": "MS student;MS student;MS student;Assistant Professor;Assistant Professor;Assistant Professor", "bibtex": "@misc{\npark2024bridging,\ntitle={Bridging the Domain Gap by Clustering-based Image-Text Graph Matching},\nauthor={Nokyung Park and Daewon Chae and Jeongyong Shim and Sangpil Kim and Eun-Sol Kim and Jinkyu Kim},\nyear={2024},\nurl={https://openreview.net/forum?id=5WDOxf519p}\n}", "github": "", "project": "", "reviewers": "kUjy;1niQ;LvEJ", "site": "https://openreview.net/forum?id=5WDOxf519p", "pdf_size": 4974450, "rating": "3;5;5", "confidence": "5;5;4", "soundness": "2;3;3", "contribution": "2;3;2", "presentation": "2;3;3", "wc_summary": "55;51;42", "wc_strengths": "33;49;7", "wc_weaknesses": "217;131;173", "wc_questions": "4;125;3", "wc_review": "309;356;225", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1549;796;1053", "reply_reviewers": "0;0;0", "reply_authors": "3;2;2", "rating_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 49.333333333333336, 5.436502143433364 ], "wc_strengths_avg": [ 29.666666666666668, 17.30767331432956 ], "wc_weaknesses_avg": [ 173.66666666666666, 35.11251755270318 ], "wc_questions_avg": [ 44.0, 57.27710420985567 ], "wc_review_avg": [ 296.6666666666667, 54.18691929074971 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1132.6666666666667, 312.5298207993741 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 
2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4104583030208793062&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;1;0;1;0", "aff_unique_norm": "Korea University;Hanyang University", "aff_unique_dep": ";", "aff_unique_url": "https://www.korea.ac.kr;https://www.hanyang.ac.kr", "aff_unique_abbr": "KU;HYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "5XUlfPcQnG", "title": "A Calibrated Simulation for Offline Training of Reinforcement Learning Agents to Optimize Energy and Emission in Office Buildings", "track": "main", "status": "Reject", "tldr": "", "abstract": "Modern commercial Heating, Ventilation, and Air Conditioning (HVAC) systems form a complex and interconnected thermodynamic system with the building and outside weather conditions, and current setpoint control policies are not fully optimized for minimizing energy use and carbon emission. Given a suitable training environment, a Reinforcement Learning (RL) model is able to improve upon these policies, but training such a model, especially in a way that scales to thousands of buildings, presents many practical challenges. To address these challenges, we propose a novel simulation based approach, where a customized simulator is used to train the agent for each building. Our simulator is lightweight and calibrated with recorded data from the building to achieve sufficient fidelity. On a two-story, 68,000 square foot building, with 127 devices, we were able to calibrate our simulator to have just over half a degree of drift from the real world over a 6 hour period. We train an RL agent on this simulator and demonstrate that our agent is able to learn an improved policy. 
This approach is an important step toward having a real-world Reinforcement Learning control system that can be scaled to many buildings, allowing for greater efficiency and resulting in reduced energy consumption and carbon emissions.", "keywords": "HVAC;Reinforcement Learning;Simulation", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Judah Goldfeder;John Sipple", "authorids": "~Judah_Goldfeder1;sipple@google.com", "gender": ";", "homepage": ";", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Judah_Goldfeder1;sipple@google.com", "aff": ";", "aff_domain": ";", "position": ";", "bibtex": "@misc{\ngoldfeder2024a,\ntitle={A Calibrated Simulation for Offline Training of Reinforcement Learning Agents to Optimize Energy and Emission in Office Buildings},\nauthor={Judah Goldfeder and John Sipple},\nyear={2024},\nurl={https://openreview.net/forum?id=5XUlfPcQnG}\n}", "github": "", "project": "", "reviewers": "doaN;P7Wf;BrjB;ffyP", "site": "https://openreview.net/forum?id=5XUlfPcQnG", "pdf_size": 2438504, "rating": "1;1;3;5", "confidence": "5;5;4;4", "soundness": "2;2;3;3", "contribution": "1;1;2;2", "presentation": "1;1;1;2", "wc_summary": "80;83;36;71", "wc_strengths": "39;50;40;57", "wc_weaknesses": "265;240;228;191", "wc_questions": "9;103;262;55", "wc_review": "393;476;566;374", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 2.5, 1.6583123951777 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 1.5, 0.5 ], "presentation_avg": [ 1.25, 0.4330127018922193 ], "wc_summary_avg": [ 67.5, 18.714967272212903 ], "wc_strengths_avg": [ 46.5, 7.433034373659253 ], "wc_weaknesses_avg": [ 231.0, 26.67395733669828 ], "wc_questions_avg": [ 107.25, 95.32674073941688 ], "wc_review_avg": [ 452.25, 76.05384605659336 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7083121433027660792&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0 }, { "id": "5ZWxBU9sYG", "title": "How to Craft Backdoors with Unlabeled Data Alone?", "track": "main", "status": "Reject", "tldr": "", "abstract": "Relying only on unlabeled data, Self-supervised learning (SSL) can learn rich features in an economical and scalable way. As the drive-horse for building foundation models, SSL has received a lot of attention recently with wide applications, which also raises security concerns where backdoor attack is a major type of threat: if the released dataset is maliciously poisoned, backdoored SSL models can behave badly when triggers are injected to test samples. The goal of this work is to investigate this potential risk. We notice that existing backdoors all require a considerable amount of *labeled* data that may not be available for SSL. To circumvent this limitation, we explore a more restrictive setting called no-label backdoors, where we only have access to the unlabeled data alone, where the key challenge is how to select the proper poison set without using label information. We propose two strategies for poison selection: clustering-based selection using pseudolabels, and contrastive selection derived from the mutual information principle. 
Experiments on CIFAR-10 and ImageNet-100 show that both no-label backdoors are effective on many SSL methods and outperform random poisoning by a large margin.", "keywords": "Backdoor Attack;Self-Supervised Learning;Deep Learning;Trustworthy Machine Learning", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "", "author": "Yifei Wang;Wenhan Ma;Yisen Wang", "authorids": "~Yifei_Wang1;~Wenhan_Ma1;~Yisen_Wang1", "gender": "M;M;M", "homepage": "https://yifeiwang77.com;https://github.com/CuteNPC;https://yisenwang.github.io/", "dblp": "00/555-1;375/2887.html;172/1346-1", "google_scholar": "-CLy6YsAAAAJ;;uMWPDboAAAAJ", "orcid": ";0009-0003-3829-4008;", "linkedin": ";;", "or_profile": "~Yifei_Wang1;~Wenhan_Ma1;~Yisen_Wang1", "aff": "Massachusetts Institute of Technology;Peking University;Peking University", "aff_domain": "mit.edu;pku.edu.cn;pku.edu.cn", "position": "Postdoc;Undergrad student;Assistant Professor", "bibtex": "@misc{\nwang2024how,\ntitle={How to Craft Backdoors with Unlabeled Data Alone?},\nauthor={Yifei Wang and Wenhan Ma and Yisen Wang},\nyear={2024},\nurl={https://openreview.net/forum?id=5ZWxBU9sYG}\n}", "github": "", "project": "", "reviewers": "Dh81;gGhQ;yPHh;w1uC", "site": "https://openreview.net/forum?id=5ZWxBU9sYG", "pdf_size": 10906885, "rating": "3;3;5;6", "confidence": "4;4;4;4", "soundness": "2;2;2;3", "contribution": "2;2;2;2", "presentation": "3;2;3;2", "wc_summary": "112;82;104;184", "wc_strengths": "34;64;32;117", "wc_weaknesses": "104;299;308;256", "wc_questions": "38;352;4;291", "wc_review": "288;797;448;848", "wc_reply_reviewers": "93;0;0;39", "wc_reply_authors": "417;2552;1098;1798", "reply_reviewers": "1;0;0;1", "reply_authors": "1;4;2;3", "rating_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 120.5, 38.27205246651922 ], "wc_strengths_avg": [ 61.75, 34.3247359785913 ], "wc_weaknesses_avg": [ 241.75, 81.92183774794118 ], "wc_questions_avg": [ 171.25, 152.2651880765922 ], "wc_review_avg": [ 595.25, 234.8780268564942 ], "wc_reply_reviewers_avg": [ 33.0, 38.1247950814165 ], "wc_reply_authors_avg": [ 1466.25, 794.5823981815857 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17304679836458820384&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;1", "aff_unique_norm": "Massachusetts Institute of Technology;Peking University", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;http://www.pku.edu.cn", "aff_unique_abbr": "MIT;Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;China" }, { "title": "ControlVideo: Training-free Controllable Text-to-video Generation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19424", "id": "5a79AqFr0c", "author_site": "Yabo Zhang, Yuxiang Wei, Dongsheng jiang, XIAOPENG ZHANG, Wangmeng Zuo, Qi Tian", "tldr": "", "abstract": "Text-driven diffusion models have unlocked unprecedented abilities in image generation, whereas their video counterpart lags behind due to the excessive training cost.\nTo avert the training burden, we propose a training-free ControlVideo to produce 
high-quality videos based on the provided text prompts and motion sequences.\nSpecifically, ControlVideo adapts a pre-trained text-to-image model (i.e., ControlNet) for controllable text-to-video generation.\nTo generate continuous videos without the flicker effect, we propose an interleaved-frame smoother to smooth the intermediate frames.\nIn particular, the interleaved-frame smoother splits the whole video into successive three-frame clips, and stabilizes each clip by updating the middle frame with the interpolation of the other two frames in latent space.\nFurthermore, a fully cross-frame interaction mechanism has been exploited to further enhance the frame consistency, while a hierarchical sampler is employed to produce long videos efficiently.\nExtensive experiments demonstrate that our ControlVideo outperforms state-of-the-art methods both quantitatively and qualitatively. \nIt is worth noting that, thanks to these efficient designs, ControlVideo can generate both short and long videos within several minutes using one NVIDIA 2080Ti. \nCode and videos are available at [this link](https://github.com/YBYBZhang/ControlVideo).", "keywords": "Diffusion models;video generation", "primary_area": "generative models", "supplementary_material": "/attachment/b49b1e56aa6c7862445818bae866c218d23b9566.zip", "author": "Yabo Zhang;Yuxiang Wei;Dongsheng Jiang;XIAOPENG ZHANG;Wangmeng Zuo;Qi Tian", "authorids": "~Yabo_Zhang1;~Yuxiang_Wei1;~Dongsheng_Jiang2;~XIAOPENG_ZHANG7;~Wangmeng_Zuo3;~Qi_Tian3", "gender": "M;M;M;M;M;M", "homepage": "https://ybybzhang.github.io/;;https://sites.google.com/site/zxphistory/;;https://www.qitian1987.com/index.html;https://sites.google.com/site/dongshengjiangbme/", "dblp": "231/0624;47/8871-1;;93/2671;78/1467-1.html;85/8729", "google_scholar": "LnYDPdAAAAAJ;hORhL7YAAAAJ;Ud6aBAcAAAAJ;rUOpCEYAAAAJ;https://scholar.google.com/citations?hl=en;-eGIgsoAAAAJ", "orcid": ";0000-0002-8993-7195;;0000-0002-3330-783X;0000-0002-7252-5047;", "linkedin": ";;;;;", "or_profile": "~Yabo_Zhang1;~Yuxiang_Wei1;~XIAOPENG_ZHANG7;~Wangmeng_Zuo3;~Qi_Tian3;~Dongsheng_Jiang1", "aff": "Harbin Institute of Technology;Harbin Institute of Technology;Huawei Technologies Ltd.;Harbin Institute of Technology;Huawei Technologies Ltd.;Huawei Technologies Ltd.", "aff_domain": "hit.edu.cn;hit.edu.cn;huawei.com;hit.edu.cn;huawei.com;huawei.com", "position": "PhD student;PhD student;Principal Researcher;Full Professor;Principal Researcher;Principal Researcher", "bibtex": "@inproceedings{\nzhang2024controlvideo,\ntitle={ControlVideo: Training-free Controllable Text-to-video Generation},\nauthor={Yabo Zhang and Yuxiang Wei and Dongsheng Jiang and XIAOPENG ZHANG and Wangmeng Zuo and Qi Tian},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=5a79AqFr0c}\n}", "github": "", "project": "", "reviewers": "aLwG;XMqd;EvXG;HJ3a", "pdf_size": 27009780, "rating": "5;5;6;6", "confidence": "5;5;4;4", "soundness": "3;2;3;3", "contribution": "2;2;3;2", "presentation": "2;2;3;3", "wc_summary": "32;42;50;120", "wc_strengths": "36;48;30;66", "wc_weaknesses": "74;83;140;174", "wc_questions": "71;48;35;91", "wc_review": "213;221;255;451", "wc_reply_reviewers": "0;0;23;0", "wc_reply_authors": "431;647;552;928", "reply_reviewers": "0;0;1;0", "reply_authors": "3;3;3;4", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ],
"wc_summary_avg": [ 61.0, 34.655446902326915 ], "wc_strengths_avg": [ 45.0, 13.74772708486752 ], "wc_weaknesses_avg": [ 117.75, 41.172654760168186 ], "wc_questions_avg": [ 61.25, 21.47527648250425 ], "wc_review_avg": [ 285.0, 97.12878049270464 ], "wc_reply_reviewers_avg": [ 5.75, 9.959292143521045 ], "wc_reply_authors_avg": [ 639.5, 183.31462025708697 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 271, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2569088021339755098&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=5a79AqFr0c", "pdf": "https://openreview.net/pdf?id=5a79AqFr0c", "email": "hit.edu.cn;hit.edu.cn;huawei.com;hit.edu.cn;huawei.com;huawei.com", "author_num": 6, "aff_unique_index": "0;0;1;0;1;1", "aff_unique_norm": "Harbin Institute of Technology;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "http://www.hit.edu.cn/;https://www.huawei.com", "aff_unique_abbr": "HIT;Huawei", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Harbin;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "5aHmaMFJns", "title": "Reason for Future, Act for Now: A Principled Architecture for Autonomous LLM Agents", "track": "main", "status": "Reject", "tldr": "", "abstract": "Large language models (LLMs) demonstrate impressive reasoning abilities, but translating reasoning into actions in the real world remains challenging. In particular, it remains unclear how to complete a given task provably within a minimum number of interactions with the external environment, e.g., through an internal mechanism of reasoning. To this end, we propose a principled framework with provable regret guarantees to orchestrate reasoning and acting, which we call \"reason for future, act for now\" ($\\texttt{RAFA}$). Specifically, we design a prompt template for reasoning that learns from the memory buffer and plans a future trajectory over a long horizon (\"reason for future\"). At each step, the LLM agent takes the initial action of the planned trajectory (\"act for now\"), stores the collected feedback in the memory buffer, and reinvokes the reasoning routine to replan the future trajectory from the new state. \n\nThe key idea is to cast reasoning in LLMs as learning and planning in Bayesian adaptive Markov decision processes (MDPs). Correspondingly, we prompt LLMs to form an updated posterior of the unknown environment from the memory buffer (learning) and generate an optimal trajectory for multiple future steps that maximizes a value function (planning). The learning and planning subroutines are performed in an \"in-context\" manner to emulate the actor-critic update for MDPs. Our theoretical analysis proves that the novel combination of long-term reasoning and short-term acting achieves a $\\sqrt{T}$ regret. In particular, the regret bound highlights an intriguing interplay between the prior knowledge obtained through pretraining and the uncertainty reduction achieved by reasoning and acting. Our empirical validation shows that it outperforms various existing frameworks and achieves nearly perfect scores on a few benchmarks. 
By incorporating \"classical\" MDP techniques, $\\texttt{RAFA}$ introduces the first autonomous LLM agent with provable regret guarantees.", "keywords": "Large language model", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/47b3fac881bf8fa45cb49687db5547bac199cd58.zip", "author": "Zhihan Liu;Hao Hu;Shenao Zhang;Hongyi Guo;Shuqi Ke;Boyi Liu;Zhaoran Wang", "authorids": "~Zhihan_Liu1;~Hao_Hu3;~Shenao_Zhang1;~Hongyi_Guo1;~Shuqi_Ke1;~Boyi_Liu1;~Zhaoran_Wang1", "gender": "M;M;M;M;;M;Not Specified", "homepage": ";https://mousehu.github.io;https://shenao-zhang.github.io/;https://gohsyi.github.io/;;;https://zhaoranwang.github.io/", "dblp": ";67/6924-6;253/4543.html;;;;117/2756", "google_scholar": "0VVg_R4AAAAJ;https://scholar.google.com/citations?hl=en;8NamuusAAAAJ;https://scholar.google.com/citations?hl=en;;1G8RH_YAAAAJ;https://scholar.google.com.tw/citations?user=HSx0BgQAAAAJ", "orcid": ";;;;;;", "linkedin": ";hao-hu-tsinghua;shenao-zhang-055a53178/;;;;", "or_profile": "~Zhihan_Liu1;~Hao_Hu3;~Shenao_Zhang1;~Hongyi_Guo1;~Shuqi_Ke1;~Boyi_Liu1;~Zhaoran_Wang1", "aff": "Northwestern University;Tsinghua University;Georgia Institute of Technology;Northwestern University, Northwestern University;;ByteDance Inc.;Northwestern University", "aff_domain": "northwestern.edu;tsinghua.edu.cn;gatech.edu;u.northwestern.edu;;bytedance.com;northwestern.edu", "position": "PhD student;PhD student;MS student;PhD student;;Researcher;Associate Professor", "bibtex": "@misc{\nliu2024reason,\ntitle={Reason for Future, Act for Now: A Principled Architecture for Autonomous {LLM} Agents},\nauthor={Zhihan Liu and Hao Hu and Shenao Zhang and Hongyi Guo and Shuqi Ke and Boyi Liu and Zhaoran Wang},\nyear={2024},\nurl={https://openreview.net/forum?id=5aHmaMFJns}\n}", "github": "", "project": "", "reviewers": "Aro2;3FoC;P2KV;n4NZ", "site": "https://openreview.net/forum?id=5aHmaMFJns", "pdf_size": 2856024, "rating": "3;5;5;6", "confidence": "4;4;3;3", "soundness": "2;3;2;3", "contribution": "1;2;2;2", "presentation": "3;4;3;1", "wc_summary": "66;76;58;289", "wc_strengths": "33;57;49;68", "wc_weaknesses": "156;444;251;657", "wc_questions": "22;119;66;177", "wc_review": "277;696;424;1191", "wc_reply_reviewers": "203;0;0;438", "wc_reply_authors": "1000;1354;1557;2028", "reply_reviewers": "1;0;0;2", "reply_authors": "2;4;3;6", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 122.25, 96.48413081952907 ], "wc_strengths_avg": [ 51.75, 12.754901018824098 ], "wc_weaknesses_avg": [ 377.0, 192.0976314273552 ], "wc_questions_avg": [ 96.0, 58.02154772151464 ], "wc_review_avg": [ 647.0, 348.19750142699183 ], "wc_reply_reviewers_avg": [ 160.25, 180.50813693570714 ], "wc_reply_authors_avg": [ 1484.75, 371.6243903459513 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 3.75, 1.479019945774904 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.6882472016116854, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4081571305387143859&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;2;0;3;0", "aff_unique_norm": "Northwestern University;Tsinghua University;Georgia Institute of Technology;ByteDance", "aff_unique_dep": ";;;", "aff_unique_url": 
"https://www.northwestern.edu;https://www.tsinghua.edu.cn;https://www.gatech.edu;https://www.bytedance.com", "aff_unique_abbr": "NU;THU;Georgia Tech;ByteDance", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;1;0", "aff_country_unique": "United States;China" }, { "id": "5aayQBRGM1", "title": "Unsupervised Representation Learning to Aid Semi-Supervised Meta Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Few-shot learning or meta-learning leverages the data scarcity problem in machine learning. Traditionally, training data requires a multitude of samples and labeling for supervised learning. To address this issue, we propose a one-shot unsupervised meta-learning to learn the latent representation of the training samples. We use augmented samples as the query set during the training phase of the unsupervised meta-learning. A temperature-scaled cross-entropy loss is used in the inner loop of meta-learning to prevent overfitting during unsupervised learning. The learned parameters from this step are applied to the targeted supervised meta-learning in a transfer-learning fashion for initialization and fast adaptation with improved accuracy. The proposed method is model agnostic and can aid any meta-learning model to improve accuracy. We use model agnostic meta-learning (MAML) and relation network (RN) on Omniglot and mini-Imagenet datasets to demonstrate the performance of the proposed method. Furthermore, a meta-learning model with the proposed initialization can achieve satisfactory accuracy with significantly fewer training samples.", "keywords": "few-shot classification;meta-learning;machine learning;semi-supervised learning;unsupervised learning.", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "", "author": "Atik Faysal;Mohammad Rostami;huaxia wang;Avimanyu Sahoo;Ryan Antle", "authorids": "~Atik_Faysal1;~Mohammad_Rostami2;~huaxia_wang1;~Avimanyu_Sahoo1;~Ryan_Antle1", "gender": "M;M;M;M;", "homepage": "https://github.com/atik666;https://sites.google.com/view/woreom;https://sites.google.com/view/huaxia-wang;https://sites.google.com/uah.edu/accl;https://www.bakerhughes.com", "dblp": "299/1517;;40/10246;150/4130;", "google_scholar": "tHtILuYAAAAJ;Md81vrUAAAAJ;fna2KGYAAAAJ;0h-0qjIAAAAJ;", "orcid": "0000-0001-6569-4371;;;0000-0001-6113-3278;", "linkedin": "atik-faysal-64b809132/;woreom/;;avimanyu-sahoo-26a55731/;", "or_profile": "~Atik_Faysal1;~Mohammad_Rostami2;~huaxia_wang1;~Avimanyu_Sahoo1;~Ryan_Antle1", "aff": "Rowan University;Rowan College;Rowan University;University of Alabama at Huntsville;", "aff_domain": "rowan.edu;rowan.edu;rowan.edu;uah.edu;", "position": "PhD student;PhD student;Assistant Professor;Assistant Professor;", "bibtex": "@misc{\nfaysal2024unsupervised,\ntitle={Unsupervised Representation Learning to Aid Semi-Supervised Meta Learning},\nauthor={Atik Faysal and Mohammad Rostami and huaxia wang and Avimanyu Sahoo and Ryan Antle},\nyear={2024},\nurl={https://openreview.net/forum?id=5aayQBRGM1}\n}", "github": "", "project": "", "reviewers": "8qJe;vJfx;SMhQ;fvcV", "site": "https://openreview.net/forum?id=5aayQBRGM1", "pdf_size": 1180038, "rating": "1;3;3;3", "confidence": "4;4;3;4", "soundness": "1;1;1;1", "contribution": "1;1;1;1", "presentation": "1;1;1;2", "wc_summary": "44;48;42;85", "wc_strengths": "9;6;6;16", "wc_weaknesses": "181;101;47;117", "wc_questions": "26;447;2;24", "wc_review": "260;602;97;242", "wc_reply_reviewers": "0;0;0;0", 
"wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 2.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 1.0, 0.0 ], "contribution_avg": [ 1.0, 0.0 ], "presentation_avg": [ 1.25, 0.4330127018922193 ], "wc_summary_avg": [ 54.75, 17.597940220378067 ], "wc_strengths_avg": [ 9.25, 4.085033659592048 ], "wc_weaknesses_avg": [ 111.5, 47.778133073614335 ], "wc_questions_avg": [ 124.75, 186.28925760762482 ], "wc_review_avg": [ 300.25, 185.32184841512887 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:_Fyw0kU6WYQJ:scholar.google.com/&scioq=Unsupervised+Representation+Learning+to+Aid+Semi-Supervised+Meta+Learning&hl=en&as_sdt=0,5", "gs_version_total": 3, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Rowan University;Rowan College;University of Alabama in Huntsville", "aff_unique_dep": ";;", "aff_unique_url": "https://www.rowan.edu;https://www.rowan.edu;https://www.uah.edu", "aff_unique_abbr": "Rowan;;UAH", "aff_campus_unique_index": "1", "aff_campus_unique": ";Huntsville", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "5abK7RDbuW", "title": "Text to Image for Multi-Label Image Recognition with Joint Prompt-Adapter Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Benefited from image-text contrastive learning, pre-trained vision-language models, e.g., CLIP, allow to directly leverage texts as images (TaI) for parameter-efficient fine-tuning (PEFT). While CLIP is capable of making image feature to be similar with the corresponding text features, modality gap remains a nontrivial issue and limits the MLR performance of TaI. Using multi-label image recognition (MLR) as an example, we present a novel method, called T2I-PAL to tackle the modality gap issue when using only text captions for PEFT. The core design of T2I-PAL is to leverage pretrained text-to-image generation models to generate photo-realistic and diverse images from text captions, thereby being beneficial for reducing modality gap. For better PEFT, we further combine both prompt tuning and adapter learning for enhancing classification performance. Extensive experiments on multiple benchmarks, including MS-COCO, VOC2007, and NUS- WIDE, show that our T2I-PAL can boost recognition performance by 3.47% in average above the top-ranked state-of-the-art methods. 
Our code and models will be made publicly available.", "keywords": "Multi-Label Image Recognition;Text to Image;Parameter-Efficient Fine-Tuning", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/d34dc0106659b2614543b24d35056e07a3899838.pdf", "author": "Chun-Mei Feng;Kai Yu;Xinxing Xu;Yong Liu;Salman Khan;Wangmeng Zuo;Rick Siow Mong Goh", "authorids": "~Chun-Mei_Feng1;~Kai_Yu7;~Xinxing_Xu1;~Yong_Liu10;~Salman_Khan4;~Wangmeng_Zuo3;~Rick_Siow_Mong_Goh1", "gender": "F;M;M;M;M;M;", "homepage": "https://scholar.google.com.hk/citations?user=g2nqHBcAAAAJ&hl=zh-CN;;https://sites.google.com/site/xinxingxu666/;;https://salman-h-khan.github.io/;;https://sites.google.com/view/rickgoh/home", "dblp": "182/8416-1;197/1322-9;15/10654;29/4867-26;32/11535-1;93/2671;https://dblp.uni-trier.de/pers/g/Goh:Rick_Siow_Mong", "google_scholar": "https://scholar.google.com.hk/citations?user=g2nqHBcAAAAJ;https://scholar.google.com.hk/citations?user=gVyOSpEAAAAJ;https://scholar.google.com.sg/citations?user=neFbpuEAAAAJ;QujHYk0AAAAJ;https://scholar.google.es/citations?user=M59O9lkAAAAJ;rUOpCEYAAAAJ;https://scholar.google.com.sg/citations?user=fBsBJjoAAAAJ", "orcid": "0000-0002-3044-9779;;0000-0003-1449-3072;;0000-0002-9502-1749;0000-0002-3330-783X;0000-0001-9116-1595", "linkedin": "https://linkedin.com/in/chunmei-feng-719a72229;;;liuyongsg;;;rickgoh/", "or_profile": "~Chun-Mei_Feng1;~Kai_Yu7;~Xinxing_Xu1;~Yong_Liu10;~Salman_Khan4;~Wangmeng_Zuo3;~Rick_Siow_Mong_Goh1", "aff": "IHPC;;Institute of High Performance Computing;Institute of High Performance Computing, Singapore, A*STAR;Australian National University;Harbin Institute of Technology;Institute of High Performance Computing, Singapore, A*STAR", "aff_domain": "astar.edu.sg;;ihpc.a-star.edu.sg;ihpc.a-star.edu.sg;anu.edu.au;hit.edu.cn;ihpc.a-star.edu.sg", "position": "Researcher;;Scientist;Senior Scientist, Adjunct Assistant Professor;Lecturer;Full Professor;Director", "bibtex": "@misc{\nfeng2024text,\ntitle={Text to Image for Multi-Label Image Recognition with Joint Prompt-Adapter Learning},\nauthor={Chun-Mei Feng and Kai Yu and Xinxing Xu and Yong Liu and Salman Khan and Wangmeng Zuo and Rick Siow Mong Goh},\nyear={2024},\nurl={https://openreview.net/forum?id=5abK7RDbuW}\n}", "github": "", "project": "", "reviewers": "GRDf;oaxK;M37Z;U89w", "site": "https://openreview.net/forum?id=5abK7RDbuW", "pdf_size": 1818597, "rating": "3;5;6;6", "confidence": "4;3;2;4", "soundness": "2;3;3;3", "contribution": "2;2;3;2", "presentation": "2;1;3;3", "wc_summary": "53;119;53;67", "wc_strengths": "9;95;74;43", "wc_weaknesses": "55;539;54;39", "wc_questions": "15;65;27;67", "wc_review": "132;818;208;216", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 73.0, 27.16615541441225 ], "wc_strengths_avg": [ 55.25, 32.48364973336586 ], "wc_weaknesses_avg": [ 171.75, 212.12658367116555 ], "wc_questions_avg": [ 43.5, 22.907422377910613 ], "wc_review_avg": [ 343.5, 275.90714017582076 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 7, 0 ], 
"corr_rating_confidence": -0.4923659639173309, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15791932118392428322&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0;1;2;0", "aff_unique_norm": "Institute of High Performance Computing;Australian National University;Harbin Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ihpc.a-star.edu.sg;https://www.anu.edu.au;http://www.hit.edu.cn/", "aff_unique_abbr": "IHPC;ANU;HIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Harbin", "aff_country_unique_index": "0;0;0;1;2;0", "aff_country_unique": "Singapore;Australia;China" }, { "title": "Certified Adversarial Robustness for Rate Encoded Spiking Neural Networks", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19423", "id": "5bNYf0CqxY", "author_site": "Bhaskar Mukhoty, Hilal AlQuabeh, Giulia De Masi, Huan Xiong, Bin Gu", "tldr": "", "abstract": "The spiking neural networks are inspired by the biological neurons that employ binary spikes to propagate information in the neural network. It has garnered considerable attention as the next-generation neural network, as the spiking activity simplifies the computation burden of the network to a large extent and is known for its low energy deployment enabled by specialized neuromorphic hardware. One popular technique to feed a static image to such a network is rate encoding, where each pixel is encoded into random binary spikes, following a Bernoulli distribution that uses the pixel intensity as bias. By establishing a novel connection between rate-encoding and randomized smoothing, we give the first provable robustness guarantee for spiking neural networks against adversarial perturbation of inputs bounded under $l_1$-norm. We introduce novel adversarial training algorithms for rate-encoded models that significantly improve the state-of-the-art empirical robust accuracy result. Experimental validation of the method is performed across various static image datasets, including CIFAR-10, CIFAR-100 and ImageNet-100. 
The code is available at \\url{https://github.com/BhaskarMukhoty/CertifiedSNN}.", "keywords": "Spiking Neural Networks;Randomized Smoothing;Adversarial Learning;Certified Robustness", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Bhaskar Mukhoty;Hilal AlQuabeh;Giulia De Masi;Huan Xiong;Bin Gu", "authorids": "~Bhaskar_Mukhoty1;~Hilal_AlQuabeh1;~Giulia_De_Masi1;~Huan_Xiong1;~Bin_Gu1", "gender": "M;M;;M;M", "homepage": ";;;https://scholar.google.com/citations?user=l4hm14MAAAAJ&hl=en;https://mbzuai.ac.ae/study/faculty/bin-gu/", "dblp": "166/1438;299/8316;147/8719;;29/1758-1", "google_scholar": "https://scholar.google.co.in/citations?user=lJglnOQAAAAJ;_vbkrqMAAAAJ;G1K5hX0AAAAJ;l4hm14MAAAAJ;Vo8OgCgAAAAJ", "orcid": "0000-0002-8594-980X;;0000-0003-3284-880X;;0000-0001-6049-1815", "linkedin": ";hilal-alquabeh-388a8a134/;;;", "or_profile": "~Bhaskar_Mukhoty1;~Hilal_AlQuabeh1;~Giulia_De_Masi1;~Huan_Xiong1;~Bin_Gu1", "aff": "Mohamed bin Zayed University of Artificial Intelligence;Mohamed bin Zayed University of Artificial Intelligence;Technology Innovation Institute;Harbin Institute of Technology;Mohamed bin Zayed University of Artificial Intelligence", "aff_domain": "mbzuai.ac.ae;mbzuai.ac.ae;tii.ae;hit.edu.cn;mbzuai.ac.ae", "position": "Postdoc;PhD student;Principal Researcher;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nmukhoty2024certified,\ntitle={Certified Adversarial Robustness for Rate Encoded Spiking Neural Networks},\nauthor={Bhaskar Mukhoty and Hilal AlQuabeh and Giulia De Masi and Huan Xiong and Bin Gu},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=5bNYf0CqxY}\n}", "github": "", "project": "", "reviewers": "NxbL;wpSa;FaRA;77u8", "pdf_size": 437892, "rating": "5;6;8;8", "confidence": "4;3;3;3", "soundness": "3;2;2;3", "contribution": "2;2;2;3", "presentation": "3;2;2;2", "wc_summary": "41;30;25;84", "wc_strengths": "42;11;25;128", "wc_weaknesses": "150;97;149;484", "wc_questions": "118;4;3;166", "wc_review": "351;142;202;862", "wc_reply_reviewers": "0;44;27;144", "wc_reply_authors": "783;467;572;901", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;2;3", "rating_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 45.0, 23.24865587512534 ], "wc_strengths_avg": [ 51.5, 45.510987684294435 ], "wc_weaknesses_avg": [ 220.0, 153.92043399107214 ], "wc_questions_avg": [ 72.75, 71.29998246844104 ], "wc_review_avg": [ 389.25, 283.35082053878017 ], "wc_reply_reviewers_avg": [ 53.75, 54.41679428264771 ], "wc_reply_authors_avg": [ 680.75, 170.64638144420175 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.7777777777777777, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9706469649419747137&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "openreview": "https://openreview.net/forum?id=5bNYf0CqxY", "pdf": "https://openreview.net/pdf?id=5bNYf0CqxY", "email": "mbzuai.ac.ae;mbzuai.ac.ae;tii.ae;hit.edu.cn;mbzuai.ac.ae", "author_num": 5, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Mohamed bin Zayed University of Artificial 
Intelligence;Technology Innovation Institute;Harbin Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://mbzuai.ac.ae;;http://www.hit.edu.cn/", "aff_unique_abbr": "MBZUAI;;HIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Harbin", "aff_country_unique_index": "0;0;2;0", "aff_country_unique": "United Arab Emirates;;China" }, { "title": "Structured Video-Language Modeling with Temporal Grouping and Spatial Grounding", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19422", "id": "5dlfiJIXoh", "author_site": "Yuanhao Xiong, Long Zhao, Boqing Gong, Ming-Hsuan Yang, Florian Schroff, Ting Liu, Cho-Jui Hsieh, Liangzhe Yuan", "tldr": "", "abstract": "Existing video-language pre-training methods primarily focus on instance-level alignment between video clips and captions via global contrastive learning but neglect rich fine-grained local information in both videos and text, which is of importance to downstream tasks requiring temporal localization and semantic reasoning. A powerful model is expected to be capable of capturing region-object correspondences and recognizing scene changes in a video clip, reflecting spatial and temporal granularity, respectively. To strengthen model's understanding into such fine-grained details, we propose a simple yet effective video-language modeling framework, S-ViLM, by exploiting the intrinsic structures of these two modalities. It includes two novel designs, inter-clip spatial grounding and intra-clip temporal grouping, to promote learning region-object alignment and temporal-aware features, simultaneously. Comprehensive evaluations demonstrate that S-ViLM performs favorably against existing approaches in learning more expressive representations. Specifically, S-ViLM surpasses the state-of-the-art methods substantially on four representative downstream tasks, covering text-video retrieval, video question answering, video action recognition, and temporal action localization.", "keywords": "multi-modal learning;video and language", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Yuanhao Xiong;Long Zhao;Boqing Gong;Ming-Hsuan Yang;Florian Schroff;Ting Liu;Cho-Jui Hsieh;Liangzhe Yuan", "authorids": "~Yuanhao_Xiong1;~Long_Zhao2;~Boqing_Gong1;~Ming-Hsuan_Yang1;~Florian_Schroff1;~Ting_Liu4;~Cho-Jui_Hsieh1;~Liangzhe_Yuan2", "gender": "M;M;M;M;M;;M;M", "homepage": "https://xyh97.github.io/;http://garyzhao.github.io/;http://boqinggong.info;https://faculty.ucmerced.edu/mhyang/;;http://tliu.org;http://web.cs.ucla.edu/~chohsieh/index.html;https://yuanliangzhe.github.io", "dblp": "232/1248;31/5383-3;29/7457;79/3711.html;52/5594;52/5150-5;14/2770;215/4356", "google_scholar": "DVKxiMkAAAAJ;YTyBTmgAAAAJ;lv9ZeVUAAAAJ;p9-ohHsAAAAJ;eWbZJlMAAAAJ;4wSfAIQAAAAJ;Wy89g4IAAAAJ;1H9CkZgAAAAJ", "orcid": ";0000-0001-8921-8564;;0000-0003-4848-2304;;;;", "linkedin": ";garyzhao9012/;boqing-gong-46aa5821/;minghsuanyang/;florianschroff;;;", "or_profile": "~Yuanhao_Xiong1;~Long_Zhao2;~Boqing_Gong1;~Ming-Hsuan_Yang1;~Florian_Schroff1;~Ting_Liu4;~Cho-Jui_Hsieh1;~Liangzhe_Yuan2", "aff": "University of California, Los Angeles;Google DeepMind;Google;University of California at Merced;Google;Google DeepMind;University of California, Los Angeles;Google DeepMind", "aff_domain": "cs.ucla.edu;google.com;google.com;umcerced.edu;google.com;google.com;ucla.edu;google.com", "position": "PhD student;Research scientist;Research 
Scientist;Professor;SWE;Researcher;Associate Professor;Researcher", "bibtex": "@inproceedings{\nxiong2024structured,\ntitle={Structured Video-Language Modeling with Temporal Grouping and Spatial Grounding},\nauthor={Yuanhao Xiong and Long Zhao and Boqing Gong and Ming-Hsuan Yang and Florian Schroff and Ting Liu and Cho-Jui Hsieh and Liangzhe Yuan},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=5dlfiJIXoh}\n}", "github": "", "project": "", "reviewers": "7b3w;Dxbj;tkWv;adH9", "pdf_size": 7466392, "rating": "6;6;6;8", "confidence": "5;4;5;4", "soundness": "3;3;2;3", "contribution": "2;2;3;3", "presentation": "3;3;3;2", "wc_summary": "130;56;136;158", "wc_strengths": "50;48;119;145", "wc_weaknesses": "54;7;165;217", "wc_questions": "106;5;86;12", "wc_review": "340;116;506;532", "wc_reply_reviewers": "275;0;57;25", "wc_reply_authors": "1921;320;931;646", "reply_reviewers": "2;0;1;1", "reply_authors": "5;2;3;2", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 120.0, 38.39270764090493 ], "wc_strengths_avg": [ 90.5, 42.51176307799995 ], "wc_weaknesses_avg": [ 110.75, 83.98921061660242 ], "wc_questions_avg": [ 52.25, 44.38679420728647 ], "wc_review_avg": [ 373.5, 165.91187419832252 ], "wc_reply_reviewers_avg": [ 89.25, 109.12922385868966 ], "wc_reply_authors_avg": [ 954.5, 598.4223007208204 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.0, 1.224744871391589 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:t6X5vmToUokJ:scholar.google.com/&scioq=Structured+Video-Language+Modeling+with+Temporal+Grouping+and+Spatial+Grounding&hl=en&as_sdt=0,5", "gs_version_total": 6, "openreview": "https://openreview.net/forum?id=5dlfiJIXoh", "pdf": "https://openreview.net/pdf?id=5dlfiJIXoh", "email": "cs.ucla.edu;google.com;google.com;umcerced.edu;google.com;google.com;ucla.edu;google.com", "author_num": 8, "aff_unique_index": "0;1;1;2;1;1;0;1", "aff_unique_norm": "University of California, Los Angeles;Google;University of California, Merced", "aff_unique_dep": ";Google DeepMind;", "aff_unique_url": "https://www.ucla.edu;https://deepmind.com;https://www.ucmerced.edu", "aff_unique_abbr": "UCLA;DeepMind;UC Merced", "aff_campus_unique_index": "0;2;3;2;0", "aff_campus_unique": "Los Angeles;;Mountain View;Merced", "aff_country_unique_index": "0;1;0;0;0;1;0;1", "aff_country_unique": "United States;United Kingdom" }, { "id": "5e0yWSNGIc", "title": "Exposing the Silent Hidden Impact of Certified Training in Reinforcement Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Deep reinforcement learning research has enabled reaching significant performance levels for sequential decision making in MDPs with highly complex observations and state dynamics with the aid of deep neural networks. However, this aid came with a cost that is inherent to deep neural networks which have increased volatilities towards indistinguishable peculiarly crafted non-robust directions. To alleviate these volatilities several studies suggested techniques to cope with this problem via explicitly regulating the temporal difference loss for the worst-case sensitivity. 
In our study, we show that these certified training techniques come with a cost that intriguingly causes inconsistencies and overestimations in the value functions. Furthermore, our results essentially demonstrate that vanilla trained deep reinforcement learning policies have more accurate and consistent estimates for the state-action values. We believe our results reveal foundational intrinsic properties of the certified Lipschitz training techniques and demonstrate the need to rethink the approach to resilience in deep reinforcement learning.", "keywords": "Decision Boundary Stability;Volatility;Rigorous Analysis;Reinforcement Learning", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/f80ad8557c8521b7d47f7992edef9e68eab7dfb4.zip", "author": "Ezgi Korkmaz", "authorids": "~Ezgi_Korkmaz2", "gender": "", "homepage": "https://ezgikorkmaz.github.io/", "dblp": "300/7830.html", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "~Ezgi_Korkmaz2", "aff": "University College London, University of London", "aff_domain": "ucl.ac.uk", "position": "PhD student", "bibtex": "@misc{\nkorkmaz2024exposing,\ntitle={Exposing the Silent Hidden Impact of Certified Training in Reinforcement Learning},\nauthor={Ezgi Korkmaz},\nyear={2024},\nurl={https://openreview.net/forum?id=5e0yWSNGIc}\n}", "github": "", "project": "", "reviewers": "JDGz;KcnX;yyyH", "site": "https://openreview.net/forum?id=5e0yWSNGIc", "pdf_size": 1988271, "rating": "5;5;6", "confidence": "3;3;3", "soundness": "3;3;3", "contribution": "2;2;3", "presentation": "3;3;3", "wc_summary": "80;46;41", "wc_strengths": "106;66;187", "wc_weaknesses": "458;163;294", "wc_questions": "184;54;50", "wc_review": "828;329;572", "wc_reply_reviewers": "567;62;207", "wc_reply_authors": "851;722;1042", "reply_reviewers": "1;1;1", "reply_authors": "3;2;2", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 55.666666666666664, 17.326921891156037 ], "wc_strengths_avg": [ 119.66666666666667, 50.33443707399096 ], "wc_weaknesses_avg": [ 305.0, 120.68416079447488 ], "wc_questions_avg": [ 96.0, 62.24682053459973 ], "wc_review_avg": [ 576.3333333333334, 203.7389397134371 ], "wc_reply_reviewers_avg": [ 278.6666666666667, 212.3021955190813 ], "wc_reply_authors_avg": [ 871.6666666666666, 131.45425904938273 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:YhN2TYHRaK0J:scholar.google.com/&scioq=Exposing+the+Silent+Hidden+Impact+of+Certified+Training+in+Reinforcement+Learning&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "University College London", "aff_unique_dep": "", "aff_unique_url": "https://www.ucl.ac.uk", "aff_unique_abbr": "UCL", "aff_country_unique_index": "0", "aff_country_unique": "United Kingdom" }, { "id": "5eLgTLusaR", "title": "Loco3D: Indoor Multiuser Locomotion 3D Dataset", "track": "main", "status": "Reject", "tldr": "", "abstract": "In the context of human-AI interaction, modeling human actions is a critical and challenging endeavor, with locomotion being a particularly fundamental behavior for AI agents to understand. 
Modeling human trajectories in complex indoor scenes, such as the home environment, requires an understanding of how humans interact with their surroundings and other humans. These interactions are influenced by a range of factors, including the geometry and semantics of the scene, the socio-cultural context, and the task each human needs to perform. Previous research has shared datasets containing human motion and scene structure in indoor scenes, but these datasets are limited in scale due to the difficulty and time required to collect data at different locations. To solve the scale problem, we propose to use a virtual reality (VR) system to build a human motion dataset. Specifically, we present Loco3D, a dataset of multi-person interactions in over 100 different indoor VR scenes, including 3D body pose data and highly accurate spatial information. The dataset can be used for building AI agents that operate in indoor environments, such as home robots, or to create virtual avatars for games or animations that mimic human movement and posture. With an initial evaluation, we demonstrate that models trained with our dataset have improved multi-person trajectory synthesis performance on real-world data.", "keywords": "Human trajectory synthesis;Indoor;Dataset;Multi-user;3D;Virtual reality", "primary_area": "datasets and benchmarks", "supplementary_material": "/attachment/463ae5689dcf0be65b0ca8c5e0a2a085a7b507fa.zip", "author": "Kojiro Takeyama;Yimeng Liu;Misha Sra", "authorids": "~Kojiro_Takeyama1;~Yimeng_Liu1;~Misha_Sra1", "gender": "M;;F", "homepage": ";;https://sites.cs.ucsb.edu/~sra", "dblp": ";;119/4545", "google_scholar": "TRtprtgAAAAJ;hROpwCcAAAAJ;yDkV9BsAAAAJ", "orcid": ";0000-0002-6742-2908;0000-0001-8154-8518", "linkedin": ";yimeng-liu-1a857a172/;mishasra", "or_profile": "~Kojiro_Takeyama1;~Yimeng_Liu1;~Misha_Sra1", "aff": "Toyota Motor North America;University of California, Santa Barbara;University of California, Santa Barbara", "aff_domain": "toyota.com;ucsb.edu;ucsb.edu", "position": "Researcher;PhD student;Assistant Professor", "bibtex": "@misc{\ntakeyama2024locod,\ntitle={Loco3D: Indoor Multiuser Locomotion 3D Dataset},\nauthor={Kojiro Takeyama and Yimeng Liu and Misha Sra},\nyear={2024},\nurl={https://openreview.net/forum?id=5eLgTLusaR}\n}", "github": "", "project": "", "reviewers": "v7u4;EhxJ;Zavr;Yanw", "site": "https://openreview.net/forum?id=5eLgTLusaR", "pdf_size": 19739885, "rating": "3;5;5;5", "confidence": "3;5;3;2", "soundness": "3;2;2;3", "contribution": "2;2;3;3", "presentation": "3;3;3;2", "wc_summary": "140;36;56;40", "wc_strengths": "78;52;115;26", "wc_weaknesses": "211;193;210;147", "wc_questions": "116;86;148;95", "wc_review": "545;367;529;308", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1333;1001;1402;952", "reply_reviewers": "0;0;0;0", "reply_authors": "2;2;2;2", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 1.0897247358851685 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 68.0, 42.23742416388575 ], "wc_strengths_avg": [ 67.75, 32.89661836724255 ], "wc_weaknesses_avg": [ 190.25, 25.974747351995553 ], "wc_questions_avg": [ 111.25, 23.84716964337697 ], "wc_review_avg": [ 437.25, 102.06462413588756 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1172.0, 197.77638888401214 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 
0.13245323570650439, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:oO1oLZ5fgQMJ:scholar.google.com/&scioq=Loco3D:+Indoor+Multiuser+Locomotion+3D+Dataset&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;1", "aff_unique_norm": "Toyota Motor Corporation;University of California, Santa Barbara", "aff_unique_dep": ";", "aff_unique_url": "https://www.toyota.com;https://www.ucsb.edu", "aff_unique_abbr": "Toyota;UCSB", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Santa Barbara", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "5elND8cf8r", "title": "Contrastive Implicit Representation Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Implicit Neural Representations have emerged as an interesting alternative to traditional array representations. The challenge of performing downstream tasks directly on implicit representations has been addressed by several methods. Overcoming this challenge would open the door to the application of implicit representations to a wide range of fields. Then again, self-supervised representation learning methods, such as the several contrastive learning frameworks which have been proven powerful representation learning methods. So far, the use of self-supervised learning for implicit representations has remained unexplored, mostly because of the difficulty of producing valid augmented views of implicit representations to be used for learning contrasts. In this work, we adapt the popular SimCLR algorithm to implicit representations that consist of multiplicative filters networks and SIRENs. While methods to obtain augmentations in SIREN have been studied in the literature, we provide methods for augmenting MFNs effectively. We show how MFNs lend themselves well to geometric augmentations. To the best of our knowledge, our work is the first to demonstrate that self-supervised learning on implicit representations of images is feasible and results in good downstream task performances.", "keywords": "Implicit neural representations;self-supervised-learning;contrastive learning;neural fields;multiplicative filter networks;SimCLR", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Riccardo Valperga;Samuele Papa;David W. 
Romero;Miltiadis Kofinas;Jan-jakob Sonke;Efstratios Gavves", "authorids": "~Riccardo_Valperga1;~Samuele_Papa1;~David_W._Romero1;~Miltiadis_Kofinas2;~Jan-jakob_Sonke1;~Efstratios_Gavves1", "gender": "M;M;M;M;;M", "homepage": ";https://samuelepapa.github.io/;https://davidwromero.xyz/;https://mkofinas.github.io;;https://www.egavves.com", "dblp": ";296/3702;254/1396;305/0160;20/4093;03/8693", "google_scholar": "IK64D1wAAAAJ;;7tdzmVoAAAAJ;Ur5BV8MAAAAJ;https://scholar.google.com/citations?hl=nl;https://scholar.google.nl/citations?user=QqfCvsgAAAAJ", "orcid": ";;;0000-0002-3392-4037;0000-0001-5155-5274;", "linkedin": ";samuelepapa/;david-w-romero-05893567/;miltiadiskofinas/;;", "or_profile": "~Riccardo_Valperga1;~Samuele_Papa1;~David_W._Romero1;~Miltiadis_Kofinas2;~Jan-jakob_Sonke1;~Efstratios_Gavves1", "aff": "University of Amsterdam;NXAI;NVIDIA;University of Amsterdam;University of Amsterdam;University of Amsterdam", "aff_domain": "uva.nl;nx-ai.com;nvidia.com;uva.nl;uva.nl;uva.nl", "position": "PhD student;Intern;Researcher;PhD student;Full Professor;Associate Professor", "bibtex": "@misc{\nvalperga2024contrastive,\ntitle={Contrastive Implicit Representation Learning},\nauthor={Riccardo Valperga and Samuele Papa and David W. Romero and Miltiadis Kofinas and Jan-jakob Sonke and Efstratios Gavves},\nyear={2024},\nurl={https://openreview.net/forum?id=5elND8cf8r}\n}", "github": "", "project": "", "reviewers": "QeAy;mwVc;1Uny", "site": "https://openreview.net/forum?id=5elND8cf8r", "pdf_size": 4941530, "rating": "1;3;3", "confidence": "3;3;3", "soundness": "2;3;2", "contribution": "1;1;2", "presentation": "2;3;3", "wc_summary": "43;157;54", "wc_strengths": "36;70;58", "wc_weaknesses": "688;393;160", "wc_questions": "24;191;17", "wc_review": "791;811;289", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 2.3333333333333335, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 1.3333333333333333, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 84.66666666666667, 51.344154703551425 ], "wc_strengths_avg": [ 54.666666666666664, 14.079141387961917 ], "wc_weaknesses_avg": [ 413.6666666666667, 216.04989135742593 ], "wc_questions_avg": [ 77.33333333333333, 80.42525860836396 ], "wc_review_avg": [ 630.3333333333334, 241.4971819481314 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Ic-fr-MAHnMJ:scholar.google.com/&scioq=Contrastive+Implicit+Representation+Learning&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;2;0;0;0", "aff_unique_norm": "University of Amsterdam;NXAI;NVIDIA", "aff_unique_dep": ";;NVIDIA Corporation", "aff_unique_url": "https://www.uva.nl;;https://www.nvidia.com", "aff_unique_abbr": "UvA;;NVIDIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;2;0;0;0", "aff_country_unique": "Netherlands;;United States" }, { "title": "Contextual Bandits with Online Neural Regression", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19421", "id": "5ep85sakT3", "author_site": "Rohan Deb, Yikun Ban, Shiliang Zuo, Jingrui He, Arindam Banerjee", "tldr": 
"", "abstract": "Recent works have shown a reduction from contextual bandits to online regression under a realizability assumption (Foster and Rakhlin, 2020; Foster and Krishnamurthy, 2021). In this work, we investigate the use of neural networks for such online regression and associated Neural Contextual Bandits (NeuCBs). Using existing results for wide networks, one can readily show a ${\\mathcal{O}}(\\sqrt{T})$ regret for online regression with square loss, which via the reduction implies a ${\\mathcal{O}}(\\sqrt{K} T^{3/4})$ regret for NeuCBs. Departing from this standard approach, we first show a $\\mathcal{O}(\\log T)$ regret for online regression with almost convex losses that satisfy QG (Quadratic Growth) condition, a generalization of the PL (Polyak-\\L ojasiewicz) condition, and that have a unique minima. Although not directly applicable to wide networks since they do not have unique minima, we show that adding a suitable small random perturbation to the network predictions surprisingly makes the loss satisfy QG with unique minima. Based on such a perturbed prediction, we show a ${\\mathcal{O}}(\\log T)$ regret for online regression with both squared loss and KL loss, and subsequently convert these respectively to $\\tilde{\\mathcal{O}}(\\sqrt{KT})$ and $\\tilde{\\mathcal{O}}(\\sqrt{KL^*} + K)$ regret for NeuCB, where $L^*$ is the loss of the best policy. Separately, we also show that existing regret bounds for NeuCBs are $\\Omega(T)$ or assume i.i.d. contexts, unlike this work. Finally, our experimental results on various datasets demonstrate that our algorithms, especially the one based on KL loss, persistently outperform existing algorithms.", "keywords": "Neural Bandits;Contextual Bandits;Regret Bounds;Deep Learning;Online Regression", "primary_area": "optimization", "supplementary_material": "/attachment/367a6ec5730f985923a8518d673013750decf8f0.zip", "author": "Rohan Deb;Yikun Ban;Shiliang Zuo;Jingrui He;Arindam Banerjee", "authorids": "~Rohan_Deb1;~Yikun_Ban1;~Shiliang_Zuo1;~Jingrui_He1;~Arindam_Banerjee4", "gender": "M;;M;F;", "homepage": "https://rohandeb24.github.io/;;;https://www.hejingrui.org;https://arindam.cs.illinois.edu/", "dblp": "305/0597.html;;195/4141;34/2685;82/4807.html", "google_scholar": "DiCbL00AAAAJ;;zkP0Vs0AAAAJ;hXpZynkAAAAJ;RY7cuPAAAAAJ", "orcid": ";;;0000-0002-6429-6272;", "linkedin": "rohan-deb-273986126;;;;", "or_profile": "~Rohan_Deb1;~Yikun_Ban1;~Shiliang_Zuo1;~Jingrui_He1;~Arindam_Banerjee4", "aff": "University of Illinois, Urbana Champaign;;Department of Computer Science, University of Illinois at Urbana-Champaign;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign", "aff_domain": "cs.illinois.edu;;cs.illinois.edu;illinois.edu;illinois.edu", "position": "PhD student;;PhD student;Full Professor;Professor", "bibtex": "@inproceedings{\ndeb2024contextual,\ntitle={Contextual Bandits with Online Neural Regression},\nauthor={Rohan Deb and Yikun Ban and Shiliang Zuo and Jingrui He and Arindam Banerjee},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=5ep85sakT3}\n}", "github": "", "project": "", "reviewers": "EQUF;eHnV;uhN1;PXpv;Gbgm", "pdf_size": 8456479, "rating": "5;5;6;6;8", "confidence": "3;3;4;4;4", "soundness": "2;3;3;3;3", "contribution": "3;2;3;3;3", "presentation": "2;2;3;3;4", "wc_summary": "77;77;62;71;164", "wc_strengths": "23;45;28;44;101", "wc_weaknesses": "137;87;9;272;14", "wc_questions": "41;3;64;2;27", "wc_review": 
"278;212;163;389;306", "wc_reply_reviewers": "0;170;0;0;17", "wc_reply_authors": "990;1693;715;1175;159", "reply_reviewers": "0;2;0;0;1", "reply_authors": "3;4;2;3;2", "rating_avg": [ 6.0, 1.0954451150103321 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "contribution_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 90.2, 37.306299736103554 ], "wc_strengths_avg": [ 48.2, 27.780568748677556 ], "wc_weaknesses_avg": [ 103.8, 96.6714021828586 ], "wc_questions_avg": [ 27.4, 23.51680250374187 ], "wc_review_avg": [ 269.6, 77.88863845260103 ], "wc_reply_reviewers_avg": [ 37.4, 66.62612100370244 ], "wc_reply_authors_avg": [ 946.4, 506.9743977756668 ], "reply_reviewers_avg": [ 0.6, 0.7999999999999999 ], "reply_authors_avg": [ 2.8, 0.7483314773547882 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.74535599249993, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=786987570628450731&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 6, "openreview": "https://openreview.net/forum?id=5ep85sakT3", "pdf": "https://openreview.net/pdf?id=5ep85sakT3", "email": "cs.illinois.edu;;cs.illinois.edu;illinois.edu;illinois.edu", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign", "aff_unique_dep": "", "aff_unique_url": "https://illinois.edu", "aff_unique_abbr": "UIUC", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "MiniLLM: Knowledge Distillation of Large Language Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19420", "id": "5h0qf7IBZZ", "author_site": "Yuxian Gu, Li Dong, Furu Wei, Minlie Huang", "tldr": "", "abstract": "Knowledge Distillation (KD) is a promising technique for reducing the high computational demand of large language models (LLMs). However, previous KD methods are primarily applied to white-box classification models or training small models to imitate black-box model APIs like ChatGPT. How to effectively distill the knowledge of white-box LLMs into small models is still under-explored, which becomes more important with the prosperity of open-source LLMs. In this work, we propose a KD approach that distills LLMs into smaller language models. We first replace the forward Kullback-Leibler divergence (KLD) objective in the standard KD approaches with reverse KLD, which is more suitable for KD on generative language models, to prevent the student model from overestimating the low-probability regions of the teacher distribution. Then, we derive an effective optimization approach to learn this objective. The student models are named MiniLLM. Extensive experiments in the instruction-following setting show that MiniLLM generates more precise responses with higher overall quality, lower exposure bias, better calibration, and higher long-text generation performance than the baselines. Our method is scalable for different model families\nwith 120M to 13B parameters. 
Our code, data, and model checkpoints can be found in https://github.com/microsoft/LMOps/tree/main/minillm.", "keywords": "Large Lanauge Models;Knowledge Distillation", "primary_area": "generative models", "supplementary_material": "/attachment/f056cb3654f515b1064a30e3360bbe5a89526f27.zip", "author": "Yuxian Gu;Li Dong;Furu Wei;Minlie Huang", "authorids": "~Yuxian_Gu1;~Li_Dong1;~Furu_Wei1;~Minlie_Huang1", "gender": "M;M;M;M", "homepage": "https://t1101675.github.io/;http://dong.li;https://www.microsoft.com/en-us/research/people/fuwei/;http://coai.cs.tsinghua.edu.cn/hml", "dblp": ";85/5090-4;72/5870;", "google_scholar": "zF9dr1sAAAAJ;wEfQgPgAAAAJ;G-V1VpwAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Yuxian_Gu1;~Li_Dong1;~Furu_Wei1;~Minlie_Huang1", "aff": "Tsinghua University;Microsoft Research;Microsoft Research;Tsinghua University", "aff_domain": "tsinghua.edu.cn;microsoft.com;microsoft.com;tsinghua.edu.cn", "position": "PhD student;Principal Researcher;Distinguished Scientist;Full Professor", "bibtex": "@inproceedings{\ngu2024minillm,\ntitle={Mini{LLM}: Knowledge Distillation of Large Language Models},\nauthor={Yuxian Gu and Li Dong and Furu Wei and Minlie Huang},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=5h0qf7IBZZ}\n}", "github": "", "project": "", "reviewers": "HmVQ;oknc;v2PG;BHa2", "pdf_size": 672880, "rating": "5;6;6;8", "confidence": "3;4;4;5", "soundness": "3;2;3;4", "contribution": "2;2;3;4", "presentation": "4;3;3;4", "wc_summary": "117;119;52;190", "wc_strengths": "251;129;49;97", "wc_weaknesses": "618;233;117;160", "wc_questions": "123;60;2;383", "wc_review": "1109;541;220;830", "wc_reply_reviewers": "410;347;0;200", "wc_reply_authors": "1973;541;411;1100", "reply_reviewers": "2;2;0;1", "reply_authors": "4;2;1;2", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 119.5, 48.81854155953453 ], "wc_strengths_avg": [ 131.5, 74.63745708422816 ], "wc_weaknesses_avg": [ 282.0, 198.37212505793246 ], "wc_questions_avg": [ 142.0, 145.57300573938838 ], "wc_review_avg": [ 675.0, 330.6667506720323 ], "wc_reply_reviewers_avg": [ 239.25, 157.75515047059477 ], "wc_reply_authors_avg": [ 1006.25, 615.2590409737999 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.9733285267845754, "gs_citation": 375, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5304283176625294100&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=5h0qf7IBZZ", "pdf": "https://openreview.net/pdf?id=5h0qf7IBZZ", "email": "tsinghua.edu.cn;microsoft.com;microsoft.com;tsinghua.edu.cn", "author_num": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Tsinghua University;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "THU;MSR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "China;United States" }, { "title": "Towards Robust Offline Reinforcement Learning under Diverse Data Corruption", "status": "Spotlight", "track": 
"main", "site": "https://iclr.cc/virtual/2024/poster/19419", "id": "5hAMmCU0bK", "author_site": "Rui Yang, Han Zhong, Jiawei Xu, Amy Zhang, Chongjie Zhang, Lei Han, Tong Zhang", "tldr": "", "abstract": "Offline reinforcement learning (RL) presents a promising approach for learning reinforced policies from offline datasets without the need for costly or unsafe interactions with the environment. However, datasets collected by humans in real-world environments are often noisy and may even be maliciously corrupted, which can significantly degrade the performance of offline RL. In this work, we first investigate the performance of current offline RL algorithms under comprehensive data corruption, including states, actions, rewards, and dynamics. Our extensive experiments reveal that implicit Q-learning (IQL) demonstrates remarkable resilience to data corruption among various offline RL algorithms. Furthermore, we conduct both empirical and theoretical analyses to understand IQL's robust performance, identifying its supervised policy learning scheme as the key factor. Despite its relative robustness, IQL still suffers from heavy-tail targets of Q functions under dynamics corruption. To tackle this challenge, we draw inspiration from robust statistics to employ the Huber loss to handle the heavy-tailedness and utilize quantile estimators to balance penalization for corrupted data and learning stability. By incorporating these simple yet effective modifications into IQL, we propose a more robust offline RL approach named Robust IQL (RIQL). Extensive experiments demonstrate that RIQL exhibits highly robust performance when subjected to diverse data corruption scenarios.", "keywords": "Offline RL;robust RL;data corruption;training-time attack", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/b59d032fee8c352db24e853035e62321928542aa.pdf", "author": "Rui Yang;Han Zhong;Jiawei Xu;Amy Zhang;Chongjie Zhang;Lei Han;Tong Zhang", "authorids": "~Rui_Yang8;~Han_Zhong1;~Jiawei_Xu1;~Amy_Zhang1;~Chongjie_Zhang1;~Lei_Han1;~Tong_Zhang2", "gender": "M;;M;;M;M;F", "homepage": "https://yangrui2015.github.io;https://hanzhong-ml.github.io/;https://github.com/jiawei415;;https://www.leihan.org;http://tongzhang-ml.org;", "dblp": "92/1942-10;137/8096.html;;29/6693;75/2307-1;07/4227-1;43/2754", "google_scholar": "QHSUy3MAAAAJ;Bk5q_pAAAAAJ;;LjxqXycAAAAJ;Tz4_zi8AAAAJ;LurWtuYAAAAJ;", "orcid": "0000-0003-3525-1726;;;;;0000-0002-5511-2558;", "linkedin": ";;;;;;", "or_profile": "~Rui_Yang8;~Han_Zhong1;~Jiawei_Xu1;~Chongjie_Zhang1;~Lei_Han1;~Tong_Zhang2;~Amy_Zhang2", "aff": "Hong Kong University of Science and Technology;Peking University;CUHK(SZ);Washington University, Saint Louis;Tencent Robotics X;UIUC;Meta Facebook", "aff_domain": "ust.hk;stu.pku.edu.cn;link.cuhk.edu.cn;wustl.edu;tencent.com;illinois.edu;facebook.com", "position": "PhD student;PhD student;PhD student;Associate Professor;Principal Researcher;Full Professor;Research Scientist", "bibtex": "@inproceedings{\nyang2024towards,\ntitle={Towards Robust Offline Reinforcement Learning under Diverse Data Corruption},\nauthor={Rui Yang and Han Zhong and Jiawei Xu and Amy Zhang and Chongjie Zhang and Lei Han and Tong Zhang},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=5hAMmCU0bK}\n}", "github": "", "project": "", "reviewers": "dDco;41NY;wKTe;t4o4", "pdf_size": 3305431, "rating": "6;6;8;8", "confidence": "4;4;5;3", "soundness": "3;3;3;3", "contribution": 
"3;2;3;3", "presentation": "4;3;3;3", "wc_summary": "98;90;84;136", "wc_strengths": "91;49;274;38", "wc_weaknesses": "322;104;185;14", "wc_questions": "69;33;36;81", "wc_review": "580;276;579;269", "wc_reply_reviewers": "309;23;0;12", "wc_reply_authors": "2517;753;750;681", "reply_reviewers": "2;1;0;1", "reply_authors": "5;1;1;1", "rating_avg": [ 7.0, 1.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 102.0, 20.248456731316587 ], "wc_strengths_avg": [ 113.0, 95.03420436874295 ], "wc_weaknesses_avg": [ 156.25, 113.20860170499414 ], "wc_questions_avg": [ 54.75, 20.716840975399702 ], "wc_review_avg": [ 426.0, 153.52035695633333 ], "wc_reply_reviewers_avg": [ 86.0, 129.00581382247856 ], "wc_reply_authors_avg": [ 1175.25, 775.1949351614728 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 1.7320508075688772 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9291883893276173496&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=5hAMmCU0bK", "pdf": "https://openreview.net/pdf?id=5hAMmCU0bK", "email": "ust.hk;stu.pku.edu.cn;link.cuhk.edu.cn;wustl.edu;tencent.com;illinois.edu;facebook.com", "author_num": 7, "aff_unique_index": "0;1;2;3;4;5;6", "aff_unique_norm": "Hong Kong University of Science and Technology;Peking University;Chinese University of Hong Kong, Shenzhen;Washington University in St. Louis;Tencent;University of Illinois Urbana-Champaign;Meta", "aff_unique_dep": ";;;;Tencent Robotics X;;Meta Platforms, Inc.", "aff_unique_url": "https://www.ust.hk;http://www.pku.edu.cn;https://www.cuhk.edu.cn/sz;https://wustl.edu;https://www.tencent.com;https://www illinois.edu;https://meta.com", "aff_unique_abbr": "HKUST;Peking U;CUHK(SZ);WUSTL;Tencent Robotics X;UIUC;Meta", "aff_campus_unique_index": "0;2;3;4", "aff_campus_unique": "Hong Kong SAR;;Shenzhen;Saint Louis;Urbana-Champaign", "aff_country_unique_index": "0;0;0;1;0;1;1", "aff_country_unique": "China;United States" }, { "title": "INViTE: INterpret and Control Vision-Language Models with Text Explanations", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19418", "id": "5iENGLEJKG", "author_site": "Haozhe Chen, Junfeng Yang, Carl Vondrick, Chengzhi Mao", "tldr": "", "abstract": "Large-scale pre-trained vision foundation models, such as CLIP, have become de facto backbones for various vision tasks. However, due to their black-box nature, understanding the underlying rules behind these models\u2019 predictions and controlling model behaviors have remained open challenges. We present INViTE: a framework for INterpreting Vision Transformer\u2019s latent tokens with Text Explanations. Given a latent token, INViTE retains its semantic information to the final layer using transformer\u2019s local operations and retrieves the closest text for explanation. INViTE enables understanding of model visual reasoning procedure without needing additional model training or data collection. Based on the obtained interpretations, INViTE allows for model editing that controls model reasoning behaviors and improves model robustness against biases and spurious correlations. 
Our code is available at https://github.com/tonychenxyz/vit-interpret.", "keywords": "Interpretation; Transformer", "primary_area": "visualization or interpretation of learned representations", "supplementary_material": "", "author": "Haozhe Chen;Junfeng Yang;Carl Vondrick;Chengzhi Mao", "authorids": "~Haozhe_Chen2;~Junfeng_Yang1;~Carl_Vondrick2;~Chengzhi_Mao2", "gender": "M;M;M;M", "homepage": ";https://www.cs.columbia.edu/~junfeng/;http://www.cs.columbia.edu/~vondrick/;http://www.cs.columbia.edu/~mcz/", "dblp": ";71/3724.html;26/8610;", "google_scholar": ";JJ9AvbAAAAAJ;3MzhkFIAAAAJ;pTTEiHUAAAAJ", "orcid": ";0009-0000-2277-6545;;", "linkedin": "haozhe-chen/;;;", "or_profile": "~Haozhe_Chen2;~Junfeng_Yang1;~Carl_Vondrick2;~Chengzhi_Mao2", "aff": "Columbia University;Columbia University;Columbia University;Mila - Quebec Artificial Intelligence Institute", "aff_domain": "columbia.edu;columbia.edu;columbia.edu;mila.quebec", "position": "Undergrad student;Associate Professor;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nchen2024invite,\ntitle={{INV}i{TE}: {IN}terpret and Control Vision-Language Models with Text Explanations},\nauthor={Haozhe Chen and Junfeng Yang and Carl Vondrick and Chengzhi Mao},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=5iENGLEJKG}\n}", "github": "", "project": "", "reviewers": "jxBW;ZFTq;aYwS;jZ36", "pdf_size": 4058511, "rating": "3;5;5;8", "confidence": "3;4;4;4", "soundness": "2;3;3;2", "contribution": "2;2;2;3", "presentation": "3;3;2;4", "wc_summary": "68;86;102;88", "wc_strengths": "55;53;67;91", "wc_weaknesses": "220;39;221;154", "wc_questions": "23;60;2;19", "wc_review": "366;238;392;352", "wc_reply_reviewers": "0;22;0;0", "wc_reply_authors": "1081;1240;998;1328", "reply_reviewers": "0;1;0;0", "reply_authors": "3;3;3;3", "rating_avg": [ 5.25, 1.7853571071357126 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 86.0, 12.083045973594572 ], "wc_strengths_avg": [ 66.5, 15.124483462254174 ], "wc_weaknesses_avg": [ 158.5, 74.1434420565973 ], "wc_questions_avg": [ 26.0, 21.15419580130618 ], "wc_review_avg": [ 337.0, 58.93216439263028 ], "wc_reply_reviewers_avg": [ 5.5, 9.526279441628825 ], "wc_reply_authors_avg": [ 1161.75, 129.51520181044387 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 0.0 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7276068751089989, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7823960357726140750&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 2, "openreview": "https://openreview.net/forum?id=5iENGLEJKG", "pdf": "https://openreview.net/pdf?id=5iENGLEJKG", "email": "columbia.edu;columbia.edu;columbia.edu;mila.quebec", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Columbia University;Quebec Artificial Intelligence Institute", "aff_unique_dep": ";Artificial Intelligence", "aff_unique_url": "https://www.columbia.edu;https://mila.quebec", "aff_unique_abbr": "Columbia;Mila", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United States;Canada" }, { "id": "5j6wtOO6Fk", "title": "Hieros: Hierarchical Imagination on Structured State Space Sequence World Models", "track": "main", "status": 
"Reject", "tldr": "", "abstract": "One of the biggest challenges to modern deep reinforcement learning (DRL) algorithms is sample efficiency. Many approaches learn a world model in order to train an agent entirely in imagination, eliminating the need for direct environment interaction during training. However, these methods often suffer from either a lack of imagination accuracy, exploration capabilities, or runtime efficiency. We propose HIEROS, a hierarchical policy that learns time abstracted world representations and imagines trajectories at multiple time scales in latent space. HIEROS uses an S5 layer-based world model, which predicts next world states in parallel during training and iteratively during environment interaction. Due to the special properties of S5 layers, our method can train in parallel and predict next world states iteratively during imagination. This allows for more efficient training than RNN-based world models and more efficient imagination than Transformer-based world models. We show that our approach outperforms the state of the art in terms of mean and median normalized human score on the Atari 100k benchmark, and that our proposed world model is able to predict complex dynamics very accurately. We also show that HIEROS displays superior exploration capabilities compared to existing approaches.", "keywords": "Reinforcement Learning;Hierarchical Models;Deep Learning;Structured State Space Model", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/75980abe34c0f43e2c8ffcd073f836bed822aed4.zip", "author": "Paul Mattes;Rainer Schlosser;Ralf Herbrich", "authorids": "~Paul_Mattes1;~Rainer_Schlosser1;~Ralf_Herbrich1", "gender": "M;;M", "homepage": "https://github.com/Snagnar;https://hpi.de/herbrich/people/postdocs/dr-rainer-schlosser.html;https://herbrich.me", "dblp": ";129/4931;h/RalfHerbrich", "google_scholar": ";https://scholar.google.de/citations?user=A5TrKKcAAAAJ;RuvHkikAAAAJ", "orcid": ";0000-0002-6627-4026;", "linkedin": "paul-mattes-1a3455228/;;ralf-herbrich-28a8324/", "or_profile": "~Paul_Mattes1;~Rainer_Schlosser1;~Ralf_Herbrich1", "aff": ";Hasso Plattner Institute;Hasso Plattner Institute", "aff_domain": ";hpi.de;hpi.de", "position": ";Researcher;Full Professor", "bibtex": "@misc{\nmattes2024hieros,\ntitle={Hieros: Hierarchical Imagination on Structured State Space Sequence World Models},\nauthor={Paul Mattes and Rainer Schlosser and Ralf Herbrich},\nyear={2024},\nurl={https://openreview.net/forum?id=5j6wtOO6Fk}\n}", "github": "", "project": "", "reviewers": "UXtg;EjYN;5ii1", "site": "https://openreview.net/forum?id=5j6wtOO6Fk", "pdf_size": 5414859, "rating": "3;3;8", "confidence": "4;3;4", "soundness": "2;2;4", "contribution": "2;3;3", "presentation": "3;1;4", "wc_summary": "71;140;99", "wc_strengths": "29;156;102", "wc_weaknesses": "266;947;248", "wc_questions": "236;388;25", "wc_review": "602;1631;474", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "342;833;241", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 4.666666666666667, 2.357022603955158 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.9428090415820634 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 1.247219128924647 ], "wc_summary_avg": [ 103.33333333333333, 28.335294049804546 ], "wc_strengths_avg": [ 95.66666666666667, 52.04058245467879 ], "wc_weaknesses_avg": [ 487.0, 325.3521169440887 ], "wc_questions_avg": [ 
216.33333333333334, 148.84518429861575 ], "wc_review_avg": [ 902.3333333333334, 517.8882333305345 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 472.0, 258.5742962219305 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5000000000000001, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16594461642192045397&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff_unique_index": "0;0", "aff_unique_norm": "Hasso Plattner Institute", "aff_unique_dep": "", "aff_unique_url": "https://www.hpi.de", "aff_unique_abbr": "HPI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "title": "Some Fundamental Aspects about Lipschitz Continuity of Neural Networks", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19417", "id": "5jWsW08zUh", "author_site": "Grigory Khromov, Sidak Pal Singh", "tldr": "", "abstract": "Lipschitz continuity is a crucial functional property of any predictive model, that naturally governs its robustness, generalisation, as well as adversarial vulnerability. Contrary to other works that focus on obtaining tighter bounds and developing different practical strategies to enforce certain Lipschitz properties, we aim to thoroughly examine and characterise the Lipschitz behaviour of Neural Networks. Thus, we carry out an empirical investigation in a range of different settings (namely, architectures, datasets, label noise, and more) by exhausting the limits of the simplest and the most general lower and upper bounds. As a highlight of this investigation, we showcase a remarkable fidelity of the lower Lipschitz bound, identify a striking Double Descent trend in both upper and lower bounds to the Lipschitz and explain the intriguing effects of label noise on function smoothness and generalisation.", "keywords": "Lipschitz continuity;Double Descent;Label Noise;Generalization", "primary_area": "learning theory", "supplementary_material": "/attachment/6f2067e3b67cea7bfa8af7bc5a39136f8a81af0d.zip", "author": "Grigory Khromov;Sidak Pal Singh", "authorids": "~Grigory_Khromov1;~Sidak_Pal_Singh1", "gender": "M;", "homepage": ";http://sidakpal.com/", "dblp": "340/7871;189/9168", "google_scholar": "https://scholar.google.com/citations?hl=en;c59mPS4AAAAJ", "orcid": ";", "linkedin": "gkhromov/;", "or_profile": "~Grigory_Khromov1;~Sidak_Pal_Singh1", "aff": ";Max Planck Institute for Intelligent Systems", "aff_domain": ";tuebingen.mpg.de", "position": ";PhD student", "bibtex": "@inproceedings{\nkhromov2024some,\ntitle={Some Fundamental Aspects about Lipschitz Continuity of Neural Networks},\nauthor={Grigory Khromov and Sidak Pal Singh},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=5jWsW08zUh}\n}", "github": "", "project": "", "reviewers": "Sk7T;EeTb;Jyw9;F3fH", "pdf_size": 5628959, "rating": "3;6;6;8", "confidence": "4;2;3;4", "soundness": "2;3;3;4", "contribution": "1;3;3;3", "presentation": "2;3;3;3", "wc_summary": "57;177;113;89", "wc_strengths": "54;21;56;92", "wc_weaknesses": "188;8;194;196", "wc_questions": "42;29;72;316", "wc_review": "341;235;435;693", "wc_reply_reviewers": "31;5;104;33", "wc_reply_authors": "1466;489;754;1457", "reply_reviewers": "1;1;1;1", "reply_authors": "3;1;2;3", "rating_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 3.25, 
0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 109.0, 44.0 ], "wc_strengths_avg": [ 55.75, 25.1234452255259 ], "wc_weaknesses_avg": [ 146.5, 80.01718565408308 ], "wc_questions_avg": [ 114.75, 117.23347431514601 ], "wc_review_avg": [ 426.0, 169.61426826773743 ], "wc_reply_reviewers_avg": [ 43.25, 36.772102197182036 ], "wc_reply_authors_avg": [ 1041.5, 430.33504389022283 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.1266600992762247, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13201720659336215886&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "openreview": "https://openreview.net/forum?id=5jWsW08zUh", "pdf": "https://openreview.net/pdf?id=5jWsW08zUh", "email": ";tuebingen.mpg.de", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Max Planck Institute for Intelligent Systems", "aff_unique_dep": "Intelligent Systems", "aff_unique_url": "https://www.mpi-is.mpg.de", "aff_unique_abbr": "MPI-IS", "aff_country_unique_index": "0", "aff_country_unique": "Germany" }, { "title": "Jointly Training Large Autoregressive Multimodal Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19416", "id": "5jcav5RcKw", "author_site": "Emanuele Aiello, Lili Yu, Yixin Nie, Armen Aghajanyan, Barlas Oguz", "tldr": "", "abstract": "In recent years, advances in the large-scale pretraining of language and text-to-image models have revolutionized the field of machine learning. Yet, integrating these two modalities into a single, robust model capable of generating seamless multimodal outputs remains a significant challenge. To address this gap, we present the Joint Autoregressive Mixture (JAM) framework, a modular approach that systematically fuses existing text and image generation models. We also introduce a specialized, data-efficient instruction-tuning strategy, tailored for mixed-modal generation tasks. 
Our final instruct-tuned model demonstrates unparalleled performance in generating high-quality multimodal outputs and represents the first model explicitly designed for this purpose.", "keywords": "Large Multimodal Models; Joint Training; Interleaved Image-Text Generation; Autoregressive Models", "primary_area": "generative models", "supplementary_material": "", "author": "Emanuele Aiello;LILI YU;Yixin Nie;Armen Aghajanyan;Barlas Oguz", "authorids": "~Emanuele_Aiello1;~LILI_YU1;~Yixin_Nie2;~Armen_Aghajanyan1;~Barlas_Oguz1", "gender": "M;F;M;;", "homepage": ";https://scholar.google.com/citations?hl=en&user=wY932-AAAAAJ&view_op=list_works&authuser=1&sortby=pubdate;https://easonnie.github.io;;", "dblp": ";;205/2725;;https://dblp.org/pers/hd/o/Oguz:Barlas", "google_scholar": "oZlKgH4AAAAJ;https://scholar.google.com/citations?hl=en;g5QpITUAAAAJ;;iPmTQZMAAAAJ", "orcid": "0000-0001-7133-4137;;;;", "linkedin": "emanuele-aiello-765945105/;lili-yu-6771961a/;;;barlas-o%C4%9Fuz-25465050", "or_profile": "~Emanuele_Aiello1;~LILI_YU1;~Yixin_Nie2;~Armen_Aghajanyan1;~Barlas_Oguz1", "aff": "Politecnico di Torino;Meta Facebook;Meta Platforms, Inc.;;Meta", "aff_domain": "polito.it;fb.com;meta.com;;meta.com", "position": "PhD student;Researcher;Researcher;;Research Scientist", "bibtex": "@inproceedings{\naiello2024jointly,\ntitle={Jointly Training Large Autoregressive Multimodal Models},\nauthor={Emanuele Aiello and LILI YU and Yixin Nie and Armen Aghajanyan and Barlas Oguz},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=5jcav5RcKw}\n}", "github": "", "project": "", "reviewers": "Ghzd;qfqe;eaYf;jvv1", "pdf_size": 44391665, "rating": "5;5;6;6", "confidence": "3;4;3;3", "soundness": "3;2;2;4", "contribution": "2;2;4;3", "presentation": "3;2;2;3", "wc_summary": "176;20;74;38", "wc_strengths": "73;44;39;158", "wc_weaknesses": "97;344;58;157", "wc_questions": "193;36;43;47", "wc_review": "539;444;214;400", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1075;1148;818;962", "reply_reviewers": "0;0;0;0", "reply_authors": "3;3;2;3", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "contribution_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 77.0, 60.37383539249432 ], "wc_strengths_avg": [ 78.5, 47.699580710945455 ], "wc_weaknesses_avg": [ 164.0, 109.74288131810646 ], "wc_questions_avg": [ 79.75, 65.50333960951914 ], "wc_review_avg": [ 399.25, 118.16381637371062 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1000.75, 124.59409095137698 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5901155463886075827&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=5jcav5RcKw", "pdf": "https://openreview.net/pdf?id=5jcav5RcKw", "email": "polito.it;fb.com;meta.com;;meta.com", "author_num": 5, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Politecnico di Torino;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.polito.it;https://meta.com", "aff_unique_abbr": "Polito;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "Italy;United States" }, { 
"id": "5jxtlpla15", "title": "Diffusion Models for Open-Vocabulary Segmentation", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "The variety of objects in the real world is unlimited and is thus impossible to capture using models trained on a closed, pre-defined set of categories. Recently, open-vocabulary recognition has garnered significant attention, largely facilitated by advances in large-scale vision-language modelling. In this paper, we present OVDiff, a novel method that leverages the generative properties of text-to-image diffusion models for open-vocabulary segmentation. Specifically, we propose to synthesise support image sets from arbitrary textual categories, creating for each category a set of prototypes representative of both the category itself and its surrounding context (background). Our method relies solely on pre-trained components: segmentation is obtained by simply comparing a target image to the prototypes without further fine-tuning. We show that our method can be used to ground any pre-trained self-supervised feature extractor in natural language and provide explainable predictions by mapping back to regions in the support set. Our approach shows strong performance on a range of open-vocabulary segmentation benchmarks, obtaining a lead of more than 10% over prior work on PASCAL VOC.", "keywords": "computer vision;semantic segmentation;open-vocabulary segmentation", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Laurynas Karazija;Iro Laina;Andrea Vedaldi;Christian Rupprecht", "authorids": "~Laurynas_Karazija1;~Iro_Laina1;~Andrea_Vedaldi1;~Christian_Rupprecht1", "gender": "M;M;M;", "homepage": "https://karazijal.github.io;https://www.robots.ox.ac.uk/~vedaldi/;http://chrirupp.github.io;", "dblp": "206/6117;99/2825;https://dblp.uni-trier.de/pid/76/744-1;182/2070", "google_scholar": "Kyt9trwAAAAJ;bRT7t28AAAAJ;https://scholar.google.de/citations?user=IrYlproAAAAJ;n9nXAPcAAAAJ", "orcid": ";0000-0003-1374-2858;;0000-0001-8857-7709", "linkedin": "laurynas-karazija-b9591b103/;;;", "or_profile": "~Laurynas_Karazija1;~Andrea_Vedaldi1;~Christian_Rupprecht1;~Iro_Laina2", "aff": "University of Oxford;Meta;University of Oxford;University of Oxford", "aff_domain": "ox.ac.uk;meta.com;ox.ac.uk;ox.ac.uk", "position": "PhD student;Researcher;Associate Professor;Lecturer", "bibtex": "@misc{\nkarazija2024diffusion,\ntitle={Diffusion Models for Open-Vocabulary Segmentation},\nauthor={Laurynas Karazija and Iro Laina and Andrea Vedaldi and Christian Rupprecht},\nyear={2024},\nurl={https://openreview.net/forum?id=5jxtlpla15}\n}", "github": "", "project": "", "reviewers": "PKyW;zXW9;FmYQ;kQ2p", "site": "https://openreview.net/forum?id=5jxtlpla15", "pdf_size": 3544489, "rating": "3;5;6;6", "confidence": "4;3;5;3", "soundness": "2;3;3;3", "contribution": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "28;143;38;45", "wc_strengths": "21;48;50;41", "wc_weaknesses": "246;167;88;119", "wc_questions": "5;68;3;19", "wc_review": "300;426;179;224", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "891;804;385;951", "reply_reviewers": "0;0;0;0", "reply_authors": "2;2;1;2", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 63.5, 46.295248136282844 ], "wc_strengths_avg": [ 40.0, 11.467344941179714 ], 
"wc_weaknesses_avg": [ 155.0, 59.602852280742404 ], "wc_questions_avg": [ 23.75, 26.280934153869037 ], "wc_review_avg": [ 282.25, 93.58518846484202 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 757.75, 221.46260971098485 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6050314759315356107&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 13, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Oxford;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.ox.ac.uk;https://meta.com", "aff_unique_abbr": "Oxford;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United Kingdom;United States" }, { "title": "Time-Efficient Reinforcement Learning with Stochastic Stateful Policies", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19415", "id": "5liV2xUdJL", "author_site": "Firas Al-Hafez, Guoping Zhao, Jan Peters, Davide Tateo", "tldr": "", "abstract": "Stateful policies play an important role in reinforcement learning, such as handling partially observable environments, enhancing robustness, or imposing an inductive bias directly into the policy structure. The conventional method for training stateful policies is Backpropagation Through Time (BPTT), which comes with significant drawbacks, such as slow training due to sequential gradient propagation and the occurrence of vanishing or exploding gradients. The gradient is often truncated to address these issues, resulting in a biased policy update. We present a novel approach for training stateful policies by decomposing the latter into a stochastic internal state kernel and a stateless policy, jointly optimized by following the stateful policy gradient. We introduce different versions of the stateful policy gradient theorem, enabling us to easily instantiate stateful variants of popular reinforcement learning and imitation learning algorithms. Furthermore, we provide a theoretical analysis of our new gradient estimator and compare it with BPTT. We evaluate our approach on complex continuous control tasks, e.g. 
humanoid locomotion, and demonstrate that our gradient estimator scales effectively with task complexity while offering a faster and simpler alternative to BPTT.", "keywords": "reinforcement learning;recurrent neural networks;stateful policies;backpropagation through time;imitation learning", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Firas Al-Hafez;Guoping Zhao;Jan Peters;Davide Tateo", "authorids": "~Firas_Al-Hafez1;~Guoping_Zhao1;~Jan_Peters3;~Davide_Tateo2", "gender": "M;M;M;M", "homepage": "https://firasalhafez.com/;;https://www.jan-peters.net;https://www.ias.informatik.tu-darmstadt.de/Team/DavideTateo", "dblp": ";;p/JanPeters1;214/0808", "google_scholar": "https://scholar.google.com/citations?hl=en;;https://scholar.google.de/citations?user=-kIVAcAAAAAJ;https://scholar.google.it/citations?user=LGnu3SEAAAAJ", "orcid": ";0000-0002-1908-5388;0000-0002-5266-8091;0000-0002-7193-923X", "linkedin": ";;janrpeters/;", "or_profile": "~Firas_Al-Hafez1;~Guoping_Zhao1;~Jan_Peters3;~Davide_Tateo2", "aff": "Technische Universit\u00e4t Darmstadt;Southeast University;TU Darmstadt;Technische Universit\u00e4t Darmstadt", "aff_domain": "tu-darmstadt.de;seu.edu.cn;tu-darmstadt.de;tu-darmstadt.de", "position": "PhD student;Associate Professor;Full Professor;Researcher", "bibtex": "@inproceedings{\nal-hafez2024timeefficient,\ntitle={Time-Efficient Reinforcement Learning with Stochastic Stateful Policies},\nauthor={Firas Al-Hafez and Guoping Zhao and Jan Peters and Davide Tateo},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=5liV2xUdJL}\n}", "github": "", "project": "", "reviewers": "gHcm;wQTz;HaqJ", "pdf_size": 8449457, "rating": "5;6;8", "confidence": "3;4;4", "soundness": "3;3;4", "contribution": "2;3;3", "presentation": "3;1;4", "wc_summary": "195;93;115", "wc_strengths": "65;348;91", "wc_weaknesses": "313;243;130", "wc_questions": "3;231;72", "wc_review": "576;915;408", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1306;1138;685", "reply_reviewers": "0;0;0", "reply_authors": "2;2;2", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 1.247219128924647 ], "wc_summary_avg": [ 134.33333333333334, 43.827946437049604 ], "wc_strengths_avg": [ 168.0, 127.72105020969201 ], "wc_weaknesses_avg": [ 228.66666666666666, 75.39378105800387 ], "wc_questions_avg": [ 102.0, 95.46727187890099 ], "wc_review_avg": [ 633.0, 210.86962796951104 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1043.0, 262.27085236449744 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7559289460184545, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1548611879883268075&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=5liV2xUdJL", "pdf": "https://openreview.net/pdf?id=5liV2xUdJL", "email": "tu-darmstadt.de;seu.edu.cn;tu-darmstadt.de;tu-darmstadt.de", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Technische Universit\u00e4t Darmstadt;Southeast University", "aff_unique_dep": ";", "aff_unique_url": "https://www.tu-darmstadt.de;https://www.seu.edu.cn/", "aff_unique_abbr": 
"TUD;SEU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Darmstadt", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Germany;China" }, { "id": "5mtwoRNzjm", "title": "Optimization without retraction on the random generalized Stiefel manifold for canonical correlation analysis", "track": "main", "status": "Reject", "tldr": "", "abstract": "Optimization over the set of matrices that satisfy $X^\\top B X = I_p$, referred to as the generalized Stiefel manifold, appears in many applications such as canonical correlation analysis (CCA) and the generalized eigenvalue problem. Solving these problems for large-scale datasets is computationally expensive and is typically done by either computing the closed-form solution with subsampled data or by iterative methods such as Riemannian approaches. Building on the work of Ablin \\& Peyr\u00e9 (2022), we propose an inexpensive iterative method that does not enforce the constraint in every iteration exactly, but instead it produces iterations that converge to the generalized Stiefel manifold. We also tackle the random case, where the matrix $B$ is an expectation. Our method requires only efficient matrix multiplications, and has the same sublinear convergence rate as its Riemannian counterpart. Experiments demonstrate its effectiveness in various machine learning applications involving generalized orthogonality constraints, including CCA for measuring model representation similarity.", "keywords": "Canonical correlation analysis;generalized eigenvalue problem;optimization on manifolds;streaming CCA", "primary_area": "optimization", "supplementary_material": "/attachment/7cbb0595a5fa111ba476edf6d81ebb05b4510932.pdf", "author": "Simon Vary;Pierre Ablin;Bin Gao;Pierre-Antoine Absil", "authorids": "~Simon_Vary1;~Pierre_Ablin2;gaobin@lsec.cc.ac.cn;~Pierre-Antoine_Absil1", "gender": "M;M;;", "homepage": "https://simonvary.github.io;https://pierreablin.com/;;https://sites.uclouvain.be/absil/", "dblp": "230/4630;174/0980.html;;08/1880", "google_scholar": "V6OqU-cAAAAJ;1ZsunaYAAAAJ;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Simon_Vary1;~Pierre_Ablin2;gaobin@lsec.cc.ac.cn;~Pierre-Antoine_Absil1", "aff": "University of Oxford;Apple;;UCLouvain", "aff_domain": "stats.ox.ac.uk;apple.com;;uclouvain.be", "position": "Postdoc;Researcher;;Full Professor", "bibtex": "@misc{\nvary2024optimization,\ntitle={Optimization without retraction on the random generalized Stiefel manifold for canonical correlation analysis},\nauthor={Simon Vary and Pierre Ablin and Bin Gao and Pierre-Antoine Absil},\nyear={2024},\nurl={https://openreview.net/forum?id=5mtwoRNzjm}\n}", "github": "", "project": "", "reviewers": "44jd;dA73;1p92;6Nz4", "site": "https://openreview.net/forum?id=5mtwoRNzjm", "pdf_size": 1440442, "rating": "5;5;6;10", "confidence": "3;3;4;4", "soundness": "2;3;3;4", "contribution": "2;3;3;4", "presentation": "3;2;3;4", "wc_summary": "94;66;142;56", "wc_strengths": "58;24;37;53", "wc_weaknesses": "227;137;62;40", "wc_questions": "36;116;144;24", "wc_review": "415;343;385;173", "wc_reply_reviewers": "0;126;35;14", "wc_reply_authors": "1533;1588;1152;75", "reply_reviewers": "0;1;1;1", "reply_authors": "3;3;2;1", "rating_avg": [ 6.5, 2.0615528128088303 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 89.5, 33.35790760824186 ], "wc_strengths_avg": [ 43.0, 13.435028842544403 ], "wc_weaknesses_avg": [ 
116.5, 73.23421331590858 ], "wc_questions_avg": [ 80.0, 51.146847410177685 ], "wc_review_avg": [ 329.0, 93.6269192059634 ], "wc_reply_reviewers_avg": [ 43.75, 49.09366048686938 ], "wc_reply_authors_avg": [ 1087.0, 607.9239261618184 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7276068751089989, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:3SLRpxMdePsJ:scholar.google.com/&scioq=Optimization+without+retraction+on+the+random+generalized+Stiefel+manifold+for+canonical+correlation+analysis&hl=en&as_sdt=0,14", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Oxford;Apple;Universit\u00e9 catholique de Louvain", "aff_unique_dep": ";Apple Inc.;", "aff_unique_url": "https://www.ox.ac.uk;https://www.apple.com;https://www.uclouvain.be", "aff_unique_abbr": "Oxford;Apple;UCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2", "aff_country_unique": "United Kingdom;United States;Belgium" }, { "id": "5nEmi3YIz4", "title": "ProtoNMF: Turning a Black Box into a Prototype Based Interpretable Model via Non-negative Matrix Factorization", "track": "main", "status": "Reject", "tldr": "", "abstract": "Models using parts of images as prototypes for interpretable image classification are receiving increasing attention due to their ability to provide a transparent reasoning process in a \"this looks like that\" manner. However, existing models are typically constructed by incorporating an additional prototype layer before the final classification head, which often involves complex multi-stage training procedures and intricate loss designs while under-performing their black box counterparts in terms of accuracy. In order to guarantee the recognition performance, we take the first step to explore the reverse direction and investigate how to turn a trained black box model into the form of a prototype based model. To this end, we propose to leverage the Non-negative Matrix Factorization (NMF) to discover interpretable prototypes due to its capability of yielding parts based representations. Then we use these prototypes as the basis to reconstruct the trained black box's classification head via linear convex optimization for transparent reasoning. Denoting the reconstruction difference as the residual prototype, all discovered prototypes together guarantee a precise final reconstruction. To the best of our knowledge, this is the first prototype based model that guarantees the recognition performance on par with black boxes for interpretable image classification. 
We demonstrate that our simple strategy can easily turn a trained black box into a prototype based model while discovering meaningful prototypes in various benchmark datasets and networks.", "keywords": "XAI;prototype based inherently interpretable model;non-negative matrix factorization", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Andong Tan;Hao Chen", "authorids": "~Andong_Tan1;~Hao_Chen1", "gender": "M;M", "homepage": ";https://cse.hkust.edu.hk/~jhc/", "dblp": "225/7131;86/475-11", "google_scholar": "zt49vSoAAAAJ;https://scholar.google.com.hk/citations?user=Z_t5DjwAAAAJ", "orcid": ";0000-0002-8400-3780", "linkedin": ";", "or_profile": "~Andong_Tan1;~Hao_Chen1", "aff": "Hong Kong University of Science and Technology;Hong Kong University of Science and Technology", "aff_domain": "connect.ust.hk;ust.hk", "position": "PhD student;Assistant Professor", "bibtex": "@misc{\ntan2024protonmf,\ntitle={Proto{NMF}: Turning a Black Box into a Prototype Based Interpretable Model via Non-negative Matrix Factorization},\nauthor={Andong Tan and Hao Chen},\nyear={2024},\nurl={https://openreview.net/forum?id=5nEmi3YIz4}\n}", "github": "", "project": "", "reviewers": "eeCj;XoPR;A726", "site": "https://openreview.net/forum?id=5nEmi3YIz4", "pdf_size": 5367528, "rating": "3;5;5", "confidence": "2;3;3", "soundness": "2;3;2", "contribution": "2;3;2", "presentation": "2;2;2", "wc_summary": "67;85;229", "wc_strengths": "26;96;12", "wc_weaknesses": "295;221;91", "wc_questions": "9;95;50", "wc_review": "397;497;382", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "555;324;1057", "reply_reviewers": "0;0;0", "reply_authors": "1;1;2", "rating_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 127.0, 72.49827584156743 ], "wc_strengths_avg": [ 44.666666666666664, 36.745370078721784 ], "wc_weaknesses_avg": [ 202.33333333333334, 84.32213364367757 ], "wc_questions_avg": [ 51.333333333333336, 35.122009560324926 ], "wc_review_avg": [ 425.3333333333333, 51.044642770378516 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 645.3333333333334, 305.9872909488599 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.9999999999999998, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:u3WY7AfvIQsJ:scholar.google.com/&scioq=ProtoNMF:+Turning+a+Black+Box+into+a+Prototype+Based+Interpretable+Model+via+Non-negative+Matrix+Factorization&hl=en&as_sdt=0,23", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Hong Kong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.ust.hk", "aff_unique_abbr": "HKUST", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "5nF9rGNpi3", "title": "From Fake to Real: Pretraining on Balanced Synthetic Images to Prevent Bias", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Visual recognition models are prone to learning spurious correlations induced by a biased training set where certain conditions $B$ (\\eg, Indoors) are 
over-represented in certain classes $Y$ (\\eg, Big Dogs). Synthetic data from generative models offers a promising direction to mitigate this issue by augmenting underrepresented conditions in the real dataset. However, this introduces another potential source of bias from generative model artifacts in the synthetic data. Indeed, as we will show, prior work uses synthetic data to resolve the model's bias toward $B$, but it doesn't correct the models' bias toward the pair $(B, G)$ where $G$ denotes whether the sample is real or synthetic. Thus, the model could simply learn signals based on the pair $(B, G)$ (\\eg, Synthetic Indoors) to make predictions about $Y$ (\\eg, Big Dogs). To address this issue, we propose a two-step training pipeline that we call From Fake to Real (FFR). The first step of FFR pre-trains a model on balanced synthetic data to learn robust representations across subgroups. In the second step, FFR fine-tunes the model on real data using ERM or common loss-based bias mitigation methods. By training on real and synthetic data separately, FFR avoids the issue of bias toward signals from the pair $(B, G)$. In other words, synthetic data in the first step provides effective unbiased representations that boosts performance in the second step. Indeed, our analysis of high bias setting (99.9\\%) shows that FFR improves performance over the state-of-the-art by 7-14\\% over three datasets (CelebA, UTK-Face, and SpuCO Animals).", "keywords": "Spurious Correlations;Bias;Fairness", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/74343866fe9638bfdebcdf61cde24d8266488f51.zip", "author": "Maan Qraitem;Kate Saenko;Bryan A. Plummer", "authorids": "~Maan_Qraitem1;~Kate_Saenko1;~Bryan_A._Plummer1", "gender": ";F;M", "homepage": "https://cs-people.bu.edu/mqraitem/;http://ai.bu.edu;http://bryanplummer.com/", "dblp": "250/5589;88/2754;163/2330", "google_scholar": "l8r237UAAAAJ;https://scholar.google.com.tw/citations?user=9xDADY4AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0002-5704-7614;", "linkedin": ";;", "or_profile": "~Maan_Qraitem1;~Kate_Saenko1;~Bryan_Allen_Plummer1", "aff": "Boston University;Boston University, Boston University;Boston University", "aff_domain": "bu.edu;bu.edu;bu.edu", "position": "PhD student;Full Professor;Assistant Professor", "bibtex": "@misc{\nqraitem2024from,\ntitle={From Fake to Real: Pretraining on Balanced Synthetic Images to Prevent Bias},\nauthor={Maan Qraitem and Kate Saenko and Bryan A. 
Plummer},\nyear={2024},\nurl={https://openreview.net/forum?id=5nF9rGNpi3}\n}", "github": "", "project": "", "reviewers": "EAfJ;tzaP;DngP;RuLj", "site": "https://openreview.net/forum?id=5nF9rGNpi3", "pdf_size": 3686706, "rating": "3;3;5;6", "confidence": "4;5;4;3", "soundness": "2;2;2;2", "contribution": "1;2;2;2", "presentation": "1;3;3;2", "wc_summary": "63;98;189;145", "wc_strengths": "13;87;78;98", "wc_weaknesses": "131;1362;325;517", "wc_questions": "85;56;153;14", "wc_review": "292;1603;745;774", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "505;1245;1098;768", "reply_reviewers": "0;0;0;0", "reply_authors": "1;2;2;1", "rating_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.0, 0.0 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 123.75, 47.59923843928598 ], "wc_strengths_avg": [ 69.0, 33.09833832687073 ], "wc_weaknesses_avg": [ 583.75, 469.59097893805415 ], "wc_questions_avg": [ 77.0, 50.62114182829147 ], "wc_review_avg": [ 853.5, 473.05522933374283 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 904.0, 287.92967891483505 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12799463189263644552&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Boston University", "aff_unique_dep": "", "aff_unique_url": "https://www.bu.edu", "aff_unique_abbr": "BU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Boston", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Linear Log-Normal Attention with Unbiased Concentration", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19414", "id": "5nM2AHzqUj", "author_site": "Yury Nahshan, Joseph Kampeas, Emir Haleva", "tldr": "", "abstract": "Transformer models have achieved remarkable results in a wide range of applications. However, their scalability is hampered by the quadratic time and memory complexity of the self-attention mechanism concerning the sequence length. This limitation poses a substantial obstacle when dealing with long documents or high-resolution images. In this work, we study the self-attention mechanism by analyzing the distribution of the attention matrix and its concentration ability. Furthermore, we propose instruments to measure these quantities and introduce a novel self-attention mechanism, Linear Log-Normal Attention, designed to emulate the distribution and concentration behavior of the original self-attention. 
Our experimental results on popular natural language benchmarks reveal that our proposed Linear Log-Normal Attention outperforms other linearized attention alternatives, offering a promising avenue for enhancing the scalability of transformer models.", "keywords": "Neural Networks;Transformers;Self-Attention;Linear Attention;Scalable Transformers;Efficient Attention;Attention with Linear Complexity;Linearized Attention;Self-Attention Analysis", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/8f60cc0ad88fd2dc4567319746c7ee3b660e005d.zip", "author": "Yury Nahshan;Joseph Kampeas;Emir Haleva", "authorids": "~Yury_Nahshan1;~Joseph_Kampeas1;~Emir_Haleva1", "gender": "M;M;", "homepage": ";;", "dblp": "228/7866;61/10966;", "google_scholar": "vdRZRhIAAAAJ;kampeas;k4ZFigYAAAAJ", "orcid": ";0000-0002-3412-6854;", "linkedin": ";;", "or_profile": "~Yury_Nahshan1;~Joseph_Kampeas1;~Emir_Haleva1", "aff": "Huawei Technologies Ltd.;Huawei Technologies Ltd.;Huawei Technologies Ltd.", "aff_domain": "huawei.com;huawei.com;huawei.com", "position": "Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nnahshan2024linear,\ntitle={Linear Log-Normal Attention with Unbiased Concentration},\nauthor={Yury Nahshan and Joseph Kampeas and Emir Haleva},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=5nM2AHzqUj}\n}", "github": "", "project": "", "reviewers": "m7qC;zqGD;aZn4;xXRJ", "pdf_size": 1415613, "rating": "5;5;6;8", "confidence": "4;4;3;3", "soundness": "3;2;3;3", "contribution": "3;2;3;3", "presentation": "3;2;3;3", "wc_summary": "29;223;49;68", "wc_strengths": "26;190;19;174", "wc_weaknesses": "66;217;35;85", "wc_questions": "2;3;2;2", "wc_review": "123;633;105;329", "wc_reply_reviewers": "0;0;10;0", "wc_reply_authors": "576;1545;392;308", "reply_reviewers": "0;0;1;0", "reply_authors": "1;3;1;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 92.25, 76.73778404410699 ], "wc_strengths_avg": [ 102.25, 79.98867107284632 ], "wc_weaknesses_avg": [ 100.75, 69.44917206129962 ], "wc_questions_avg": [ 2.25, 0.4330127018922193 ], "wc_review_avg": [ 297.5, 212.75514094846216 ], "wc_reply_reviewers_avg": [ 2.5, 4.330127018922194 ], "wc_reply_authors_avg": [ 705.25, 494.4235911644993 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8164965809277259, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1876833627498889789&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 6, "openreview": "https://openreview.net/forum?id=5nM2AHzqUj", "pdf": "https://openreview.net/pdf?id=5nM2AHzqUj", "email": "huawei.com;huawei.com;huawei.com", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Huawei", "aff_unique_dep": "Huawei Technologies", "aff_unique_url": "https://www.huawei.com", "aff_unique_abbr": "Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Goodhart's Law in Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19413", "id": "5o9G4XF1LI", "author_site": "Jacek 
Karwowski, Oliver Hayman, Xingjian Bai, Klaus Kiendlhofer, Charlie Griffin, Joar Skalse", "tldr": "", "abstract": "Implementing a reward function that perfectly captures a complex task in the real world is impractical. As a result, it is often appropriate to think of the reward function as a *proxy* for the true objective rather than as its definition. We study this phenomenon through the lens of *Goodhart\u2019s law*, which predicts that increasing optimisation of an imperfect proxy beyond some critical point decreases performance on the true objective. First, we propose a way to *quantify* the magnitude of this effect and *show empirically* that optimising an imperfect proxy reward often leads to the behaviour predicted by Goodhart\u2019s law for a wide range of environments and reward functions. We then provide a *geometric explanation* for why Goodhart's law occurs in Markov decision processes. We use these theoretical insights to propose an *optimal early stopping method* that provably avoids the aforementioned pitfall and derive theoretical *regret bounds* for this method. Moreover, we derive a training method that maximises worst-case reward, for the setting where there is uncertainty about the true reward function. Finally, we evaluate our early stopping method experimentally. Our results support a foundation for a theoretically-principled study of reinforcement learning under reward misspecification.", "keywords": "reinforcement learning;goodhart's law;misspecification;reward learning", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Jacek Karwowski;Oliver Hayman;Xingjian Bai;Klaus Kiendlhofer;Charlie Griffin;Joar Max Viktor Skalse", "authorids": "~Jacek_Karwowski1;~Oliver_Hayman1;~Xingjian_Bai1;~Klaus_Kiendlhofer1;~Charlie_Griffin1;~Joar_Max_Viktor_Skalse1", "gender": "M;M;M;M;;M", "homepage": ";https://www.linkedin.com/in/oliver-hayman-2b61b31b7/;https://xingjianbai.com/;;;", "dblp": "304/2081;;188/9534;;;242/8125", "google_scholar": ";;;;;GuzLUmQAAAAJ", "orcid": "0000-0002-8361-2912;;;;;", "linkedin": ";oliver-hayman-2b61b31b7/;;klaus-kiendlhofer-5a27291bb/;;", "or_profile": "~Jacek_Karwowski1;~Oliver_Hayman1;~Xingjian_Bai1;~Klaus_Kiendlhofer1;~Charlie_Griffin1;~Joar_Max_Viktor_Skalse1", "aff": "University of Oxford;University of Oxford;University of Oxford;OxAI Safety Hub;;University of Oxford", "aff_domain": "ox.ac.uk;ox.ac.uk;ox.ac.uk;aisafetyhub.org;;ox.ac.uk", "position": "PhD student;Undergrad student;Undergrad student;Researcher;;PhD student", "bibtex": "@inproceedings{\nkarwowski2024goodharts,\ntitle={Goodhart's Law in Reinforcement Learning},\nauthor={Jacek Karwowski and Oliver Hayman and Xingjian Bai and Klaus Kiendlhofer and Charlie Griffin and Joar Max Viktor Skalse},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=5o9G4XF1LI}\n}", "github": "", "project": "", "reviewers": "Y2A1;eHjA;YkV9;MUg9", "pdf_size": 2697174, "rating": "5;6;6;8", "confidence": "3;3;3;2", "soundness": "3;4;3;3", "contribution": "2;3;3;3", "presentation": "3;4;3;3", "wc_summary": "108;115;99;77", "wc_strengths": "91;101;66;157", "wc_weaknesses": "360;144;438;108", "wc_questions": "31;1;4;76", "wc_review": "590;361;607;418", "wc_reply_reviewers": "0;0;0;84", "wc_reply_authors": "1211;618;1457;1121", "reply_reviewers": "0;0;0;1", "reply_authors": "2;1;2;2", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 
0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 99.75, 14.306903927824496 ], "wc_strengths_avg": [ 103.75, 33.281939546847326 ], "wc_weaknesses_avg": [ 262.5, 139.83829947478623 ], "wc_questions_avg": [ 28.0, 30.074906483645133 ], "wc_review_avg": [ 494.0, 106.59502802663921 ], "wc_reply_reviewers_avg": [ 21.0, 36.373066958946424 ], "wc_reply_authors_avg": [ 1101.75, 305.1732090141597 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.9271726499455306, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2184078619705942762&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=5o9G4XF1LI", "pdf": "https://openreview.net/pdf?id=5o9G4XF1LI", "email": "ox.ac.uk;ox.ac.uk;ox.ac.uk;aisafetyhub.org;;ox.ac.uk", "author_num": 6, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of Oxford", "aff_unique_dep": "", "aff_unique_url": "https://www.ox.ac.uk", "aff_unique_abbr": "Oxford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Oxford", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "5oJlyJXUxK", "title": "Beyond Concept Bottleneck Models: How to Make Black Boxes Intervenable?", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recently, interpretable machine learning has re-explored concept bottleneck models (CBM), comprising step-by-step prediction of the high-level concepts from the raw features and the target variable from the predicted concepts. A compelling advantage of this model class is the user's ability to intervene on the predicted concept values, consequently affecting the model's downstream output. In this work, we introduce a method to perform such concept-based interventions on already-trained neural networks, which are not interpretable by design. Furthermore, we formalise the model's *intervenability* as a measure of the effectiveness of concept-based interventions and leverage this definition to fine-tune black-box models. Empirically, we explore the intervenability of black-box classifiers on synthetic tabular and natural image benchmarks. We demonstrate that fine-tuning improves intervention effectiveness and often yields better-calibrated predictions. 
To showcase the practical utility of the proposed techniques, we apply them to deep chest X-ray classifiers and show that fine-tuned black boxes can be as intervenable and more performant than CBMs.", "keywords": "interpretability;explainability;concepts;concept bottleneck models;model interventions;healthcare", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/53e866cf86a808281bedae3a80e1e88c12ba35a8.zip", "author": "Ri\u010dards Marcinkevi\u010ds;Sonia Laguna;Moritz Vandenhirtz;Julia E Vogt", "authorids": "~Ri\u010dards_Marcinkevi\u010ds1;~Sonia_Laguna1;~Moritz_Vandenhirtz1;~Julia_E_Vogt1", "gender": "F;M;F;M", "homepage": "https://mds.inf.ethz.ch/team/detail/sonia-laguna;;http://mds.inf.ethz.ch;https://rmarcinkevics.github.io/", "dblp": "313/3156;;13/8412;234/8553", "google_scholar": "PljVnCQAAAAJ;H2cG0BwAAAAJ;UoeV-8kAAAAJ;https://scholar.google.ch/citations?user=XcxXOJsAAAAJ", "orcid": "0000-0003-3504-2051;;;0000-0001-8901-5062", "linkedin": ";moritz-simon-vandenhirtz-488b0b16b/;julia-vogt-50b53895;ri%C4%8Dards-m-668568106?lipi=urn%3Ali%3Apage%3Ad_flagship3_profile_view_base_contact_details%3Byeq5%2FsReRoWG3HN7r6A5Lw%3D%3D", "or_profile": "~Sonia_Laguna1;~Moritz_Vandenhirtz1;~Julia_E_Vogt1;~Ricards_Marcinkevics1", "aff": "Department of Computer Science, ETHZ - ETH Zurich;ETHZ - ETH Zurich;Swiss Federal Institute of Technology;Swiss Federal Institute of Technology", "aff_domain": "inf.ethz.ch;ethz.ch;ethz.ch;inf.ethz.ch", "position": "PhD student;PhD student;Assistant Professor;PhD student", "bibtex": "@misc{\nmarcinkevi{\\v{c}}s2024beyond,\ntitle={Beyond Concept Bottleneck Models: How to Make Black Boxes Intervenable?},\nauthor={Ri{\\v{c}}ards Marcinkevi{\\v{c}}s and Sonia Laguna and Moritz Vandenhirtz and Julia E Vogt},\nyear={2024},\nurl={https://openreview.net/forum?id=5oJlyJXUxK}\n}", "github": "", "project": "", "reviewers": "Emzi;JHNX;icNv;39mA", "site": "https://openreview.net/forum?id=5oJlyJXUxK", "pdf_size": 12053856, "rating": "5;5;8;8", "confidence": "3;4;4;4", "soundness": "3;3;3;3", "contribution": "2;2;3;4", "presentation": "2;3;3;3", "wc_summary": "156;80;35;341", "wc_strengths": "45;14;38;66", "wc_weaknesses": "352;202;57;168", "wc_questions": "227;28;72;123", "wc_review": "780;324;202;698", "wc_reply_reviewers": "304;191;46;35", "wc_reply_authors": "2508;1994;672;1541", "reply_reviewers": "2;1;1;1", "reply_authors": "6;5;2;4", "rating_avg": [ 6.5, 1.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 153.0, 116.83963368651924 ], "wc_strengths_avg": [ 40.75, 18.565761497983324 ], "wc_weaknesses_avg": [ 194.75, 105.44044527599455 ], "wc_questions_avg": [ 112.5, 74.16367035145981 ], "wc_review_avg": [ 501.0, 243.60829214129802 ], "wc_reply_reviewers_avg": [ 144.0, 111.01126069007594 ], "wc_reply_authors_avg": [ 1678.75, 674.4551041396306 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 4.25, 1.479019945774904 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896258, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2536299489321927736&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff_unique_index": "0;0;1;1", "aff_unique_norm": "ETH Zurich;Swiss Federal Institute of Technology", "aff_unique_dep": "Department of Computer Science;", 
"aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;ETH Zurich", "aff_campus_unique_index": "0", "aff_campus_unique": "Zurich;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Switzerland" }, { "id": "5pKLogzjQP", "title": "Purify Perturbative Availability Poisons via Rate-Constrained Variational Autoencoders", "track": "main", "status": "Reject", "tldr": "", "abstract": "Perturbative availability poisoning attacks seek to maximize testing error by making subtle modifications to training examples that are correctly labeled.\nDefensive strategies against these attacks can be categorized based on whether specific interventions are adopted during the training phase.\nThe first approach is training-time defense, such as adversarial training, which can effectively mitigate poisoning effects but is computationally intensive.\nThe other approach is pre-training purification, *e.g.,* image short squeezing, which consists of several simple compressions but often encounters challenges in dealing with various poison types.\nOur work provides a novel disentanglement mechanism to build an efficient pre-training purification method that achieves superior performance to all existing defenses.\nFirstly, we uncover rate-constrained variational autoencoders (VAEs), demonstrating a clear tendency to suppress poison patterns by minimizing mutual information in the latent space. We subsequently conduct a theoretical analysis to offer an explanation for this phenomenon.\nBuilding upon these insights, we introduce a disentangle variational autoencoder (D-VAE), capable of disentangling the added perturbations with learnable class-wise embeddings.\nBased on this network, a two-stage purification approach is naturally developed. 
The first stage focuses on roughly suppressing poison patterns, while the second stage produces refined, poison-free results, ensuring the effectiveness and robustness across various scenarios and datasets.\nExtensive experiments demonstrate the remarkable performance of our method across CIFAR-10, CIFAR-100, and a 100-class ImageNet-subset with multiple poison types and different perturbation levels.", "keywords": "perturbative availability poisoning attack;defense;variational autoencoders", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "", "author": "Yi Yu;Yufei Wang;Song Xia;Qichen Zheng;Wenhan Yang;Shijian Lu;Yap-peng Tan;Alex Kot", "authorids": "~Yi_Yu5;~Yufei_Wang5;~Song_Xia1;~Qichen_Zheng1;~Wenhan_Yang6;~Shijian_Lu1;~Yap-peng_Tan1;~Alex_Kot1", "gender": ";M;M;M;M;M;M;", "homepage": "https://github.com/yuyi-sd;https://github.com/wyf0912/;;https://github.com/QichenZheng;https://flyywh.github.io/;https://personal.ntu.edu.sg/shijian.lu/;https://personal.ntu.edu.sg/eyptan/;https://www.ntu.edu.sg/home/eackot/", "dblp": "99/111-11.html;;;285/4546;156/2359.html;42/2718;93/4472.html;", "google_scholar": "https://scholar.google.com/citations?hl=en;jLd1l_sAAAAJ;x_CkEE8AAAAJ;d6AbpzgAAAAJ;S8nAnakAAAAJ;https://scholar.google.com.sg/scholar?hl=en;https://scholar.google.com.sg/citations?user=t9EqYQIAAAAJ;", "orcid": "0000-0003-2730-9553;;0009-0002-1224-470X;;;;0000-0002-0645-9109;", "linkedin": "%E7%9B%8A-%E4%BD%99-6b453a229;;;;;;;", "or_profile": "~Yi_Yu5;~Yufei_Wang5;~Song_Xia1;~Qichen_Zheng1;~Wenhan_Yang6;~Shijian_Lu1;~Yap-peng_Tan1;~Alex_Kot1", "aff": "Nanyang Technological University;Nanyang Technological University;Nanyang Technological University;Nanyang Technological University;Peng Cheng Laboratory;Nanyang Technological University;Nanyang Technological University;Nanyang Technological University", "aff_domain": "ntu.edu.sg;ntu.edu.sg;ntu.edu.sg;ntu.edu;pcl.ac.cn;ntu.edu.sg;ntu.edu.sg;ntu.edu.sg", "position": "PhD student;PhD student;PhD student;PhD student;Researcher;Associate Professor;Full Professor;Full Professor", "bibtex": "@misc{\nyu2024purify,\ntitle={Purify Perturbative Availability Poisons via Rate-Constrained Variational Autoencoders},\nauthor={Yi Yu and Yufei Wang and Song Xia and Qichen Zheng and Wenhan Yang and Shijian Lu and Yap-peng Tan and Alex Kot},\nyear={2024},\nurl={https://openreview.net/forum?id=5pKLogzjQP}\n}", "github": "", "project": "", "reviewers": "h4wy;D3kf;kPQt;PyJm", "site": "https://openreview.net/forum?id=5pKLogzjQP", "pdf_size": 4195767, "rating": "5;5;5;6", "confidence": "2;3;2;4", "soundness": "2;2;3;3", "contribution": "2;2;3;3", "presentation": "2;1;1;3", "wc_summary": "28;114;34;63", "wc_strengths": "31;200;11;53", "wc_weaknesses": "108;912;131;306", "wc_questions": "8;103;2;106", "wc_review": "175;1329;178;528", "wc_reply_reviewers": "27;458;0;301", "wc_reply_authors": "611;4327;1109;805", "reply_reviewers": "1;2;0;3", "reply_authors": "2;9;3;3", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 1.75, 0.82915619758885 ], "wc_summary_avg": [ 59.75, 34.00275724114149 ], "wc_strengths_avg": [ 73.75, 74.38875923148605 ], "wc_weaknesses_avg": [ 364.25, 325.3816028911284 ], "wc_questions_avg": [ 54.75, 49.806500579743606 ], "wc_review_avg": [ 552.5, 470.7199273453377 ], "wc_reply_reviewers_avg": [ 196.5, 191.47127721932603 ], "wc_reply_authors_avg": [ 1713.0, 
1519.5953408720363 ], "reply_reviewers_avg": [ 1.5, 1.118033988749895 ], "reply_authors_avg": [ 4.25, 2.7726341266023544 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:DfoaPqEgb1wJ:scholar.google.com/&scioq=Purify+Perturbative+Availability+Poisons+via+Rate-Constrained+Variational+Autoencoders&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;1;0;0;0", "aff_unique_norm": "Nanyang Technological University;Pengcheng Laboratory", "aff_unique_dep": ";Peng Cheng Laboratory", "aff_unique_url": "https://www.ntu.edu.sg;http://www.pcl.ac.cn", "aff_unique_abbr": "NTU;PCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;0;0;0", "aff_country_unique": "Singapore;China" }, { "id": "5qxdlSyyB3", "title": "S$^6$-DAMON: Unlocking Structured Sparsity in Self-Supervised Speech Models via Data-Model Co-Compression", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Driven by the increasing demand for deploying deep neural network (DNN)-powered automatic speech recognition (ASR) systems on mobile platforms, speech models pretrained through self-supervised learning (SSL) have emerged to reduce reliance on the availability of transcribed speech data. However, this has enlarged the gap between the prohibitive model complexity and the limited resources of mobile devices. Therefore, there is a strong desire to streamline the complexity of speech SSL models for real-time acceleration on mobile platforms, which is particularly challenging as the pretrained speech representation may undergo significant degradation. To this end, we develop a framework dubbed S$^6$-DAMON to unlock structured sparsity in speech SSL models via data-model co-compression. On the data side, leveraging both the duration of each phoneme and the pauses between phonemes of human utterances, we develop a salient audio token detector, dubbed SALAD, to remove redundant input audio tokens; On the model side, we identify that the failure of SOTA ASR pruning methods under structured sparsity is caused by a sparsity discrepancy between finetuning/deployment and their limited adaptability of sparsity distributions. We address this through a new ASR pruning pipeline named SAFARI, which adopts a three-step pipeline - sparsify, finetune, and adjust sparsity. Extensive experiments validate that S$^6$-DAMON can significantly accelerate speech SSL models on mobile devices with limited transcribed speech data while maintaining decent ASR accuracy. 
All source code will be released.", "keywords": "Automated Speech Recognition;Model Compression", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Yonggan Fu;Zhifan Ye;Zhongzhi Yu;Yingyan Celine Lin", "authorids": "~Yonggan_Fu1;~Zhifan_Ye1;~Zhongzhi_Yu1;~Yingyan_Celine_Lin1", "gender": "M;M;M;F", "homepage": "https://www.yongganfu.com/;https://github.com/LemonAndRabbit;;https://eiclab.scs.gatech.edu/", "dblp": "244/8166;168/9226.html;198/8338;120/6981", "google_scholar": "https://scholar.google.com/citations?hl=en;zlPfnWEAAAAJ;KjvcaBQAAAAJ;dio8IesAAAAJ", "orcid": ";0000-0003-0755-8843;;", "linkedin": "yonggan-fu-b211831b0;zhifan-ye/;zhongzhi-yu/;yingyan-celine-lin-a281211a/", "or_profile": "~Yonggan_Fu1;~Zhifan_Ye1;~Zhongzhi_Yu1;~Yingyan_Lin1", "aff": "Georgia Institute of Technology;Georgia Institute of Technology;Nvidia Research;Georgia Institute of Technology", "aff_domain": "gatech.edu;gatech.edu;nivida.com;gatech.edu", "position": "PhD student;PhD student;Research Intern;Associate Professor", "bibtex": "@misc{\nfu2024sdamon,\ntitle={S\\${\\textasciicircum}6\\$-{DAMON}: Unlocking Structured Sparsity in Self-Supervised Speech Models via Data-Model Co-Compression},\nauthor={Yonggan Fu and Zhifan Ye and Zhongzhi Yu and Yingyan Celine Lin},\nyear={2024},\nurl={https://openreview.net/forum?id=5qxdlSyyB3}\n}", "github": "", "project": "", "reviewers": "FqdU;7VsP;NUX3;NFEu", "site": "https://openreview.net/forum?id=5qxdlSyyB3", "pdf_size": 776522, "rating": "3;3;5;5", "confidence": "5;5;3;4", "soundness": "4;2;2;2", "contribution": "3;2;3;3", "presentation": "1;2;2;3", "wc_summary": "122;27;118;102", "wc_strengths": "117;29;56;44", "wc_weaknesses": "825;155;118;268", "wc_questions": "133;4;157;50", "wc_review": "1197;215;449;464", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 92.25, 38.408169703853375 ], "wc_strengths_avg": [ 61.5, 33.44024521441193 ], "wc_weaknesses_avg": [ 341.5, 284.565018932405 ], "wc_questions_avg": [ 86.0, 61.78592072632729 ], "wc_review_avg": [ 581.25, 368.9596014470961 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:EBcqKKesHNYJ:scholar.google.com/&scioq=S%24%5E6%24-DAMON:+Unlocking+Structured+Sparsity+in+Self-Supervised+Speech+Models+via+Data-Model+Co-Compression&hl=en&as_sdt=0,30", "gs_version_total": 0, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Georgia Institute of Technology;NVIDIA", "aff_unique_dep": ";NVIDIA Research", "aff_unique_url": "https://www.gatech.edu;https://www.nvidia.com/research", "aff_unique_abbr": "Georgia Tech;NVIDIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "5rhgOIu4Tr", "title": "BOT: Bootstrapped Optimal Transport for Multi-label Noise Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Multi-label learning with label noise is a practical but more challenging problem, as the 
underlying label dependency complicates the modeling from clean labels to noisy variants. Progress in this area is usually explored from the perspectives of semi-supervised learning, robust loss functions, or noise transition, which are less effective on complicated datasets or highly sensitive to transition matrix estimation. To refine the noisy labels in a general framework, we propose a simple but effective method, named Bootstrapped Optimal Transport method (BOT). Unlike the \\emph{explicit} linear transition matrix with stringent conditions, BOT considers the modeling between true labels and noisy labels as an \\emph{implicit} optimal transport procedure which has a more powerful degree of freedom. We show that with the proper reference by bootstrapping and adversarial orientation, the underlying true labels can be effectively estimated for training by the Sinkhorn-Knopp algorithm. Despite the simplicity, extensive experiments on a range of benchmark datasets prove that BOT consistently outperforms state-of-the-art methods, and comprehensive ablations explain the success behind BOT.", "keywords": "multi-label learning;label noise", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Lefei Zhang;Geng Yu;Jiangchao Yao;Yew-Soon Ong;Yanfeng Wang;Ivor Tsang", "authorids": "~Lefei_Zhang2;~Geng_Yu1;~Jiangchao_Yao1;~Yew-Soon_Ong1;~Yanfeng_Wang1;~Ivor_Tsang1", "gender": "M;M;M;M;M;M", "homepage": ";https://warriors-30.github.io/;https://sunarker.github.io/;https://cmic.sjtu.edu.cn/wangyanfeng/;https://www.a-star.edu.sg/cfar/about-cfar/management/prof-ivor-tsang;http://www.ntu.edu.sg/home/asysong/", "dblp": ";;166/5900;55/5407-1.html;35/5873;64/4136", "google_scholar": "https://scholar.google.com/citations?hl=en;SNp2hXIAAAAJ;w8oDh9QAAAAJ;https://scholar.google.com/citations?hl=zh-CN;rJMOlVsAAAAJ;https://scholar.google.com.tw/citations?user=h9oWOsEAAAAJ", "orcid": ";;;0000-0002-3196-2347;;0000-0002-4480-169X", "linkedin": ";;;;;", "or_profile": "~Lefei_Zhang2;~Geng_Yu1;~Jiangchao_Yao1;~Yanfeng_Wang1;~Ivor_W_Tsang1;~Yew_Soon_Ong1", "aff": "School of Computer Science and Engineering, Nanyang Technological University;Shanghai Jiaotong University;Shanghai Artificial Intelligence Laboratory;Shanghai Jiaotong University;A*STAR;Nanyang Technological University", "aff_domain": "scse.ntu.edu.sg;sjtu.edu.cn;pjlab.org.cn;sjtu.edu.cn;cfar.a-star.edu.sg;ntu.edu.sg", "position": "PhD student;MS student;Researcher;Full Professor;Principal Researcher;Full Professor", "bibtex": "@misc{\nzhang2024bot,\ntitle={{BOT}: Bootstrapped Optimal Transport for Multi-label Noise Learning},\nauthor={Lefei Zhang and Geng Yu and Jiangchao Yao and Yew-Soon Ong and Yanfeng Wang and Ivor Tsang},\nyear={2024},\nurl={https://openreview.net/forum?id=5rhgOIu4Tr}\n}", "github": "", "project": "", "reviewers": "5LvG;DxXQ;hG8D;8aJ1", "site": "https://openreview.net/forum?id=5rhgOIu4Tr", "pdf_size": 609405, "rating": "3;3;3;6", "confidence": "3;3;3;4", "soundness": "3;1;3;3", "contribution": "2;1;2;3", "presentation": "2;1;2;4", "wc_summary": "54;22;46;145", "wc_strengths": "18;16;47;220", "wc_weaknesses": "27;96;475;71", "wc_questions": "89;104;224;122", "wc_review": "188;238;792;558", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], 
"contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.25, 1.0897247358851685 ], "wc_summary_avg": [ 66.75, 46.687123492457744 ], "wc_strengths_avg": [ 75.25, 84.4670793859951 ], "wc_weaknesses_avg": [ 167.25, 179.38837058181892 ], "wc_questions_avg": [ 134.75, 52.83642209688313 ], "wc_review_avg": [ 444.0, 246.00406500706447 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ydKVBCjku4kJ:scholar.google.com/&scioq=BOT:+Bootstrapped+Optimal+Transport+for+Multi-label+Noise+Learning&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;1;3;0", "aff_unique_norm": "Nanyang Technological University;Shanghai Jiao Tong University;Shanghai Artificial Intelligence Laboratory;Agency for Science, Technology and Research", "aff_unique_dep": "School of Computer Science and Engineering;;;", "aff_unique_url": "https://www.ntu.edu.sg;https://www.sjtu.edu.cn;http://www.shailab.org/;https://www.a-star.edu.sg", "aff_unique_abbr": "NTU;SJTU;Shanghai AI Lab;A*STAR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;0;0", "aff_country_unique": "Singapore;China" }, { "id": "5rrYpa2vts", "title": "EA2N: Evidence-based AMR Attention Network for Fake News Detection", "track": "main", "status": "Reject", "tldr": "", "abstract": "Proliferation of fake news has become a critical issue in today's information-driven society. Our study includes external knowledge from Wikidata and deviates from the reliance on social information to detect fake news, that many state-of-the-art (SOTA) fact-checking models adopt. This paper introduces EA$^2$N, an Evidence-based AMR Attention Network for Fake News Detection. EA$^2$N leverages Abstract Meaning Representation (AMR) and incorporates knowledge from Wikidata using proposed evidence linking algorithm, pushing the boundaries of fake news detection. The proposed framework encompasses a combination of novel language encoder and graph encoder to detect the fake news. While the language encoder effectively combines transformer encoded textual features with affective lexical features, the graph encoder encodes AMR with evidence through external knowledge, referred as WikiAMR graph. A path-aware graph learning module is designed to capture crucial semantic relationships among entities over evidences. Extensive experiments supports our model's superior performance, surpassing SOTA methodologies. 
This research not only advances the field of Fake News Detection but also showcases the potential of AMR and external knowledge for robust NLP applications, promising a more trustworthy information landscape.", "keywords": "Fake News Detection;AMR Network;Natural Language Processing", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Shubham Gupta;Abhishek Rajora;Suman Kundu", "authorids": "~Shubham_Gupta5;~Abhishek_Rajora1;~Suman_Kundu1", "gender": "M;M;M", "homepage": "https://www.csa-iitj.group/author/shubham-gupta/;https://brillard.netlify.app/;https://sumankundu.info", "dblp": ";;45/9812", "google_scholar": "z1lEsUgAAAAJ;;https://scholar.google.co.in/citations?user=2h2t9cEAAAAJ", "orcid": ";;0000-0002-7856-4768", "linkedin": ";abhishek-rajora/;drskundu/", "or_profile": "~Shubham_Gupta5;~Abhishek_Rajora1;~Suman_Kundu1", "aff": "Indian Institute of Technology, Jodhpur;Indian Institute of Technology Jodhpur;Indian Institute of Technology Jodhpur", "aff_domain": "iitj.ac.in;iitj.ac.in;iitj.ac.in", "position": "PhD student;Undergrad student;Assistant Professor", "bibtex": "@misc{\ngupta2024ean,\ntitle={{EA}2N: Evidence-based {AMR} Attention Network for Fake News Detection},\nauthor={Shubham Gupta and Abhishek Rajora and Suman Kundu},\nyear={2024},\nurl={https://openreview.net/forum?id=5rrYpa2vts}\n}", "github": "", "project": "", "reviewers": "PY31;b3Lz;hTw2;cpwY", "site": "https://openreview.net/forum?id=5rrYpa2vts", "pdf_size": 4474923, "rating": "3;3;5;5", "confidence": "4;4;2;4", "soundness": "2;3;3;3", "contribution": "1;2;2;2", "presentation": "2;3;3;3", "wc_summary": "187;44;62;127", "wc_strengths": "44;28;36;58", "wc_weaknesses": "350;97;15;200", "wc_questions": "933;2;1;62", "wc_review": "1514;171;114;447", "wc_reply_reviewers": "175;0;16;0", "wc_reply_authors": "2630;1171;170;1125", "reply_reviewers": "1;0;1;0", "reply_authors": "6;3;2;3", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 105.0, 56.519907997094265 ], "wc_strengths_avg": [ 41.5, 11.07925990308017 ], "wc_weaknesses_avg": [ 165.5, 125.07297869643946 ], "wc_questions_avg": [ 249.5, 395.39126191659824 ], "wc_review_avg": [ 561.5, 564.1615460131965 ], "wc_reply_reviewers_avg": [ 47.75, 73.75762672429204 ], "wc_reply_authors_avg": [ 1274.0, 878.9712737058021 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.5, 1.5 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:vKKJu-GsGLYJ:scholar.google.com/&scioq=EA2N:+Evidence-based+AMR+Attention+Network+for+Fake+News+Detection&hl=en&as_sdt=0,47", "gs_version_total": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Indian Institute of Technology;Indian Institute of Technology Jodhpur", "aff_unique_dep": ";", "aff_unique_url": "https://www.iitj.ac.in;https://www.iitj.ac.in", "aff_unique_abbr": "IIT Jodhpur;IIT Jodhpur", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Jodhpur", "aff_country_unique_index": "0;0;0", "aff_country_unique": "India" }, { "title": "Whittle Index with Multiple Actions and State Constraint for Inventory Management", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19412", "id": 
"5sixirvG0I", "author_site": "Chuheng Zhang, Xiangsen Wang, Wei Jiang, Xianliang Yang, Siwei Wang, Lei Song, Jiang Bian", "tldr": "", "abstract": "Whittle index is a heuristic tool that leads to good performance for the restless bandits problem. In this paper, we extend Whittle index to a new multi-agent reinforcement learning (MARL) setting with multiple discrete actions and a possibly changing constraint on the state space, resulting in WIMS (Whittle Index with Multiple actions and State constraint). This setting is common for inventory management where each agent chooses a replenishing quantity level for the corresponding stock-keeping-unit (SKU) such that the total profit is maximized while the total inventory does not exceed a certain limit. Accordingly, we propose a deep MARL algorithm based on WIMS for inventory management. Empirically, our algorithm is evaluated on real large-scale inventory management problems with up to 2307 SKUs and outperforms operation-research-based methods and baseline MARL algorithms.", "keywords": "MARL;Inventory Management;Whittle Index", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/12a415ec19b65954850e84b6c838bc6cb9025594.pdf", "author": "Chuheng Zhang;Xiangsen Wang;Wei Jiang;Xianliang Yang;Siwei Wang;Lei Song;Jiang Bian", "authorids": "~Chuheng_Zhang1;~Xiangsen_Wang1;~Wei_Jiang12;~Xianliang_Yang1;~Siwei_Wang2;~Lei_Song3;~Jiang_Bian1", "gender": "M;M;;M;M;M;M", "homepage": ";https://github.com/sanmuyang;;https://github.com/VictorYXL;https://www.microsoft.com/en-us/research/people/siweiwang/publications/;;https://sites.google.com/view/jiangbian", "dblp": "241/9716;341/5749;21/3839-24;;51/8279-2;76/893-1.html;09/851-2.html", "google_scholar": "q7M83KQAAAAJ;;hDMnEDEAAAAJ;;;pXDSOocAAAAJ;pZBEnY8AAAAJ", "orcid": ";0000-0002-5349-9170;;;;;0000-0002-9472-600X", "linkedin": ";;wei-jiang-b364121ab/;;;;jbian/", "or_profile": "~Chuheng_Zhang1;~Xiangsen_Wang1;~Wei_Jiang12;~Xianliang_Yang1;~Siwei_Wang2;~Lei_Song3;~Jiang_Bian1", "aff": "Microsoft;;University of Illinois Urbana-Champaign;Microsoft;Microsoft;Microsoft;Microsoft", "aff_domain": "microsoft.com;;illinois.edu;microsoft.com;microsoft.com;microsoft.com;microsoft.com", "position": "Researcher;;MS student;Researcher;Researcher;Principal Researcher;Partner Research Manager", "bibtex": "@inproceedings{\nzhang2024whittle,\ntitle={Whittle Index with Multiple Actions and State Constraint for Inventory Management},\nauthor={Chuheng Zhang and Xiangsen Wang and Wei Jiang and Xianliang Yang and Siwei Wang and Lei Song and Jiang Bian},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=5sixirvG0I}\n}", "github": "", "project": "", "reviewers": "xHVA;qxXR;spvh", "pdf_size": 498190, "rating": "3;5;8", "confidence": "4;3;3", "soundness": "2;3;3", "contribution": "2;2;3", "presentation": "2;3;4", "wc_summary": "67;156;130", "wc_strengths": "36;87;234", "wc_weaknesses": "301;119;296", "wc_questions": "4;76;251", "wc_review": "408;438;911", "wc_reply_reviewers": "505;211;0", "wc_reply_authors": "1251;1221;1365", "reply_reviewers": "1;1;0", "reply_authors": "4;4;3", "rating_avg": [ 5.333333333333333, 2.0548046676563256 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 117.66666666666667, 37.366057086910075 ], 
"wc_strengths_avg": [ 119.0, 83.94045508573325 ], "wc_weaknesses_avg": [ 238.66666666666666, 84.64172861078761 ], "wc_questions_avg": [ 110.33333333333333, 103.7186364910162 ], "wc_review_avg": [ 585.6666666666666, 230.37119804543468 ], "wc_reply_reviewers_avg": [ 238.66666666666666, 207.09149883297695 ], "wc_reply_authors_avg": [ 1279.0, 62.03224967708329 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 3.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.8029550685469661, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5813781585575829176&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "openreview": "https://openreview.net/forum?id=5sixirvG0I", "pdf": "https://openreview.net/pdf?id=5sixirvG0I", "email": "microsoft.com;;illinois.edu;microsoft.com;microsoft.com;microsoft.com;microsoft.com", "author_num": 7, "aff_unique_index": "0;1;0;0;0;0", "aff_unique_norm": "Microsoft;University of Illinois Urbana-Champaign", "aff_unique_dep": "Microsoft Corporation;", "aff_unique_url": "https://www.microsoft.com;https://illinois.edu", "aff_unique_abbr": "Microsoft;UIUC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Robust Angular Synchronization via Directed Graph Neural Networks", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19411", "id": "5sjxMwWmk8", "author_site": "Yixuan He, Gesine Reinert, David Wipf, Mihai Cucuringu", "tldr": "", "abstract": "The angular synchronization problem aims to accurately estimate (up to a constant additive phase) a set of unknown angles $\\theta_1, \\dots, \\theta_n\\in[0, 2\\pi)$ from $m$ noisy measurements of their offsets $\\theta_i-\\theta_j$ mod $2\\pi.$ Applications include, for example, sensor network localization, phase retrieval, and distributed clock synchronization. \nAn extension of the problem to the heterogeneous setting (dubbed $k$-synchronization) is to estimate $k$ groups of angles simultaneously, given noisy observations (with unknown group assignment) from each group. Existing methods for angular synchronization usually perform poorly in high-noise regimes, which are common in applications. In this paper, we leverage neural networks for the angular synchronization problem, and its heterogeneous extension, by proposing GNNSync, a theoretically-grounded end-to-end trainable framework using directed graph neural networks. In addition, new loss functions are devised to encode synchronization objectives. 
Experimental results on extensive data sets demonstrate that GNNSync attains competitive, and often superior, performance against a comprehensive set of baselines for the angular synchronization problem and its extension, validating the robustness of GNNSync even at high noise levels.", "keywords": "group synchronization;angular synchronization;neural networks;directed graphs;deep learning;cycle consistency", "primary_area": "learning on graphs and other geometries & topologies", "supplementary_material": "", "author": "Yixuan He;Gesine Reinert;David Wipf;Mihai Cucuringu", "authorids": "~Yixuan_He2;~Gesine_Reinert1;~David_Wipf1;~Mihai_Cucuringu1", "gender": "F;F;M;M", "homepage": "https://sherylhyx.github.io/;http://www.stats.ox.ac.uk/~reinert/;http://www.davidwipf.com/;https://www.math.ucla.edu/~mihai/", "dblp": "226/6494;86/1736;81/6421;58/6857", "google_scholar": "SWme_nYAAAAJ;2gvyN5oAAAAJ;YJx1WSgAAAAJ;GFvVRzwAAAAJ", "orcid": "0000-0002-5990-0658;;;", "linkedin": "yixuan-he-sheryl/;gesine-reinert-77b64913/?originalSubdomain=uk;;mihai-cucuringu-9a866634/", "or_profile": "~Yixuan_He2;~Gesine_Reinert1;~David_Wipf1;~Mihai_Cucuringu1", "aff": "University of Oxford;University of Oxford;Amazon AI Research Lab;The Alan Turing Institute", "aff_domain": "ox.ac.uk;ox.ac.uk;amazon.com;turing.ac.uk", "position": "PhD student;Professor;Principal Research Scientist;Fellow", "bibtex": "@inproceedings{\nhe2024robust,\ntitle={Robust Angular Synchronization via Directed Graph Neural Networks},\nauthor={Yixuan He and Gesine Reinert and David Wipf and Mihai Cucuringu},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=5sjxMwWmk8}\n}", "github": "", "project": "", "reviewers": "BzAL;LD2r;UofK;tjS3", "pdf_size": 14152854, "rating": "5;6;6;8", "confidence": "2;2;3;2", "soundness": "3;3;3;3", "contribution": "2;2;2;3", "presentation": "2;3;2;3", "wc_summary": "146;49;90;102", "wc_strengths": "14;43;41;86", "wc_weaknesses": "13;17;236;46", "wc_questions": "30;62;86;39", "wc_review": "203;171;453;273", "wc_reply_reviewers": "0;0;15;0", "wc_reply_authors": "371;330;1339;196", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;3;1", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 2.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 96.75, 34.56425176392511 ], "wc_strengths_avg": [ 46.0, 25.777897509300484 ], "wc_weaknesses_avg": [ 78.0, 92.10591729091026 ], "wc_questions_avg": [ 54.25, 21.72987574745884 ], "wc_review_avg": [ 275.0, 109.1879114188013 ], "wc_reply_reviewers_avg": [ 3.75, 6.49519052838329 ], "wc_reply_authors_avg": [ 559.0, 454.95988834181856 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.13245323570650439, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4650950799787545403&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "openreview": "https://openreview.net/forum?id=5sjxMwWmk8", "pdf": "https://openreview.net/pdf?id=5sjxMwWmk8", "email": "ox.ac.uk;ox.ac.uk;amazon.com;turing.ac.uk", "author_num": 4, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "University of Oxford;Amazon;Alan Turing Institute", "aff_unique_dep": ";Amazon AI Research Lab;", "aff_unique_url": 
"https://www.ox.ac.uk;https://www.amazon.com;https://www.turing.ac.uk", "aff_unique_abbr": "Oxford;Amazon AI;ATI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United Kingdom;United States" }, { "title": "Pose Modulated Avatars from Video", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19410", "id": "5t44vPlv9x", "author_site": "Chunjin Song, Bastian Wandt, Helge Rhodin", "tldr": "", "abstract": "It is now possible to reconstruct dynamic human motion and shape from a sparse set of cameras using Neural Radiance Fields (NeRF) driven by an underlying skeleton. However, a challenge remains to model the deformation of cloth and skin in relation to skeleton pose. Unlike existing avatar models that are learned implicitly or rely on a proxy surface, our approach is motivated by the observation that different poses necessitate unique frequency assignments. Neglecting this distinction yields noisy artifacts in smooth areas or blurs fine-grained texture and shape details in sharp regions. We develop a two-branch neural network that is adaptive and explicit in the frequency domain. The first branch is a graph neural network that models correlations among body parts locally, taking skeleton pose as input. The second branch combines these correlation features to a set of global frequencies and then modulates the feature encoding. Our experiments demonstrate that our network outperforms state-of-the-art methods in terms of preserving details and generalization capabilities. Our code is available at https://github.com/ChunjinSong/PM-Avatars.", "keywords": "NeRF;Neural Rendering;Dynamic Avatars;Frequency Modulation", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/e4ee940037bf645fa5a64eb09b228220e3b9f496.zip", "author": "Chunjin Song;Bastian Wandt;Helge Rhodin", "authorids": "~Chunjin_Song1;~Bastian_Wandt2;~Helge_Rhodin5", "gender": "F;M;", "homepage": "https://chunjinsong.github.io/;http://bastianwandt.de;", "dblp": "230/8001;;", "google_scholar": ";z4aXEBYAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Chunjin_Song1;~Bastian_Wandt2;~Helge_Rhodin5", "aff": "University of British Columbia;Link\u00f6ping University;", "aff_domain": "cs.ubc.ca;liu.se;", "position": "PhD student;Assistant Professor;", "bibtex": "@inproceedings{\nsong2024pose,\ntitle={Pose Modulated Avatars from Video},\nauthor={Chunjin Song and Bastian Wandt and Helge Rhodin},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=5t44vPlv9x}\n}", "github": "", "project": "", "reviewers": "PkgA;SAAP;McVe", "pdf_size": 8773642, "rating": "6;6;6", "confidence": "4;5;4", "soundness": "3;2;3", "contribution": "2;2;2", "presentation": "3;3;4", "wc_summary": "58;56;143", "wc_strengths": "35;41;115", "wc_weaknesses": "287;84;205", "wc_questions": "7;9;95", "wc_review": "387;190;558", "wc_reply_reviewers": "43;134;0", "wc_reply_authors": "818;1719;746", "reply_reviewers": "1;1;0", "reply_authors": "3;4;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 85.66666666666667, 40.54901012629312 ], "wc_strengths_avg": [ 63.666666666666664, 36.38070306571267 
], "wc_weaknesses_avg": [ 192.0, 83.3826520726384 ], "wc_questions_avg": [ 37.0, 41.02032016777376 ], "wc_review_avg": [ 378.3333333333333, 150.36030800122157 ], "wc_reply_reviewers_avg": [ 59.0, 55.86292748027682 ], "wc_reply_authors_avg": [ 1094.3333333333333, 442.68298162705804 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 1.247219128924647 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1398260736606413018&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "openreview": "https://openreview.net/forum?id=5t44vPlv9x", "pdf": "https://openreview.net/pdf?id=5t44vPlv9x", "email": "cs.ubc.ca;liu.se;", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "University of British Columbia;Link\u00f6ping University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ubc.ca;https://www.liu.se", "aff_unique_abbr": "UBC;LiU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Canada;Sweden" }, { "title": "Learning to Relax: Setting Solver Parameters Across a Sequence of Linear System Instances", "status": "Spotlight", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19409", "id": "5t57omGVMw", "author_site": "Mikhail Khodak, Edmond Chow, Nina Balcan, Ameet Talwalkar", "tldr": "", "abstract": "Solving a linear system ${\\bf Ax}={\\bf b}$ is a fundamental scientific computing primitive for which numerous solvers and preconditioners have been developed. \n\tThese come with parameters whose optimal values depend on the system being solved and are often impossible or too expensive to identify;\n\tthus in practice sub-optimal heuristics are used.\n\tWe consider the common setting in which many related linear systems need to be solved, e.g. 
during a single numerical simulation.\n\tIn this scenario, can we sequentially choose parameters that attain a near-optimal overall number of iterations, without extra matrix computations?\n\tWe answer in the affirmative for Successive Over-Relaxation (SOR), a standard solver whose parameter $\\omega$ has a strong impact on its runtime.\n\tFor this method, we prove that a bandit online learning algorithm\u2014using only the number of iterations as feedback\u2014can select parameters for a sequence of instances such that the overall cost approaches that of the best fixed $\\omega$ as the sequence length increases.\n\tFurthermore, when given additional structural information, we show that a _contextual_ bandit method asymptotically achieves the performance of the _instance-optimal_ policy, which selects the best $\\omega$ for each instance.\n\tOur work provides the first learning-theoretic treatment of high-precision linear system solvers and the first end-to-end guarantees for data-driven scientific computing, demonstrating theoretically the potential to speed up numerical methods using well-understood learning algorithms.", "keywords": "scientific computing;data-driven algorithm design;online learning;multi-armed bandits;contextual bandits;numerical analysis;learning-augmented algorithms;algorithms with predictions", "primary_area": "learning theory", "supplementary_material": "/attachment/33e10f5af5711e4f2e735654e9558c08c4e7e509.zip", "author": "Mikhail Khodak;Edmond Chow;Maria Florina Balcan;Ameet Talwalkar", "authorids": "~Mikhail_Khodak1;~Edmond_Chow1;~Maria_Florina_Balcan1;~Ameet_Talwalkar1", "gender": ";M;;M", "homepage": ";https://faculty.cc.gatech.edu/~echow/;;http://www.cs.cmu.edu/~atalwalk/", "dblp": ";96/2502;;56/5528", "google_scholar": ";jGqGKGMAAAAJ;;https://scholar.google.com.tw/citations?user=TW7U1W0AAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Mikhail_Khodak1;~Edmond_Chow1;~Maria_Florina_Balcan1;~Ameet_Talwalkar1", "aff": ";Georgia Institute of Technology;;Carnegie Mellon University", "aff_domain": ";gatech.edu;;cmu.edu", "position": ";Full Professor;;Associate Professor", "bibtex": "@inproceedings{\nkhodak2024learning,\ntitle={Learning to Relax: Setting Solver Parameters Across a Sequence of Linear System Instances},\nauthor={Mikhail Khodak and Edmond Chow and Maria Florina Balcan and Ameet Talwalkar},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=5t57omGVMw}\n}", "github": "", "project": "", "reviewers": "655G;vVy3;7bs2;9VfQ", "pdf_size": 3100334, "rating": "8;8;8;8", "confidence": "2;3;3;3", "soundness": "3;3;3;3", "contribution": "3;4;3;3", "presentation": "3;3;4;3", "wc_summary": "27;155;415;73", "wc_strengths": "31;304;93;49", "wc_weaknesses": "34;293;81;52", "wc_questions": "112;343;234;14", "wc_review": "204;1095;823;188", "wc_reply_reviewers": "14;116;0;0", "wc_reply_authors": "323;798;764;179", "reply_reviewers": "1;1;0;0", "reply_authors": "1;2;1;1", "rating_avg": [ 8.0, 0.0 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 167.5, 150.0691507272564 ], "wc_strengths_avg": [ 119.25, 109.02379327467926 ], "wc_weaknesses_avg": [ 115.0, 104.12732590439457 ], "wc_questions_avg": [ 175.75, 124.0894334744099 ], "wc_review_avg": [ 577.5, 393.474586218729 ], "wc_reply_reviewers_avg": [ 32.5, 48.54636958620078 ], 
"wc_reply_authors_avg": [ 516.0, 270.1138648792394 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17888411012785696749&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=5t57omGVMw", "pdf": "https://openreview.net/pdf?id=5t57omGVMw", "email": ";gatech.edu;;cmu.edu", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Georgia Institute of Technology;Carnegie Mellon University", "aff_unique_dep": ";", "aff_unique_url": "https://www.gatech.edu;https://www.cmu.edu", "aff_unique_abbr": "Georgia Tech;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Prompt Risk Control: A Rigorous Framework for Responsible Deployment of Large Language Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19408", "id": "5tGGWOijvq", "author_site": "Thomas Zollo, Todd Morrill, Zhun Deng, Jake Snell, Toniann Pitassi, Richard Zemel", "tldr": "", "abstract": "With the explosion of the zero-shot capabilities of (and thus interest in) pre-trained large language models, there has come accompanying interest in how best to prompt a language model to perform a given task. While it may be tempting to choose a prompt based on empirical results on a validation set, this can lead to a deployment where an unexpectedly high loss occurs. To mitigate this prospect, we propose a lightweight framework, Prompt Risk Control, for selecting a prompt based on rigorous upper bounds on families of informative risk measures. We provide and compare different methods for producing bounds on a diverse set of risk metrics like mean, CVaR, and the Gini coefficient of the loss distribution. In addition, we extend the underlying statistical bounding techniques to accommodate the possibility of distribution shifts in deployment. 
Extensive experiments on high-impact applications like chatbots, medical question answering, and news summarization highlight why such a framework is necessary to reduce exposure to the worst outcomes.", "keywords": "distribution-free uncertainty quantification;large language models;responsible AI", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/3c5a3c65a364905473f9e6916dd413b2fa2576d4.zip", "author": "Thomas P Zollo;Todd Morrill;Zhun Deng;Jake Snell;Toniann Pitassi;Richard Zemel", "authorids": "~Thomas_P_Zollo1;~Todd_Morrill1;~Zhun_Deng1;~Jake_Snell1;~Toniann_Pitassi3;~Richard_Zemel1", "gender": "M;Not Specified;M;M;F;M", "homepage": "https://www.thomaszollo.com/;http://toddmorrill.github.io/;https://www.zhundeng.org/;https://www.jakesnell.com;http://www.cs.columbia.edu/~toni;http://www.cs.columbia.edu/~zemel", "dblp": "336/8946;;204/4353;172/1406;p/TPitassi;16/6366", "google_scholar": "Xp7LgAwAAAAJ;;nkmi-moAAAAJ;MbXKAK8AAAAJ;;https://scholar.google.ca/citations?user=iBeDoRAAAAAJ", "orcid": ";;;;;", "linkedin": "thomas-zollo/;todd-morrill-a0b47b2a/;;;;", "or_profile": "~Thomas_P_Zollo1;~Todd_Morrill1;~Zhun_Deng1;~Jake_Snell1;~Toniann_Pitassi3;~Richard_Zemel1", "aff": "Columbia University;Columbia University;Columbia University;Princeton University;Columbia University;Department of Computer Science, University of Toronto", "aff_domain": "columbia.edu;columbia.edu;columbia.edu;princeton.edu;columbia.edu;cs.toronto.edu", "position": "PhD student;MS student;Postdoc;Postdoc;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzollo2024prompt,\ntitle={Prompt Risk Control: A Rigorous Framework for Responsible Deployment of Large Language Models},\nauthor={Thomas P Zollo and Todd Morrill and Zhun Deng and Jake Snell and Toniann Pitassi and Richard Zemel},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=5tGGWOijvq}\n}", "github": "", "project": "", "reviewers": "FU87;sC6o;YTab;CoFo", "pdf_size": 2319945, "rating": "6;6;6;8", "confidence": "3;4;4;3", "soundness": "3;3;3;3", "contribution": "3;2;3;3", "presentation": "3;2;4;3", "wc_summary": "92;82;135;216", "wc_strengths": "117;56;23;82", "wc_weaknesses": "64;178;59;113", "wc_questions": "86;193;178;218", "wc_review": "359;509;395;629", "wc_reply_reviewers": "0;0;10;6", "wc_reply_authors": "328;761;480;472", "reply_reviewers": "0;0;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 131.25, 52.826958080131774 ], "wc_strengths_avg": [ 69.5, 34.48550420104076 ], "wc_weaknesses_avg": [ 103.5, 47.90876746483883 ], "wc_questions_avg": [ 168.75, 49.866697303912154 ], "wc_review_avg": [ 473.0, 105.72606112023658 ], "wc_reply_reviewers_avg": [ 4.0, 4.242640687119285 ], "wc_reply_authors_avg": [ 510.25, 156.89865359524282 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15589174670380778205&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=5tGGWOijvq", "pdf": "https://openreview.net/pdf?id=5tGGWOijvq", "email": 
"columbia.edu;columbia.edu;columbia.edu;princeton.edu;columbia.edu;cs.toronto.edu", "author_num": 6, "aff_unique_index": "0;0;0;1;0;2", "aff_unique_norm": "Columbia University;Princeton University;University of Toronto", "aff_unique_dep": ";;Department of Computer Science", "aff_unique_url": "https://www.columbia.edu;https://www.princeton.edu;https://www.utoronto.ca", "aff_unique_abbr": "Columbia;Princeton;U of T", "aff_campus_unique_index": "1", "aff_campus_unique": ";Toronto", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "United States;Canada" }, { "id": "5tSLtvkHCh", "title": "Learning Temporal Causal Representation under Non-Invertible Generation Process", "track": "main", "status": "Reject", "tldr": "", "abstract": "Identifying the underlying time-delayed latent causal processes in sequential data is vital for grasping temporal dynamics and making downstream reasoning. While some recent methods can robustly identify these latent causal variables, they rely on strict assumptions about the invertible generation process from latent variables to observed data. These assumptions are often hard to satisfy in real-world applications containing information loss. For instance, the visual perception process translates a 3D space into 2D images, or the phenomenon of persistence of vision incorporates historical data into current perceptions. To address this challenge, we establish an identifiability theory that allows for the recovery of independent latent components even when they come from a nonlinear and non-invertible mix. Using this theory as a foundation, we propose a principled approach, CaRiNG, to learn the Causal Representation of Non-invertible Generative temporal data with identifiability guarantees. Specifically, we utilize the temporal context to recover lost latent information and employ the conditions in our theory to guide the training process. Through experiments conducted on synthetic datasets, we validate that the causal process is reliably identified by CaRiNG, even when the generation process is non-invertible. 
Moreover, we show that our approach considerably improves temporal understanding and reasoning in practical applications.", "keywords": "Causal Representation Learning;Uninvertible Mixing Function;Temporal Series;Indentifiability", "primary_area": "causal reasoning", "supplementary_material": "/attachment/4ff61499968ef63f351f65f927be1c1538469adb.zip", "author": "Guangyi Chen;Yifan Shen;Zhenhao Chen;Xiangchen Song;Yuewen Sun;Weiran Yao;Xiao Liu;Kun Zhang", "authorids": "~Guangyi_Chen1;~Yifan_Shen4;~Zhenhao_Chen1;~Xiangchen_Song1;~Yuewen_Sun1;~Weiran_Yao1;~Xiao_Liu23;~Kun_Zhang1", "gender": "M;M;M;M;F;M;M;M", "homepage": "https://chengy12.github.io/;https://sanshuiii.github.io/about/;https://zhenhaochenofficial.github.io/;https://xiangchensong.github.io/;https://yuewen-sun.github.io/;;;http://www.andrew.cmu.edu/user/kunz1/", "dblp": "c/GuangyiChen-2;59/7950-4;192/7717;261/9024;219/9893;192/3295;;96/3115-1", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;PMKkElwAAAAJ;xOAtM0YAAAAJ;foR8BIoAAAAJ;https://scholar.google.com/citations?hl=en;rr_leUAAAAAJ;;RGoypN4AAAAJ", "orcid": ";0000-0003-2358-1146;;;;;;", "linkedin": ";;;;;;xiao-l-699069205/;", "or_profile": "~Guangyi_Chen1;~Yifan_Shen4;~Zhenhao_Chen1;~Xiangchen_Song1;~Yuewen_Sun1;~Weiran_Yao1;~Xiao_Liu23;~Kun_Zhang1", "aff": "Carnegie Mellon University;Mohamed bin Zayed University of Artificial Intelligence;Mohamed bin Zayed University of Artificial Intelligence;Carnegie Mellon University;Mohamed bin Zayed University of Artificial Intelligence;SalesForce.com;Technische Universit\u00e4t Darmstadt;Carnegie Mellon University", "aff_domain": "cmu.edu;mbzuai.ac.ae;mbzuai.ac.ae;cmu.edu;mbzuai.ac.ae;salesforce.com;tu-darmstadt.de;cmu.edu", "position": "Postdoc;MS student;PhD student;PhD student;Postdoc;Researcher;Researcher;Associate Professor", "bibtex": "@misc{\nchen2024learning,\ntitle={Learning Temporal Causal Representation under Non-Invertible Generation Process},\nauthor={Guangyi Chen and Yifan Shen and Zhenhao Chen and Xiangchen Song and Yuewen Sun and Weiran Yao and Xiao Liu and Kun Zhang},\nyear={2024},\nurl={https://openreview.net/forum?id=5tSLtvkHCh}\n}", "github": "", "project": "", "reviewers": "ur5z;5cYn;WQAA;uMXb", "site": "https://openreview.net/forum?id=5tSLtvkHCh", "pdf_size": 3436455, "rating": "3;5;6;8", "confidence": "4;3;3;2", "soundness": "2;3;3;3", "contribution": "3;2;4;3", "presentation": "1;3;3;3", "wc_summary": "591;99;101;114", "wc_strengths": "76;32;58;32", "wc_weaknesses": "1269;346;383;85", "wc_questions": "44;125;35;304", "wc_review": "1980;602;577;535", "wc_reply_reviewers": "349;151;166;14", "wc_reply_authors": "3789;2347;2290;1130", "reply_reviewers": "2;1;3;1", "reply_authors": "8;7;8;3", "rating_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 226.25, 210.66724353823972 ], "wc_strengths_avg": [ 49.5, 18.621224449536072 ], "wc_weaknesses_avg": [ 520.75, 447.009158183588 ], "wc_questions_avg": [ 127.0, 108.03471664238306 ], "wc_review_avg": [ 923.5, 610.4402100124139 ], "wc_reply_reviewers_avg": [ 170.0, 119.11549017655092 ], "wc_reply_authors_avg": [ 2389.0, 942.9536043729829 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 6.5, 2.0615528128088303 ], "replies_avg": [ 39, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.9805806756909202, "gs_citation": 0, 
"gs_cited_by_link": "https://scholar.google.com/scholar?q=related:abend1rZPOMJ:scholar.google.com/&scioq=Learning+Temporal+Causal+Representation+under+Non-Invertible+Generation+Process&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;1;0;1;2;3;0", "aff_unique_norm": "Carnegie Mellon University;Mohamed bin Zayed University of Artificial Intelligence;Salesforce;Technische Universit\u00e4t Darmstadt", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.cmu.edu;https://mbzuai.ac.ae;https://www.salesforce.com;https://www.tu-darmstadt.de", "aff_unique_abbr": "CMU;MBZUAI;Salesforce;TUD", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;1;0;2;0", "aff_country_unique": "United States;United Arab Emirates;Germany" }, { "id": "5tYTCyYI27", "title": "Calibration Bottleneck: What Makes Neural Networks less Calibratable?", "track": "main", "status": "Reject", "tldr": "", "abstract": "While modern deep neural networks have achieved remarkable success, they have exhibited a notable deficiency in reliably estimating uncertainty. Many existing studies address the uncertainty calibration problem by incorporating regularization techniques to penalize the overconfident outputs during training. In this study, we shift the focus from the miscalibration encountered in the training phase to an investigation of the concept of calibratability, assessing how amenable a model is to be recalibrated in post-training phase. We find that the use of regularization techniques might compromise calibratability, subsequently leading to a decline in final calibration performance after recalibration. To identify the underlying causes leading to poor calibratability, we delve into the calibration of intermediate features across neural networks\u2019 hidden layers. Our study demonstrates that the overtraining of the top layers in neural networks poses a significant obstacle to calibration, while these layers typically offer minimal improvement to the discriminability of features. Based on this observation, we introduce a weak classifier hypothesis: Given a weak classification head, the bottom layers of a neural network can be learned better for producing calibratable features. Consequently, we propose a progressively layer-peeled training (PLT) method to exploit this hypothesis, thereby enhancing model calibratability. 
Comprehensive experiments show the effectiveness of our method, which improves model calibration and also yields competitive predictive performance.", "keywords": "Uncertainty Calibration;Post-hoc Calibration", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Deng-Bao Wang;Min-Ling Zhang", "authorids": "~Deng-Bao_Wang1;~Min-Ling_Zhang2", "gender": "M;M", "homepage": "https://dengbaowang.github.io/;http://palm.seu.edu.cn/zhangml/", "dblp": "204/2255;84/271.html", "google_scholar": "QCA7j2cAAAAJ;uFHCIM0AAAAJ", "orcid": ";0000-0003-1880-5918", "linkedin": ";", "or_profile": "~Deng-Bao_Wang1;~Min-Ling_Zhang2", "aff": "Southeast University;Southeast University", "aff_domain": "seu.edu.cn;seu.edu.cn", "position": "PhD student;Full Professor", "bibtex": "@misc{\nwang2024calibration,\ntitle={Calibration Bottleneck: What Makes Neural Networks less Calibratable?},\nauthor={Deng-Bao Wang and Min-Ling Zhang},\nyear={2024},\nurl={https://openreview.net/forum?id=5tYTCyYI27}\n}", "github": "", "project": "", "reviewers": "VAxV;x8eC;7Hnm", "site": "https://openreview.net/forum?id=5tYTCyYI27", "pdf_size": 1609285, "rating": "3;5;8", "confidence": "5;4;4", "soundness": "2;1;3", "contribution": "2;3;3", "presentation": "2;2;4", "wc_summary": "55;70;238", "wc_strengths": "35;35;144", "wc_weaknesses": "208;208;72", "wc_questions": "6;29;569", "wc_review": "304;342;1023", "wc_reply_reviewers": "140;170;0", "wc_reply_authors": "636;850;990", "reply_reviewers": "1;1;0", "reply_authors": "3;3;3", "rating_avg": [ 5.333333333333333, 2.0548046676563256 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.0, 0.816496580927726 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.9428090415820634 ], "wc_summary_avg": [ 121.0, 82.95782060782456 ], "wc_strengths_avg": [ 71.33333333333333, 51.383092766222454 ], "wc_weaknesses_avg": [ 162.66666666666666, 64.1110148275803 ], "wc_questions_avg": [ 201.33333333333334, 260.1491025461275 ], "wc_review_avg": [ 556.3333333333334, 330.34762834861635 ], "wc_reply_reviewers_avg": [ 103.33333333333333, 74.08703590297624 ], "wc_reply_authors_avg": [ 825.3333333333334, 145.56861688640936 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 3.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.8029550685469661, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Hcc0M9RomiEJ:scholar.google.com/&scioq=Calibration+Bottleneck:+What+Makes+Neural+Networks+less+Calibratable%3F&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Southeast University", "aff_unique_dep": "", "aff_unique_url": "https://www.seu.edu.cn/", "aff_unique_abbr": "SEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "5twh6pM4SR", "title": "Automating Continual Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "General-purpose learning systems should improve themselves in open-ended fashion in ever-changing environments. Conventional learning algorithms for neural networks, however, suffer from the so-called catastrophic forgetting (CF) problem---previously acquired skills are forgotten when a new task is learned. 
Developing continual learning algorithms to address CF remains an open research question.\nInstead of hand-crafting such algorithms, our new Automated Continual Learning (ACL) trains self-referential neural networks to meta-learn their own in-context continual (meta-)learning algorithms. ACL encodes all desiderata---good performance on both old and new tasks---into its learning objectives. We demonstrate the effectiveness and promise of ACL on multiple few-shot and standard image classification tasks adopted for continual learning: Mini-ImageNet, Omniglot, FC100, MNIST-families, and CIFAR-10.", "keywords": "continual learning;in-context learning;meta-learning;self-referential learning;linear Transformers", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "/attachment/925a674e61a8908054ac10f952d54a9d6cb7b1ee.zip", "author": "Kazuki Irie;R\u00f3bert Csord\u00e1s;J\u00fcrgen Schmidhuber", "authorids": "~Kazuki_Irie1;~R\u00f3bert_Csord\u00e1s1;~J\u00fcrgen_Schmidhuber1", "gender": ";M;M", "homepage": "https://sites.harvard.edu/kazuki-irie/;https://robertcsordas.github.io/;http://people.idsia.ch/~juergen/", "dblp": "148/9667;166/4773.html;s/JurgenSchmidhuber", "google_scholar": "https://scholar.google.de/citations?user=-gZ-BdwAAAAJ;av1lplwAAAAJ;https://scholar.google.ch/citations?user=gLnCTgIAAAAJ", "orcid": "0000-0003-0923-691X;;", "linkedin": ";robertcsordas/;", "or_profile": "~Kazuki_Irie1;~R\u00f3bert_Csord\u00e1s1;~J\u00fcrgen_Schmidhuber1", "aff": "Harvard University;IDSIA;IDSIA", "aff_domain": "fas.harvard.edu;idsia.ch;idsia.ch", "position": "Postpostdoc;Postdoc;Scientific Director", "bibtex": "@misc{\nirie2024automating,\ntitle={Automating Continual Learning},\nauthor={Kazuki Irie and R{\\'o}bert Csord{\\'a}s and J{\\\"u}rgen Schmidhuber},\nyear={2024},\nurl={https://openreview.net/forum?id=5twh6pM4SR}\n}", "github": "", "project": "", "reviewers": "StyD;jN2L;WhrD", "site": "https://openreview.net/forum?id=5twh6pM4SR", "pdf_size": 492759, "rating": "5;5;6", "confidence": "4;4;3", "soundness": "3;2;2", "contribution": "2;2;2", "presentation": "2;3;4", "wc_summary": "44;102;60", "wc_strengths": "56;35;80", "wc_weaknesses": "327;48;115", "wc_questions": "79;455;55", "wc_review": "506;640;310", "wc_reply_reviewers": "834;405;15", "wc_reply_authors": "2585;2136;551", "reply_reviewers": "2;2;1", "reply_authors": "5;6;2", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 68.66666666666667, 24.458581770458856 ], "wc_strengths_avg": [ 57.0, 18.384776310850235 ], "wc_weaknesses_avg": [ 163.33333333333334, 118.91827258901617 ], "wc_questions_avg": [ 196.33333333333334, 183.1671974514602 ], "wc_review_avg": [ 485.3333333333333, 135.51219707301465 ], "wc_reply_reviewers_avg": [ 418.0, 334.4816885869838 ], "wc_reply_authors_avg": [ 1757.3333333333333, 872.4793534647238 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.4714045207910317 ], "reply_authors_avg": [ 4.333333333333333, 1.699673171197595 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.9999999999999997, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16950957273444981926&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Harvard 
University;Institute of Digital Technologies", "aff_unique_dep": ";", "aff_unique_url": "https://www.harvard.edu;https://www.idsia.ch", "aff_unique_abbr": "Harvard;IDSIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;Switzerland" }, { "id": "5vJe8XKFv0", "title": "CoNO: Complex Neural Operator for Continuous Dynamical Systems", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Neural operators extend data-driven models to map between infinite-dimensional functional spaces. These models have successfully solved continuous dynamical systems represented by differential equations, viz weather forecasting, fluid flow, or solid mechanics. However, the existing operators still rely on real space, thereby losing rich representations potentially captured in the complex space by functional transforms. In this paper, we introduce a Complex Neural Operator (CoNO), that parameterizes the integral kernel in the complex fractional Fourier domain. Additionally, the model employing a complex-valued neural network along with aliasing-free activation functions preserves the complex values and complex algebraic properties, thereby enabling improved representation, robustness to noise, and generalization. We show that the model effectively captures the underlying partial differential equation with a single complex fractional Fourier transform. We perform an extensive empirical evaluation of CoNO on several datasets and additional tasks such as zero-shot super-resolution, evaluation of out-of-distribution data, data efficiency, and robustness to noise. CoNO exhibits comparable or superior performance to all the state-of-the-art models in these tasks. Altogether, CoNO presents a robust and superior model for modeling continuous dynamical systems, providing a fillip to scientific machine learning. 
Our code implementation is available at https://anonymous.4open.science/r/anonymous-cono.", "keywords": "Complex valued neural network;neural operator;partial differential equations;dynamical systems", "primary_area": "applications to physical sciences (physics, chemistry, biology, etc.)", "supplementary_material": "", "author": "Karn Tiwari;N M Anoop Krishnan;Prathosh AP", "authorids": "~Karn_Tiwari1;~N_M_Anoop_Krishnan1;~Prathosh_AP1", "gender": "M;M;M", "homepage": ";;https://sites.google.com/view/prathosh", "dblp": ";;218/5887", "google_scholar": ";https://scholar.google.co.in/citations?user=fGnjHcEAAAAJ;https://scholar.google.co.in/citations?user=OEwV4bsAAAAJ", "orcid": ";0000-0003-1500-4947;", "linkedin": "karn3003/;;prathosh-ap-phd-50ab9511/", "or_profile": "~Karn_Tiwari1;~N_M_Anoop_Krishnan1;~Prathosh_AP1", "aff": "Indian Institute of Science, Indian institute of science, Bangalore;Indian Institute of Technology Delhi;Indian Institute of Science, Indian institute of science, Bangalore", "aff_domain": "iisc.ac.in;iitd.ac.in;iisc.ac.in", "position": "PhD student;Associate Professor;Assistant Professor", "bibtex": "@misc{\ntiwari2024cono,\ntitle={Co{NO}: Complex Neural Operator for Continuous Dynamical Systems},\nauthor={Karn Tiwari and N M Anoop Krishnan and Prathosh AP},\nyear={2024},\nurl={https://openreview.net/forum?id=5vJe8XKFv0}\n}", "github": "", "project": "", "reviewers": "Ubji;CUfC;8MNi;HK2j", "site": "https://openreview.net/forum?id=5vJe8XKFv0", "pdf_size": 2293479, "rating": "3;3;5;5", "confidence": "4;4;4;4", "soundness": "2;2;3;3", "contribution": "2;2;3;2", "presentation": "1;1;3;2", "wc_summary": "47;54;290;76", "wc_strengths": "17;42;152;80", "wc_weaknesses": "361;130;276;180", "wc_questions": "39;89;2;272", "wc_review": "464;315;720;608", "wc_reply_reviewers": "0;0;0;21", "wc_reply_authors": "135;284;261;396", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 1.75, 0.82915619758885 ], "wc_summary_avg": [ 116.75, 100.59665749914357 ], "wc_strengths_avg": [ 72.75, 50.957703048705014 ], "wc_weaknesses_avg": [ 236.75, 88.87456047711291 ], "wc_questions_avg": [ 100.5, 103.71716347837517 ], "wc_review_avg": [ 526.75, 152.25205253132057 ], "wc_reply_reviewers_avg": [ 5.25, 9.093266739736606 ], "wc_reply_authors_avg": [ 269.0, 92.70113267916417 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11430387391731248986&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Indian Institute of Science;Indian Institute of Technology Delhi", "aff_unique_dep": ";", "aff_unique_url": "https://www.iisc.ac.in;https://www.iitd.ac.in", "aff_unique_abbr": "IISc;IIT Delhi", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Bangalore;Delhi", "aff_country_unique_index": "0;0;0", "aff_country_unique": "India" }, { "id": "5vXDQ65dzH", "title": "ParFam - Symbolic Regression Based on Continuous Global Optimization", "track": "main", "status": "Reject", "tldr": "", "abstract": "The problem of symbolic regression (SR) arises in many different applications, such as identifying physical laws or deriving mathematical equations describing the behavior of financial markets from 
given data. Various methods exist to address the problem of SR, often based on genetic programming. However, these methods are usually quite complicated and require a lot of hyperparameter tuning and computational resources. \nIn this paper, we present our new method ParFam that utilizes parametric families of suitable symbolic functions to translate the discrete symbolic regression problem into a continuous one, resulting in a more straightforward setup compared to current state-of-the-art methods. \nIn combination with a powerful global optimizer, this approach results in an effective method to tackle the problem of SR. \nFurthermore, it can be easily extended to more advanced algorithms, e.g., by adding a deep neural network to find good-fitting parametric families. \nWe prove the performance of ParFam with extensive numerical experiments based on the common SR benchmark suit SRBench, showing that we achieve state-of-the-art results. Our code can be found at https://anonymous.4open.science/r/parfam-90FC/README.md.", "keywords": "symbolic regression;global optimization;deep learning", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Philipp Scholl;Katharina Bieker;Hillary Hauger;Gitta Kutyniok", "authorids": "~Philipp_Scholl2;~Katharina_Bieker1;~Hillary_Hauger3;~Gitta_Kutyniok2", "gender": "M;;F;F", "homepage": ";;;https://www.ai.math.lmu.de/kutyniok", "dblp": "31/6987;;;13/2736", "google_scholar": "https://scholar.google.com/citations?hl=en;;;https://scholar.google.de/citations?user=JHs9LssAAAAJ", "orcid": ";;;0000-0001-9738-2487", "linkedin": ";;hillary-hauger-391a7721b;gitta-kutyniok-2606b215/?originalSubdomain=de", "or_profile": "~Philipp_Scholl2;~Katharina_Bieker1;~Hillary_Hauger3;~Gitta_Kutyniok2", "aff": "University of Munich, Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;;Technische Universit\u00e4t M\u00fcnchen;LMU Munich", "aff_domain": "campus.lmu.de;;tum.de;uni-muenchen.de", "position": "PhD student;;MS student;Full Professor", "bibtex": "@misc{\nscholl2024parfam,\ntitle={ParFam - Symbolic Regression Based on Continuous Global Optimization},\nauthor={Philipp Scholl and Katharina Bieker and Hillary Hauger and Gitta Kutyniok},\nyear={2024},\nurl={https://openreview.net/forum?id=5vXDQ65dzH}\n}", "github": "", "project": "", "reviewers": "1Ff3;LMH9;5PDr;8mnU", "site": "https://openreview.net/forum?id=5vXDQ65dzH", "pdf_size": 510269, "rating": "5;5;5;6", "confidence": "4;3;4;2", "soundness": "1;2;2;2", "contribution": "2;2;2;2", "presentation": "3;2;3;2", "wc_summary": "30;135;105;67", "wc_strengths": "22;33;46;28", "wc_weaknesses": "166;469;101;187", "wc_questions": "90;27;103;31", "wc_review": "308;664;355;313", "wc_reply_reviewers": "56;60;0;0", "wc_reply_authors": "1091;544;577;1443", "reply_reviewers": "1;1;0;0", "reply_authors": "2;1;1;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 1.75, 0.4330127018922193 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 84.25, 39.51819201329939 ], "wc_strengths_avg": [ 32.25, 8.842369591913696 ], "wc_weaknesses_avg": [ 230.75, 141.16014841307017 ], "wc_questions_avg": [ 62.75, 34.09087121210017 ], "wc_review_avg": [ 410.0, 147.77855054100374 ], "wc_reply_reviewers_avg": [ 29.0, 29.03446228191595 ], "wc_reply_authors_avg": [ 913.75, 374.7128066933395 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 
], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7557527737156987216&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1;2", "aff_unique_norm": "Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;Technische Universit\u00e4t M\u00fcnchen;Ludwig Maximilian University of Munich", "aff_unique_dep": ";;", "aff_unique_url": "https://www.lmu.de;https://www.tum.de;https://www.lmu.de", "aff_unique_abbr": "LMU;TUM;LMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Munich", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "id": "5vY0HNpy9F", "title": "PyTrial: Machine Learning Software and Benchmark for Clinical Trial Applications", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Clinical trials are conducted to test the effectiveness and safety of potential drugs in humans for regulatory approval. Machine learning (ML) has recently emerged as a new tool to assist in clinical trials. Despite this progress, there have been few efforts to document and benchmark ML4Trial algorithms available to the ML research community. Additionally, the accessibility to clinical trial-related datasets is limited, and there is a lack of well-defined clinical tasks to facilitate the development of new algorithms.\n\nTo fill this gap, we have developed PyTrial that provides benchmarks and open-source implementations of a series of ML algorithms for clinical trial design and operations. In this paper, we thoroughly investigate 34 ML algorithms for clinical trials across 6 different tasks, including patient outcome prediction, trial site selection, trial outcome prediction, patient-trial matching, trial similarity search, and synthetic data generation. We have also collected and prepared 23 ML-ready datasets as well as their working examples in Jupyter Notebooks for quick implementation and testing.\n\nPyTrial defines each task through a simple four-step process: data loading, model specification, model training, and model evaluation, all achievable with just a few lines of code. 
Furthermore, our modular API architecture empowers practitioners to expand the framework to incorporate new algorithms and tasks effortlessly.", "keywords": "Drug development;Clinical trial;Healthcare;Machine Learning;Deep Learning", "primary_area": "datasets and benchmarks", "supplementary_material": "", "author": "Zifeng Wang;Brandon Philip Theodorou;Tianfan Fu;Cao Xiao;Jimeng Sun", "authorids": "~Zifeng_Wang3;~Brandon_Philip_Theodorou1;~Tianfan_Fu1;~Cao_Xiao2;~Jimeng_Sun3", "gender": "M;M;M;F;", "homepage": "https://zifengwang.xyz;;https://futianfan.github.io/;https://sites.google.com/view/danicaxiao/home;http://sunlab.org", "dblp": ";;;170/1833;", "google_scholar": "kMlWwTAAAAAJ;xWjAUGsAAAAJ;KPQ49w4AAAAJ;ahaV25EAAAAJ;9jmmp5sAAAAJ", "orcid": ";;;;0000-0003-1512-6426", "linkedin": ";brandon-theodorou-284b6b148;;caoxiao/;jimengsun/", "or_profile": "~Zifeng_Wang3;~Brandon_Philip_Theodorou1;~Tianfan_Fu1;~Cao_Xiao2;~Jimeng_Sun3", "aff": "University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;Rensselaer Polytechnic Institute;GE Healthcare;Georgia Institute of Technology", "aff_domain": "illinois.edu;illinois.edu;rpi.edu;ge.com;gatech.edu", "position": "PhD student;PhD student;Assistant Professor;VP of AI;Associate Professor", "bibtex": "@misc{\nwang2024pytrial,\ntitle={PyTrial: Machine Learning Software and Benchmark for Clinical Trial Applications},\nauthor={Zifeng Wang and Brandon Philip Theodorou and Tianfan Fu and Cao Xiao and Jimeng Sun},\nyear={2024},\nurl={https://openreview.net/forum?id=5vY0HNpy9F}\n}", "github": "", "project": "", "reviewers": "dwRR;cWL5;BJYq;nm57", "site": "https://openreview.net/forum?id=5vY0HNpy9F", "pdf_size": 1515128, "rating": "5;5;5;5", "confidence": "3;5;4;3", "soundness": "2;2;3;3", "contribution": "2;3;2;3", "presentation": "2;3;3;3", "wc_summary": "51;45;105;148", "wc_strengths": "110;40;64;160", "wc_weaknesses": "292;261;60;148", "wc_questions": "13;3;158;152", "wc_review": "466;349;387;608", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 87.25, 42.14483954175173 ], "wc_strengths_avg": [ 93.5, 45.89934640057525 ], "wc_weaknesses_avg": [ 190.25, 92.34277177992873 ], "wc_questions_avg": [ 81.5, 73.61555542139175 ], "wc_review_avg": [ 452.5, 99.20307454912877 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9688703640964513495&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;1;2;3", "aff_unique_norm": "University of Illinois Urbana-Champaign;Rensselaer Polytechnic Institute;GE Healthcare;Georgia Institute of Technology", "aff_unique_dep": ";;;", "aff_unique_url": "https://illinois.edu;https://www.rpi.edu;https://www.gehealthcare.com;https://www.gatech.edu", "aff_unique_abbr": "UIUC;RPI;GEHC;Georgia Tech", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "5vcqlmDokC", "title": "Enhanced Gradient Aligned Continual Learning via Pareto Optimization", "track": "main", 
"status": "Withdraw", "tldr": "", "abstract": "Catastrophic forgetting remains a core challenge in continual learning (CL), whereby the models struggle to retain previous knowledge when learning new tasks. While existing gradient-alignment-based CL methods have been proposed to tackle this challenge by aligning gradients between previous and current tasks, they do not carefully consider the interdependence between previously learned tasks and fully explore the potential of seen tasks. Against this issue, we first adopt the MiniMax theorem and reformulate the existing commonly-adopted gradient alignment optimization problem in a gradient weighting framework. Then we incorporate the Pareto optimality to capture the interrelationship among previously learned tasks, and design a Pareto regularized gradient alignment algorithm (PRGA), which effectively enhances the overall performance of past tasks while ensuring the performance of the current task. Comprehensive empirical results demonstrate that the proposed PRGA outperforms current state-of-the-art continual learning methods across multiple datasets and different settings.", "keywords": "continual learning", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "", "author": "Yichen Wu;Hong Wang;Long-Kai Huang;Yefeng Zheng;Peilin Zhao;Ying Wei", "authorids": "~Yichen_Wu2;~Hong_Wang5;~Long-Kai_Huang1;~Yefeng_Zheng2;~Peilin_Zhao2;~Ying_Wei1", "gender": "M;F;;M;;F", "homepage": "https://wuyichen-97.github.io/;https://hongwang01.github.io/;https://sites.google.com/site/longkaihugo/home;https://en.westlake.edu.cn/faculty/yefeng-zheng.html;;https://wei-ying.net/", "dblp": ";83/5522-21;133/2006;44/6510;84/8411;14/4899-1", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;I5RH0CwAAAAJ;CaP64WUAAAAJ;vAIECxgAAAAJ;https://scholar.google.com.hk/citations?user=HPeX_YcAAAAJ;5UpFdKsAAAAJ", "orcid": "0000-0003-2859-3285;;0000-0001-5263-1443;0000-0003-2195-2847;0000-0001-8543-3953;", "linkedin": ";;;yefeng-zheng-bb45641/?originalSubdomain=cn;;", "or_profile": "~Yichen_Wu2;~Hong_Wang5;~Long-Kai_Huang1;~Yefeng_Zheng2;~Peilin_Zhao2;~Ying_Wei1", "aff": "City University of Hong Kong;Tencent ;Tencent;Tencent Jarvis Lab;Tencent;Nanyang Technological University", "aff_domain": "cityu.edu.hk;tencent.com;tencent.com;tencent.com;tencent.com;ntu.edu.sg", "position": "PhD student;Senior Researcher;Researcher;Director;Researcher;Assistant Professor", "bibtex": "@misc{\nwu2024enhanced,\ntitle={Enhanced Gradient Aligned Continual Learning via Pareto Optimization},\nauthor={Yichen Wu and Hong Wang and Long-Kai Huang and Yefeng Zheng and Peilin Zhao and Ying Wei},\nyear={2024},\nurl={https://openreview.net/forum?id=5vcqlmDokC}\n}", "github": "", "project": "", "reviewers": "hYp1;FMNS;UYXS;F5TA", "site": "https://openreview.net/forum?id=5vcqlmDokC", "pdf_size": 766962, "rating": "3;5;5;5", "confidence": "4;5;4;5", "soundness": "1;3;2;3", "contribution": "2;2;2;2", "presentation": "1;3;3;3", "wc_summary": "131;99;48;59", "wc_strengths": "92;24;37;47", "wc_weaknesses": "739;208;129;42", "wc_questions": "11;1;5;229", "wc_review": "973;332;219;377", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 84.25, 32.99526481178777 ], 
"wc_strengths_avg": [ 50.0, 25.583197610932064 ], "wc_weaknesses_avg": [ 279.5, 271.71170383330934 ], "wc_questions_avg": [ 61.5, 96.77163840712835 ], "wc_review_avg": [ 475.25, 293.0839256936484 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2804882248122467226&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;1;1;1;2", "aff_unique_norm": "City University of Hong Kong;Tencent;Nanyang Technological University", "aff_unique_dep": ";Tencent Holdings Limited;", "aff_unique_url": "https://www.cityu.edu.hk;https://www.tencent.com;https://www.ntu.edu.sg", "aff_unique_abbr": "CityU;Tencent;NTU", "aff_campus_unique_index": "0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "China;Singapore" }, { "id": "5xKixQzhDE", "title": "Calibrated Dataset Condensation for Faster Hyperparameter Search", "track": "main", "status": "Reject", "tldr": "", "abstract": "Dataset condensation can be used to reduce the computational cost of training multiple models on a large dataset by condensing the training dataset into a small synthetic set. State-of-the-art approaches rely on matching the model gradients between the real and synthetic data. However, there is no theoretical guarantee of the generalizability of the condensed data: data condensation often generalizes poorly across hyperparameters/architectures in practice. This paper considers a different condensation objective specifically geared toward hyperparameter search. We aim to generate a synthetic validation dataset so that the validation-performance rankings of the models, with different hyperparameters, on the condensed and original datasets are comparable. We propose a novel hyperparameter-calibrated dataset condensation (HCDC) algorithm, which obtains the synthetic validation dataset by matching the hyperparameter gradients computed via implicit differentiation and efficient inverse Hessian approximation. 
Experiments demonstrate that the proposed framework effectively maintains the validation-performance rankings of models and speeds up hyperparameter/architecture search for tasks on both images and graphs.", "keywords": "Dataset Condensation;Hyperparameter Optimization", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/a037174fabd3da720c05c302437ab54913168f58.pdf", "author": "Mucong Ding;Yuancheng Xu;Tahseen Rabbani;Xiaoyu Liu;Brian Gravelle;Teresa Ranadive;tai-ching tuan;Furong Huang", "authorids": "~Mucong_Ding1;~Yuancheng_Xu1;~Tahseen_Rabbani1;~Xiaoyu_Liu3;bjgrave@lps.umd.edu;~Teresa_Ranadive1;~tai-ching_tuan1;~Furong_Huang1", "gender": "M;M;M;F;;F;M;F", "homepage": "http://www.cs.umd.edu/~mcding/;https://yuancheng-xu.github.io;https://www.cs.umd.edu/people/trabbani;;;;;https://furong-huang.com", "dblp": "232/1754.html;;280/2362;;;;24/2795;72/8513", "google_scholar": "_bVao2MAAAAJ;OPB0QgwAAAAJ;;;;;;13yyuCcAAAAJ", "orcid": "0000-0002-6173-8055;;;0000-0003-3385-4726;;0000-0001-5418-6259;;", "linkedin": "mucong-ding-489296104;yuancheng-xu/;;;;;;", "or_profile": "~Mucong_Ding1;~Yuancheng_Xu1;~Tahseen_Rabbani1;~Xiaoyu_Liu3;bjgrave@lps.umd.edu;~Teresa_Ranadive1;~tai-ching_tuan1;~Furong_Huang1", "aff": "Department of Computer Science, University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park;;Laboratory for Physical Sciences;laboratory for Physical Sciences;University of Maryland", "aff_domain": "cs.umd.edu;umd.edu;umd.edu;umd.edu;;lps.umd.edu;lps.umd.edu;cs.umd.edu", "position": "PhD student;PhD student;PhD student;PhD student;;Researcher;Principal Researcher;Assistant Professor", "bibtex": "@misc{\nding2024calibrated,\ntitle={Calibrated Dataset Condensation for Faster Hyperparameter Search},\nauthor={Mucong Ding and Yuancheng Xu and Tahseen Rabbani and Xiaoyu Liu and Brian Gravelle and Teresa Ranadive and tai-ching tuan and Furong Huang},\nyear={2024},\nurl={https://openreview.net/forum?id=5xKixQzhDE}\n}", "github": "", "project": "", "reviewers": "UefQ;4Foe;EsH1;sKjo", "site": "https://openreview.net/forum?id=5xKixQzhDE", "pdf_size": 1016051, "rating": "3;5;6;6", "confidence": "4;4;5;3", "soundness": "2;3;4;3", "contribution": "3;3;4;3", "presentation": "2;3;4;3", "wc_summary": "84;56;103;77", "wc_strengths": "55;54;88;54", "wc_weaknesses": "149;64;259;89", "wc_questions": "5;90;4;5", "wc_review": "293;264;454;225", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "904;862;1107;639", "reply_reviewers": "0;0;0;0", "reply_authors": "2;2;2;1", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 80.0, 16.80773631397161 ], "wc_strengths_avg": [ 62.75, 14.58380951603524 ], "wc_weaknesses_avg": [ 140.25, 75.19765621347517 ], "wc_questions_avg": [ 26.0, 36.952672433803755 ], "wc_review_avg": [ 309.0, 87.12347559642005 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 878.0, 166.2031888984083 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9391495782177684095&as_sdt=5,33&sciodt=0,33&hl=en", 
"gs_version_total": 3, "aff_unique_index": "0;1;1;1;2;0;1", "aff_unique_norm": "University of Maryland, College Park;University of Maryland;Laboratory for Physical Sciences", "aff_unique_dep": "Department of Computer Science;;", "aff_unique_url": "https://www/umd.edu;https://www/umd.edu;", "aff_unique_abbr": "UMD;UMD;", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "5xV0yTP50n", "title": "Inductive Link Prediction in Knowledge Graphs using Path-based Neural Networks", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Link prediction is a crucial research area in knowledge graphs, with many downstream applications. In many real-world scenarios, inductive link prediction is required, where predictions have to be made among unseen entities. Embedding-based models usually need fine-tuning on new entity embeddings, and hence are difficult to be directly applied to inductive link prediction tasks. Logical rules captured by rule-based models can be directly applied to new entities with the same graph typologies, but the captured rules are discrete and usually lack generosity. Graph neural networks (GNNs) can generalize topological information to new graphs taking advantage of deep neural networks, which however may still need fine-tuning on new entity embeddings. In this paper, we propose SiaILP, a path-based model for inductive link prediction using light-weight siamese neural networks. Our model only depends on relation and path embeddings, which can be generalized to new entities without fine-tuning. Experiments show that our model achieves several new state-of-the-art performances in link prediction tasks using inductive versions of WN18RR, FB15k-237, and Nell995.", "keywords": "Knowledge Graph;Inductive Link Prediction;Siamese Neural Network;Transfer Learning", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "/attachment/3fd339e98cf1614ef5c6c593576a80da58bd3c90.zip", "author": "Canlin Zhang;Qian Liu;Xiuwen Liu", "authorids": "~Canlin_Zhang1;~Qian_Liu10;~Xiuwen_Liu1", "gender": "M;F;M", "homepage": "https://www.linkedin.com/in/canlin-zhang-44a902107/;https://scholar.google.com/citations?user=KEUynwwAAAAJ&hl=en;http://www.cs.fsu.edu/~liux", "dblp": "234/3478;;89/3077", "google_scholar": "jBt4_QYAAAAJ;KEUynwwAAAAJ;2GH5rWkAAAAJ", "orcid": "0000-0001-7747-2848;;0000-0002-9320-3872", "linkedin": "canlin-zhang-44a902107/;;", "or_profile": "~Canlin_Zhang1;~Qian_Liu10;~Xiuwen_Liu1", "aff": "Sorenson Communications;;Florida State University", "aff_domain": "sorenson.com;;fsu.edu", "position": "Researcher;;full Professor", "bibtex": "@misc{\nzhang2024inductive,\ntitle={Inductive Link Prediction in Knowledge Graphs using Path-based Neural Networks},\nauthor={Canlin Zhang and Qian Liu and Xiuwen Liu},\nyear={2024},\nurl={https://openreview.net/forum?id=5xV0yTP50n}\n}", "github": "", "project": "", "reviewers": "yt8Z;cRRq;QF3p;GEeR", "site": "https://openreview.net/forum?id=5xV0yTP50n", "pdf_size": 415903, "rating": "3;3;3;3", "confidence": "5;4;5;3", "soundness": "2;2;2;2", "contribution": "2;2;1;1", "presentation": "2;1;3;1", "wc_summary": "71;47;125;57", "wc_strengths": "25;23;14;40", "wc_weaknesses": "206;100;447;243", "wc_questions": "57;3;1;39", "wc_review": "359;173;587;379", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": 
[ 3.0, 0.0 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.0, 0.0 ], "contribution_avg": [ 1.5, 0.5 ], "presentation_avg": [ 1.75, 0.82915619758885 ], "wc_summary_avg": [ 75.0, 30.099833886584822 ], "wc_strengths_avg": [ 25.5, 9.340770846134703 ], "wc_weaknesses_avg": [ 249.0, 125.78751925370021 ], "wc_questions_avg": [ 25.0, 23.874672772626646 ], "wc_review_avg": [ 374.5, 146.6449794571911 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15702917113674669699&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1", "aff_unique_norm": "Sorenson Communications;Florida State University", "aff_unique_dep": ";", "aff_unique_url": "https://www.sorenson.com;https://www.fsu.edu", "aff_unique_abbr": ";FSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Scale-Adaptive Diffusion Model for Complex Sketch Synthesis", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19407", "id": "5xadJmgwix", "author_site": "Jijin Hu, Ke Li, Yonggang Qi, Yi-Zhe Song", "tldr": "", "abstract": "While diffusion models have revolutionized generative AI, their application to human sketch generation, especially in the creation of complex yet concise and recognizable sketches, remains largely unexplored. Existing efforts have primarily focused on vector-based sketches, limiting their ability to handle intricate sketch data. This paper introduces an innovative extension of diffusion models to pixellevel sketch generation, addressing the challenge of dynamically optimizing the guidance scale for classifier-guided diffusion. Our approach achieves a delicate balance between recognizability and complexity in generated sketches through scale-adaptive classifier-guided diffusion models, a scaling indicator, and the concept of a residual sketch. We also propose a three-phase sampling strategy to enhance sketch diversity and quality. 
Experiments on the QuickDraw dataset showcase the potential of diffusion models to push the boundaries of sketch generation, particularly in complex scenarios unattainable by vector-based methods.", "keywords": "Generative model;Classifier-based Diffusion Model;Sketch", "primary_area": "generative models", "supplementary_material": "", "author": "Jijin Hu;Ke Li;Yonggang Qi;Yi-Zhe Song", "authorids": "~Jijin_Hu1;~Ke_Li2;~Yonggang_Qi2;~Yi-Zhe_Song2", "gender": "F;M;M;M", "homepage": "https://github.com/HuJijin;https://keli-sketchx.github.io/;https://qugank.github.io/;http://personal.ee.surrey.ac.uk/Personal/Y.Song/", "dblp": "382/4015;75/6627-4;139/7002;98/1684", "google_scholar": ";KZOFaz4AAAAJ;https://scholar.google.com.hk/citations?user=pQNpf7cAAAAJ;https://scholar.google.co.uk/citations?user=irZFP_AAAAAJ", "orcid": ";0000-0002-9739-7969;;", "linkedin": ";;;", "or_profile": "~Jijin_Hu1;~Ke_Li2;~Yonggang_Qi2;~Yi-Zhe_Song2", "aff": "Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;University of Surrey", "aff_domain": "bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;surrey.ac.uk", "position": "MS student;Associate Professor;Associate Professor;Professor", "bibtex": "@inproceedings{\nhu2024scaleadaptive,\ntitle={Scale-Adaptive Diffusion Model for Complex Sketch Synthesis},\nauthor={Jijin Hu and Ke Li and Yonggang Qi and Yi-Zhe Song},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=5xadJmgwix}\n}", "github": "", "project": "", "reviewers": "st3k;SPRZ;t4As;bpbS", "pdf_size": 7113197, "rating": "5;6;6;6", "confidence": "4;3;4;4", "soundness": "2;3;3;3", "contribution": "2;2;3;2", "presentation": "3;3;3;3", "wc_summary": "96;129;50;86", "wc_strengths": "25;56;75;39", "wc_weaknesses": "82;166;118;119", "wc_questions": "7;115;21;113", "wc_review": "210;466;264;357", "wc_reply_reviewers": "0;0;21;67", "wc_reply_authors": "415;733;394;949", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 90.25, 28.16358464400439 ], "wc_strengths_avg": [ 48.75, 18.713297411199342 ], "wc_weaknesses_avg": [ 121.25, 29.82762980861872 ], "wc_questions_avg": [ 64.0, 50.24937810560445 ], "wc_review_avg": [ 324.25, 97.27377601388773 ], "wc_reply_reviewers_avg": [ 22.0, 27.358728040608906 ], "wc_reply_authors_avg": [ 622.75, 231.3443050952411 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12043012526220433934&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "openreview": "https://openreview.net/forum?id=5xadJmgwix", "pdf": "https://openreview.net/pdf?id=5xadJmgwix", "email": "bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;surrey.ac.uk", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Beijing University of Posts and Telecommunications;University of Surrey", "aff_unique_dep": ";", "aff_unique_url": "http://www.bupt.edu.cn/;https://www.surrey.ac.uk", "aff_unique_abbr": "BUPT;Surrey", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Beijing;", "aff_country_unique_index": "0;0;0;1", 
"aff_country_unique": "China;United Kingdom" }, { "id": "5zNJQV60Wm", "title": "Natural Language Embedded Programs for Hybrid Language Symbolic Reasoning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "How can we perform computations over natural language representations to solve tasks that require symbolic and numeric reasoning? We propose natural language embedded programs (NLEP) as a unifying framework for addressing math/symbolic reasoning, natural language understanding, and instruction following tasks. Our approach prompts a language model to generate full Python programs that define functions over data structures which contain natural language representations of structured knowledge. A Python interpreter then executes the generated code and prints the output. Despite using a task-general prompt, we find that this approach can improve upon strong baselines across a range of different tasks including math and symbolic reasoning, text classification, question answering, and instruction following. We further find the generated programs are often interpretable and enable post-hoc verification of the intermediate reasoning steps.", "keywords": "natural language embedded program (NLEP);large language model", "primary_area": "neurosymbolic & hybrid AI systems (physics-informed, logic & formal reasoning, etc.)", "supplementary_material": "", "author": "Tianhua Zhang;Jiaxin Ge;Hongyin Luo;Yung-Sung Chuang;Mingye Gao;Yuan Gong;Xixin Wu;Yoon Kim;Helen M. Meng;James R. Glass", "authorids": "~Tianhua_Zhang2;~Jiaxin_Ge1;~Hongyin_Luo1;~Yung-Sung_Chuang1;~Mingye_Gao1;~Yuan_Gong3;~Xixin_Wu1;~Yoon_Kim1;~Helen_M._Meng1;~James_R._Glass1", "gender": "F;F;M;M;F;M;;;F;", "homepage": ";https://jiaxin.ge/;;https://people.csail.mit.edu/yungsung/;https://onelab.mit.edu/people;;https://www1.se.cuhk.edu.hk/~wuxx/;https://people.csail.mit.edu/yoonkim/;http://www.se.cuhk.edu.hk/people/academic-staff/prof-meng-mei-ling-helen/;", "dblp": "01/8403;;147/4317;64/3095;;;125/2836;;92/3270;", "google_scholar": "https://scholar.google.com.hk/citations?user=dEfp5vQAAAAJ;I6P0SwgAAAAJ;;3ar1DOwAAAAJ;;MuhvvOkAAAAJ;;n_ts4eYAAAAJ;;", "orcid": ";;;0000-0002-1723-5063;;;;;;", "linkedin": ";;;yschuang;;;;;;", "or_profile": "~Tianhua_Zhang2;~Jiaxin_Ge1;~Hongyin_Luo1;~Yung-Sung_Chuang1;~Mingye_Gao1;~Yuan_Gong3;~Xixin_Wu1;~Yoon_Kim1;~Helen_M._Meng1;~James_R._Glass1", "aff": "Chinese University of Hong Kong, The Chinese University of Hong Kong;Peking University;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;The Chinese University of Hong Kong;Massachusetts Institute of Technology;The Chinese University of Hong Kong;", "aff_domain": "se.cuhk.edu.hk;pku.edu.cn;mit.edu;mit.edu;mit.edu;mit.edu;cuhk.edu.hk;mit.edu;cuhk.edu.hk;", "position": "PhD student;Undergrad student;Postdoc;PhD student;PhD student;Researcher;Assistant Professor;Assistant Professor;Full Professor;", "bibtex": "@misc{\nzhang2024natural,\ntitle={Natural Language Embedded Programs for Hybrid Language Symbolic Reasoning},\nauthor={Tianhua Zhang and Jiaxin Ge and Hongyin Luo and Yung-Sung Chuang and Mingye Gao and Yuan Gong and Xixin Wu and Yoon Kim and Helen M. Meng and James R. 
Glass},\nyear={2024},\nurl={https://openreview.net/forum?id=5zNJQV60Wm}\n}", "github": "", "project": "", "reviewers": "ZKfU;pxPW;2MWH;T68U", "site": "https://openreview.net/forum?id=5zNJQV60Wm", "pdf_size": 724845, "rating": "3;3;5;8", "confidence": "3;3;3;4", "soundness": "3;2;2;3", "contribution": "2;2;1;3", "presentation": "2;3;3;3", "wc_summary": "16;52;79;92", "wc_strengths": "19;109;245;98", "wc_weaknesses": "143;82;117;91", "wc_questions": "141;14;1;84", "wc_review": "319;257;442;365", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.75, 2.0463381929681126 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 59.75, 29.089302157322372 ], "wc_strengths_avg": [ 117.75, 81.25692278200054 ], "wc_weaknesses_avg": [ 108.25, 23.826193569263218 ], "wc_questions_avg": [ 60.0, 56.422513237182194 ], "wc_review_avg": [ 345.75, 67.50324066294891 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.9169493006161777, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13668199448688252438&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;2;2;2;2;0;2;0", "aff_unique_norm": "Chinese University of Hong Kong;Peking University;Massachusetts Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cuhk.edu.hk;http://www.pku.edu.cn;https://web.mit.edu", "aff_unique_abbr": "CUHK;Peking U;MIT", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;1;1;1;1;0;1;0", "aff_country_unique": "China;United States" }, { "id": "5zwrpqYIK5", "title": "Outlier-Robust Orthogonal Regression on Manifolds", "track": "main", "status": "Reject", "tldr": "", "abstract": "Motivated by machine learning and computer vision applications, we formulate the problem of Outlier-Robust Orthogonal Regression to find a point in a manifold that satisfies as many linear equations as possible. Existing approaches addressing special cases of our formulation either lack theoretical support, are computationally costly, or somewhat ignore the manifold constraint; the latter two limit them from many applications. In this paper, we propose a unified approach based on solving a non-convex and non-smooth $\\ell^1$ optimization problem over the manifold. We give conditions on the geometry of the input data, the manifold, and their interplay, under which the minimizers recover the ground truth; notably the conditions can hold even when the inliers are skewed within the true hyperplane. We provide a Riemannian subgradient method and an iteratively reweighted least squares method, suiting different computational oracles, and prove their linear/sub-linear convergence to minimizers/critical points. 
Experiments demonstrate that respecting the manifold constraints increases robustness against outliers in robust essential matrix estimation and robust rotation search.", "keywords": "Optimization over manifolds;orthogonal regression;subspace learning", "primary_area": "optimization", "supplementary_material": "/attachment/ca6e39aeb74ddee277a38a1368ef376029c3c5b1.pdf", "author": "Tianjiao Ding;Liangzu Peng;Rene Vidal", "authorids": "~Tianjiao_Ding1;~Liangzu_Peng2;~Rene_Vidal1", "gender": "M;M;", "homepage": "https://tianjiaoding.com/;https://liangzu.github.io/;http://www.vision.jhu.edu", "dblp": "230/1227;228/7974;v/ReneVidal", "google_scholar": "L3wy9QMAAAAJ;A39MlcYAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0003-0708-7543;", "linkedin": ";;rene-vidal-74844928/", "or_profile": "~Tianjiao_Ding1;~Liangzu_Peng2;~Rene_Vidal1", "aff": "University of Pennsylvania;University of Pennsylvania;Amazon", "aff_domain": "seas.upenn.edu;upenn.edu;amazon.com", "position": "PhD student;PhD student;Principal Researcher", "bibtex": "@misc{\nding2024outlierrobust,\ntitle={Outlier-Robust Orthogonal Regression on Manifolds},\nauthor={Tianjiao Ding and Liangzu Peng and Rene Vidal},\nyear={2024},\nurl={https://openreview.net/forum?id=5zwrpqYIK5}\n}", "github": "", "project": "", "reviewers": "CDEb;ovBv;BNmQ;6ASC", "site": "https://openreview.net/forum?id=5zwrpqYIK5", "pdf_size": 1817912, "rating": "3;3;3;5", "confidence": "4;4;3;3", "soundness": "2;2;3;3", "contribution": "2;2;3;2", "presentation": "2;3;3;2", "wc_summary": "29;118;666;79", "wc_strengths": "22;56;10;226", "wc_weaknesses": "219;228;41;143", "wc_questions": "10;166;7;90", "wc_review": "280;568;724;538", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 223.0, 257.70428789603017 ], "wc_strengths_avg": [ 78.5, 86.81445732134712 ], "wc_weaknesses_avg": [ 157.75, 75.05789432165014 ], "wc_questions_avg": [ 68.25, 65.52241982710956 ], "wc_review_avg": [ 527.5, 159.38867588382809 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:HTxhkrJJgzkJ:scholar.google.com/&scioq=Outlier-Robust+Orthogonal+Regression+on+Manifolds&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Pennsylvania;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.upenn.edu;https://www.amazon.com", "aff_unique_abbr": "UPenn;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "60e1hl06Ec", "title": "Mitigating Simplicity Bias in Deep Learning for Improved OOD Generalization and Robustness", "track": "main", "status": "Reject", "tldr": "", "abstract": "Neural networks (NNs) are known to exhibit simplicity bias where they tend to prefer learning 'simple' features over more 'complex' ones, even when the latter may be more informative. 
Simplicity bias can lead to the model making biased predictions which have poor out-of-distribution (OOD) generalization. To address this, we propose a framework that encourages the model to use a more diverse set of features to make predictions. We first train a simple model, and then regularize the conditional mutual information with respect to it to obtain the final model. We demonstrate the effectiveness of this framework in various problem settings and real-world applications, showing that it effectively addresses simplicity bias and leads to more features being used, enhances OOD generalization, and improves subgroup robustness and fairness. We complement these results with theoretical analyses of the effect of the regularization and its OOD generalization properties.", "keywords": "Simplicity Bias;Spurious Features;OOD Generalization;Subgroup Robustness", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/c1b7359eecd0506e40bcc843665a244e810c081e.zip", "author": "Bhavya Vasudeva;Kameron Shahabi;Vatsal Sharan", "authorids": "~Bhavya_Vasudeva1;~Kameron_Shahabi1;~Vatsal_Sharan1", "gender": ";M;M", "homepage": "https://estija.github.io;https://kyshahab.github.io/;https://vatsalsharan.github.io/", "dblp": "250/9545;281/6744;126/2543", "google_scholar": "https://scholar.google.co.in/citations?user=ZCSsIokAAAAJ;;Ize17HEAAAAJ", "orcid": ";;", "linkedin": ";kameron-shahabi-76784218b/;", "or_profile": "~Bhavya_Vasudeva1;~Kameron_Shahabi1;~Vatsal_Sharan1", "aff": "University of Southern California;University of Southern California;University of Southern California", "aff_domain": "usc.edu;usc.edu;usc.edu", "position": "PhD student;MS student;Assistant Professor", "bibtex": "@misc{\nvasudeva2024mitigating,\ntitle={Mitigating Simplicity Bias in Deep Learning for Improved {OOD} Generalization and Robustness},\nauthor={Bhavya Vasudeva and Kameron Shahabi and Vatsal Sharan},\nyear={2024},\nurl={https://openreview.net/forum?id=60e1hl06Ec}\n}", "github": "", "project": "", "reviewers": "Q6kW;pjci;h3b4;5jvr", "site": "https://openreview.net/forum?id=60e1hl06Ec", "pdf_size": 1593024, "rating": "5;6;6;6", "confidence": "4;4;4;4", "soundness": "3;3;3;2", "contribution": "1;2;2;3", "presentation": "3;3;3;3", "wc_summary": "51;27;36;102", "wc_strengths": "31;61;30;62", "wc_weaknesses": "456;764;347;263", "wc_questions": "85;45;45;3", "wc_review": "623;897;458;430", "wc_reply_reviewers": "207;0;68;0", "wc_reply_authors": "909;518;180;278", "reply_reviewers": "1;0;1;0", "reply_authors": "3;2;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 54.0, 29.008619408720573 ], "wc_strengths_avg": [ 46.0, 15.508062419270823 ], "wc_weaknesses_avg": [ 457.5, 189.72677723505453 ], "wc_questions_avg": [ 44.5, 28.99568933479596 ], "wc_review_avg": [ 602.0, 185.59768317519485 ], "wc_reply_reviewers_avg": [ 68.75, 84.50850548909264 ], "wc_reply_authors_avg": [ 471.25, 281.0617147531837 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4331722416726197918&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of 
Southern California", "aff_unique_dep": "", "aff_unique_url": "https://www.usc.edu", "aff_unique_abbr": "USC", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "NeurRev: Train Better Sparse Neural Network Practically via Neuron Revitalization", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19406", "id": "60lNoatp7u", "author_site": "Gen Li, Lu Yin, Jie Ji, Wei Niu, Minghai Qin, Bin Ren, Linke Guo, Shiwei Liu, Xiaolong Ma", "tldr": "", "abstract": "Dynamic Sparse Training (DST) employs a greedy search mechanism to identify an optimal sparse subnetwork by periodically pruning and growing network connections during training. To guarantee effectiveness, DST algorithms rely on high search frequency, which consequently, requires large learning rate and batch size to enforce stable neuron learning. Such settings demand extreme memory consumption, as well as generating significant system overheads that limit the wide deployment of deep learning-based applications on resource-constraint platforms. To reconcile such, we propose $\\underline{Neur}$on $\\underline{Rev}$italization framework for DST (NeurRev), based on an innovative finding that dormant neurons exist with the presence of weight sparsity, and cannot be revitalized (i.e., activated for learning) even with high sparse mask search frequency. These dormant neurons produce a large quantity of zeros during training, which contribute relatively little to the outputs of succeeding layers or to the final results. Different from most existing DST algorithms that spare no effort designing weight growing criteria, NeurRev focuses on optimizing the long-neglected pruning part, which awakes dormant neurons by pruning and incurs no additional computation costs. As such, NeurRev advances more effective neuron learning, which not only achieves outperforming accuracy in a variety of networks and datasets, but also promoting a low-cost dynamism at system-level. Systematical evaluations on training speed and system overhead are conducted on the mobile devices, where the proposed NeurRev framework consistently outperforms representative state-of-the-arts. 
Code will be released.", "keywords": "Dynamic Sparse Training; Neuron Revitalization", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Gen Li;Lu Yin;Jie Ji;Wei Niu;Minghai Qin;Bin Ren;Linke Guo;Shiwei Liu;Xiaolong Ma", "authorids": "~Gen_Li4;~Lu_Yin1;~Jie_Ji1;~Wei_Niu3;~Minghai_Qin1;~Bin_Ren1;~Linke_Guo2;~Shiwei_Liu2;~Xiaolong_Ma2", "gender": "M;;;M;M;M;M;M;M", "homepage": "https://coulsonlee.github.io;https://luuyin.com/;;https://www.niuwei.info;https://sites.google.com/site/minghaiqin/home;http://www.cs.wm.edu/~bren/;http://cecas.clemson.edu/~linkeg/index.html;https://shiweiliuiiiiiii.github.io/;https://xiaolongma2016.com", "dblp": "28/538-12;87/2528-6;;68/828-2.html;;;;234/8697-3.html;", "google_scholar": ";G4Xe1NkAAAAJ;;w1RoaOMAAAAJ;MSgWKbYAAAAJ;9Uqwy4UAAAAJ;https://scholar.google.com/citations?hl=en;73IbXtsAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;;;;;0000-0003-3753-7648", "linkedin": ";;;;;;;;xiaolong-ma-66b98910b/", "or_profile": "~Gen_Li4;~Lu_Yin1;~Jie_Ji1;~Wei_Niu3;~Minghai_Qin1;~Bin_Ren1;~Linke_Guo2;~Shiwei_Liu2;~Xiaolong_Ma2", "aff": "Clemson University;University of Aberdeen;;University of Georgia;Western Digital Corporation;William & Mary;Clemson University;University of Oxford;Clemson University", "aff_domain": "clemson.edu;abdn.ac.uk;;uga.edu;wdc.com;cs.wm.edu;clemson.edu;ox.ac.uk;clemson.edu", "position": "PhD student;Assistant Professor;;Assistant Professor;senior technologist;Associate Professor;Associate Professor;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nli2024neurrev,\ntitle={NeurRev: Train Better Sparse Neural Network Practically via Neuron Revitalization},\nauthor={Gen Li and Lu Yin and Jie Ji and Wei Niu and Minghai Qin and Bin Ren and Linke Guo and Shiwei Liu and Xiaolong Ma},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=60lNoatp7u}\n}", "github": "", "project": "", "reviewers": "AQcf;5GHY;Fgjo", "pdf_size": 3353694, "rating": "6;6;8", "confidence": "3;4;4", "soundness": "2;3;4", "contribution": "2;3;3", "presentation": "3;3;4", "wc_summary": "46;95;71", "wc_strengths": "42;40;80", "wc_weaknesses": "93;87;78", "wc_questions": "42;176;2", "wc_review": "223;398;231", "wc_reply_reviewers": "23;230;0", "wc_reply_authors": "759;1277;416", "reply_reviewers": "1;2;0", "reply_authors": "2;3;1", "rating_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 70.66666666666667, 20.00555478416488 ], "wc_strengths_avg": [ 54.0, 18.40289832245635 ], "wc_weaknesses_avg": [ 86.0, 6.164414002968976 ], "wc_questions_avg": [ 73.33333333333333, 74.41027408153317 ], "wc_review_avg": [ 284.0, 80.67630796377996 ], "wc_reply_reviewers_avg": [ 84.33333333333333, 103.42898798477898 ], "wc_reply_authors_avg": [ 817.3333333333334, 353.9136743457207 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.49999999999999983, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6553735554510376726&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "openreview": 
"https://openreview.net/forum?id=60lNoatp7u", "pdf": "https://openreview.net/pdf?id=60lNoatp7u", "email": "clemson.edu;abdn.ac.uk;;uga.edu;wdc.com;cs.wm.edu;clemson.edu;ox.ac.uk;clemson.edu", "author_num": 9, "aff_unique_index": "0;1;2;3;4;0;5;0", "aff_unique_norm": "Clemson University;University of Aberdeen;University of Georgia;Western Digital Corporation;College of William & Mary;University of Oxford", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.clemson.edu;https://www.abdn.ac.uk;https://www.uga.edu;https://www.westerndigital.com;https://www.wm.edu;https://www.ox.ac.uk", "aff_unique_abbr": "Clemson;Aberdeen;UGA;WDC;WM;Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;0;1;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "61DYdiyQqk", "title": "Two Heads Are Better Than One: Exploiting Both Sequence and Graph Models in AMR-To-Text Generation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Abstract meaning representation (AMR) is a special semantic representation language, which can capture the core meaning of a sentence with a syntax-irrelevant graph.\nAMR-to-text generation, which aims to generate a sentence according to a given AMR graph, is a well-studied task and has shown its helpfulness in various other NLP tasks.\nExisting AMR-to-text generation methods can be roughly divided into two categories, while either has its own advantages and disadvantages.\nThe first one adopts a sequence-to-sequence model, especially a pretrained language model (PLM). \nIt has good text generation ability but cannot cope with the structural information of AMR graphs well.\nThe second category of method is based on graph neural networks (GNNs), whose advantages and disadvantages are exactly the opposite. \nTo combine the strengths of the two kinds of models, in this paper, we propose a dual encoder-decoder model named \\modelName, which integrates a specially designed GNN into a pre-trained sequence-to-sequence model.\nWe conduct extensive experiments as well as human evaluation and a case study, finding that it achieves the desired effect and yields state-of-the-art performance in the AMR-to-text generation task. 
\nWe also demonstrate that it outperforms the most powerful general-purpose PLM GPT-4.", "keywords": "graph-to-text generation;abstract mearning representation;dual-encoder", "primary_area": "generative models", "supplementary_material": "/attachment/fb5440a758542d3938393eefc08f2902dfa32201.zip", "author": "Yining Hong;Fanchao Qi;Maosong Sun", "authorids": "~Yining_Hong3;~Fanchao_Qi1;~Maosong_Sun1", "gender": "F;M;M", "homepage": "https://hyn0027.github.io/;;https://www.cs.tsinghua.edu.cn/csen/info/1312/4394.htm", "dblp": ";228/5500;95/3291-1", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.tw/citations?user=zIgT0HMAAAAJ", "orcid": ";0000-0002-4400-4033;", "linkedin": ";%E5%87%A1%E8%B6%85-%E5%B2%82-885770a4/;", "or_profile": "~Yining_Hong3;~Fanchao_Qi1;~Maosong_Sun1", "aff": "Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "Undergrad student;Postdoc;Full Professor", "bibtex": "@misc{\nhong2024two,\ntitle={Two Heads Are Better Than One: Exploiting Both Sequence and Graph Models in {AMR}-To-Text Generation},\nauthor={Yining Hong and Fanchao Qi and Maosong Sun},\nyear={2024},\nurl={https://openreview.net/forum?id=61DYdiyQqk}\n}", "github": "", "project": "", "reviewers": "Koxu;FACa;PF3i", "site": "https://openreview.net/forum?id=61DYdiyQqk", "pdf_size": 2743551, "rating": "5;5;6", "confidence": "2;5;4", "soundness": "3;3;3", "contribution": "2;2;2", "presentation": "2;3;3", "wc_summary": "74;64;109", "wc_strengths": "58;50;113", "wc_weaknesses": "248;106;219", "wc_questions": "73;1;49", "wc_review": "453;221;490", "wc_reply_reviewers": "0;0;53", "wc_reply_authors": "556;500;1571", "reply_reviewers": "0;0;2", "reply_authors": "1;1;4", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 1.247219128924647 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 82.33333333333333, 19.293061504650375 ], "wc_strengths_avg": [ 73.66666666666667, 28.003967972810962 ], "wc_weaknesses_avg": [ 191.0, 61.25901294231459 ], "wc_questions_avg": [ 41.0, 29.93325909419153 ], "wc_review_avg": [ 388.0, 119.04900951568924 ], "wc_reply_reviewers_avg": [ 17.666666666666668, 24.984439601924677 ], "wc_reply_authors_avg": [ 875.6666666666666, 492.2061447085854 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 2.0, 1.4142135623730951 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.1889822365046136, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:GLWuaFHGhu4J:scholar.google.com/&scioq=Two+Heads+Are+Better+Than+One:+Exploiting+Both+Sequence+and+Graph+Models+in+AMR-To-Text+Generation&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "61TRLIS5A0", "title": "Enhancing Medical Image Generation with Anatomical Precision: A Multi-Headed VAE-Based Diffusion Model", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Score-based image generation models, also known as diffusion models, can generate highly realistic and diverse natural images. 
However, a common challenge emerges when applying diffusion models to medical image generation and segmentation. While these models excel at producing realistic local textures, they struggle to accurately capture global anatomical priors, such as organ shape and location. Furthermore, the model lacks the capability for controlled recalibration to transform an anatomically unrealistic image into a realistic one. Here we present a new diffusion model where the generated images exhibit both realistic style and anatomically accurate position. Specifically, this is done by guiding the reverse diffusion process with our specially designed multi-headed VAE, which produces the image's disentangled style and position embeddings. We use the position embedding to define a grid deformation function that deforms a simple position prior to a predicted segmentation mask. Then, we apply the same grid deformation on the style embedding for image generation. This alleviates the style embedding from the burden of learning position features, thereby promoting disentangling. Our proposed approach showcases promising performance in controlled image generation across a range of medical image tasks, such as skin lesions and fetal head. Furthermore, our model delivers state-of-the-art segmentation performance.", "keywords": "medical image processing;image segmentation;variational auto-encoder;diffusion modelling;controlled image generation", "primary_area": "generative models", "supplementary_material": "", "author": "Hongfei Yang;YoungSeok Jeon;Mengling Feng", "authorids": "~Hongfei_Yang2;~YoungSeok_Jeon1;~Mengling_Feng1", "gender": "M;M;M", "homepage": ";https://www.mornin-feng.com/;https://www.mornin-feng.com", "dblp": ";;31/7025", "google_scholar": ";ppl10bsAAAAJ;F2ET1WsAAAAJ", "orcid": "0000-0002-8150-9364;;0000-0002-5338-6248", "linkedin": ";;mengling-mornin-feng-9400811a/", "or_profile": "~Hongfei_Yang2;~YoungSeok_Jeon1;~Mengling_Feng1", "aff": "National University of Singapore;National University of Singapore;National University of Singapore", "aff_domain": "nus.edu;nus.edu;nus.edu.sg", "position": "Postdoc;Postdoc;Assistant Professor", "bibtex": "@misc{\nyang2024enhancing,\ntitle={Enhancing Medical Image Generation with Anatomical Precision: A Multi-Headed {VAE}-Based Diffusion Model},\nauthor={Hongfei Yang and YoungSeok Jeon and Mengling Feng},\nyear={2024},\nurl={https://openreview.net/forum?id=61TRLIS5A0}\n}", "github": "", "project": "", "reviewers": "PEau;nez1;etX6", "site": "https://openreview.net/forum?id=61TRLIS5A0", "pdf_size": 12491106, "rating": "3;5;5", "confidence": "4;4;5", "soundness": "2;3;2", "contribution": "2;2;2", "presentation": "2;4;2", "wc_summary": "30;108;92", "wc_strengths": "34;71;95", "wc_weaknesses": "216;243;42", "wc_questions": "59;140;49", "wc_review": "339;562;278", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.9428090415820634 ], "wc_summary_avg": [ 76.66666666666667, 33.6386021641143 ], "wc_strengths_avg": [ 66.66666666666667, 25.090945688745084 ], "wc_weaknesses_avg": [ 167.0, 89.07300376657341 ], "wc_questions_avg": [ 82.66666666666667, 40.74582459862878 ], "wc_review_avg": [ 393.0, 122.06828689986055 ], "wc_reply_reviewers_avg": [ 0, 0 ], 
"wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 3, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:-JTrD89olNcJ:scholar.google.com/&scioq=Enhancing+Medical+Image+Generation+with+Anatomical+Precision:+A+Multi-Headed+VAE-Based+Diffusion+Model&hl=en&as_sdt=0,14", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "", "aff_unique_url": "https://www.nus.edu.sg", "aff_unique_abbr": "NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Singapore" }, { "id": "61hDIuox86", "title": "Latent Lie Group Representations", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Symmetry detection tasks rely on identifying transformations of data points that keep some task-related quality, such as classification label, identical. These symmetries are useful during model selection for neural networks, as even a conceptually simple symmetry (e.g., translation invariance) can lead to superior performance-efficiency tradeoffs (e.g., CNNs). Leveraging neural networks to learn these transformations can lead to approaches that yield representations of the transformations in latent space, rather than just the data itself.\nIn this work, we propose a latent variable framework for learning one-parameter subgroups of Lie group symmetries from observations, improving the accuracy of the learned transformation with respect to the one in pixel-space, even including situations in which this might not even be desirable.", "keywords": "deep learning;symmetry;lie groups", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Alex Gabel;Rick Quax;Efstratios Gavves", "authorids": "~Alex_Gabel1;r.quax@uva.nl;~Efstratios_Gavves1", "gender": "M;;M", "homepage": "https://www.uva.nl/en/profile/g/a/a.gabel/a.gabel.html;;https://www.egavves.com", "dblp": ";;03/8693", "google_scholar": "u8IK1h0AAAAJ;;https://scholar.google.nl/citations?user=QqfCvsgAAAAJ", "orcid": "0009-0008-0964-8918;;", "linkedin": "https://nl.linkedin.com/in/alex-gabel;;", "or_profile": "~Alex_Gabel1;r.quax@uva.nl;~Efstratios_Gavves1", "aff": "University of Amsterdam;;University of Amsterdam", "aff_domain": "uva.nl;;uva.nl", "position": "PhD student;;Associate Professor", "bibtex": "@misc{\ngabel2024latent,\ntitle={Latent Lie Group Representations},\nauthor={Alex Gabel and Rick Quax and Efstratios Gavves},\nyear={2024},\nurl={https://openreview.net/forum?id=61hDIuox86}\n}", "github": "", "project": "", "reviewers": "bdCU;8A7f;FZBr", "site": "https://openreview.net/forum?id=61hDIuox86", "pdf_size": 590200, "rating": "1;5;5", "confidence": "3;4;3", "soundness": "2;4;3", "contribution": "2;3;3", "presentation": "1;3;3", "wc_summary": "74;74;56", "wc_strengths": "37;79;10", "wc_weaknesses": "299;210;48", "wc_questions": "2;127;24", "wc_review": "412;490;138", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 3.6666666666666665, 1.8856180831641267 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 
0.9428090415820634 ], "wc_summary_avg": [ 68.0, 8.48528137423857 ], "wc_strengths_avg": [ 42.0, 28.39013913315678 ], "wc_weaknesses_avg": [ 185.66666666666666, 103.90487102259559 ], "wc_questions_avg": [ 51.0, 54.48547206977899 ], "wc_review_avg": [ 346.6666666666667, 150.94664252274342 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 3, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:mFJZKUPoIaYJ:scholar.google.com/&scioq=Latent+Lie+Group+Representations&hl=en&as_sdt=0,10", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "University of Amsterdam", "aff_unique_dep": "", "aff_unique_url": "https://www.uva.nl", "aff_unique_abbr": "UvA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Netherlands" }, { "id": "61mnwO4Mzp", "title": "Denoising Diffusion Variational Inference", "track": "main", "status": "Reject", "tldr": "", "abstract": "Latent variable methods are a powerful tool for representation learning that greatly benefit from expressive variational posteriors, including generative models based on normalizing flows or adversarial networks.\nIn this work, we propose denoising diffusion variational inference, which relies on diffusion models---recent generative algorithms with state-of-the-art sample quality---to fit a complex posterior by performing diffusion in latent space. Our method augments a variational posterior with auxiliary latent variables via a user-specified noising process that transforms a complex latent into a simple auxiliary latent. The approximate posterior then reverses this noising process by optimizing a lower bound on the marginal likelihood inspired by the wake-sleep algorithm. Our method can be used to fit deep latent variable models, which yields the DiffVAE algorithm. This algorithm is especially effective at dimensionality reduction and representation learning, where it outperforms methods based on adversarial training or invertible flow-based posteriors. 
We use this algorithm on a motivating task in biology---inferring latent ancestry from human genomes---and show that it outperforms strong baselines on the 1000 Genomes dataset.", "keywords": "visualization;vae;diffusion models;representation learning", "primary_area": "generative models", "supplementary_material": "", "author": "Top Piriyakulkij;Yingheng Wang;Volodymyr Kuleshov", "authorids": "~Top_Piriyakulkij1;~Yingheng_Wang1;~Volodymyr_Kuleshov1", "gender": "M;M;", "homepage": "https://www.cs.cornell.edu/~wp237/;https://isjakewong.github.io/publications/;https://www.cs.cornell.edu/~kuleshov/", "dblp": "305/7203;265/6357;81/8612", "google_scholar": "nlO1TkkAAAAJ;4WEa7tMAAAAJ;RY_t8XAAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Top_Piriyakulkij1;~Yingheng_Wang1;~Volodymyr_Kuleshov1", "aff": "Cornell University;Cornell University;Cornell University", "aff_domain": "cs.cornell.edu;cornell.edu;cornell.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@misc{\npiriyakulkij2024denoising,\ntitle={Denoising Diffusion Variational Inference},\nauthor={Top Piriyakulkij and Yingheng Wang and Volodymyr Kuleshov},\nyear={2024},\nurl={https://openreview.net/forum?id=61mnwO4Mzp}\n}", "github": "", "project": "", "reviewers": "3sxG;18s7;t61r;Qkoy", "site": "https://openreview.net/forum?id=61mnwO4Mzp", "pdf_size": 3067837, "rating": "3;5;5;5", "confidence": "3;4;3;4", "soundness": "2;2;2;3", "contribution": "2;2;2;2", "presentation": "2;2;2;3", "wc_summary": "77;65;205;107", "wc_strengths": "24;33;125;50", "wc_weaknesses": "100;316;184;136", "wc_questions": "83;124;36;272", "wc_review": "284;538;550;565", "wc_reply_reviewers": "0;203;129;12", "wc_reply_authors": "1996;1789;1091;1425", "reply_reviewers": "0;1;1;1", "reply_authors": "4;3;3;3", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 113.5, 54.99772722576816 ], "wc_strengths_avg": [ 58.0, 39.79321550214308 ], "wc_weaknesses_avg": [ 184.0, 81.82909018191513 ], "wc_questions_avg": [ 128.75, 88.37243631359271 ], "wc_review_avg": [ 484.25, 116.00942849613561 ], "wc_reply_reviewers_avg": [ 86.0, 84.27633119684316 ], "wc_reply_authors_avg": [ 1575.25, 346.33536853749143 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 0.4330127018922193 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:uq11FH4MoRQJ:scholar.google.com/&scioq=Denoising+Diffusion+Variational+Inference&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Cornell University", "aff_unique_dep": "", "aff_unique_url": "https://www.cornell.edu", "aff_unique_abbr": "Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "In-Context Learning Dynamics with Random Binary Sequences", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19405", "id": "62K7mALO2q", "author_site": "Eric Bigelow, Ekdeep Singh Lubana, Robert Dick, Hidenori Tanaka, Tomer Ullman", "tldr": "", "abstract": "Large language models (LLMs) trained on huge text datasets demonstrate intriguing capabilities, achieving state-of-the-art performance on tasks they were not explicitly trained for. 
The precise nature of LLM capabilities is often mysterious, and different prompts can elicit different capabilities through in-context learning. We propose a framework that enables us to analyze in-context learning dynamics to understand latent concepts underlying LLMs\u2019 behavioral patterns. This provides a more nuanced understanding than success-or-failure evaluation benchmarks, but does not require observing internal activations as a mechanistic interpretation of circuits would. Inspired by the cognitive science of human randomness perception, we use random binary sequences as context and study dynamics of in-context learning by manipulating properties of context data, such as sequence length. In the latest GPT-3.5+ models, we find emergent abilities to generate seemingly random numbers and learn basic formal languages, with striking in-context learning dynamics where model outputs transition sharply from seemingly random behaviors to deterministic repetition.", "keywords": "In-Context Learning;Large Language Models;Interpretability;Computational Cognitive Science", "primary_area": "visualization or interpretation of learned representations", "supplementary_material": "", "author": "Eric J Bigelow;Ekdeep Singh Lubana;Robert P. Dick;Hidenori Tanaka;Tomer Ullman", "authorids": "~Eric_J_Bigelow1;~Ekdeep_Singh_Lubana1;~Robert_P._Dick1;~Hidenori_Tanaka1;~Tomer_Ullman1", "gender": ";M;M;;", "homepage": ";https://ekdeepslubana.github.io/;http://robertdick.org/;https://sites.google.com/view/htanaka/home;", "dblp": ";228/2683;84/523.html;;", "google_scholar": ";https://scholar.google.co.in/citations?user=OP7S3vsAAAAJ;;f_pWOGIAAAAJ;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Eric_J_Bigelow1;~Ekdeep_Singh_Lubana1;~Robert_P._Dick1;~Hidenori_Tanaka1;~Tomer_Ullman1", "aff": ";University of Michigan;University of Michigan;Physics & Informatics Lab, NTT Research, Inc.;", "aff_domain": ";umich.edu;umich.edu;ntt-research.com;", "position": ";PhD student;Full Professor;Senior Research Scientist;", "bibtex": "@inproceedings{\nbigelow2024incontext,\ntitle={In-Context Learning Dynamics with Random Binary Sequences},\nauthor={Eric J Bigelow and Ekdeep Singh Lubana and Robert P. 
Dick and Hidenori Tanaka and Tomer Ullman},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=62K7mALO2q}\n}", "github": "", "project": "", "reviewers": "rqBo;F7kD;xnie;272r", "pdf_size": 6470996, "rating": "6;6;6;6", "confidence": "4;2;3;3", "soundness": "2;2;2;3", "contribution": "3;3;2;2", "presentation": "3;3;3;2", "wc_summary": "229;292;266;49", "wc_strengths": "28;179;266;55", "wc_weaknesses": "138;679;351;188", "wc_questions": "156;197;156;162", "wc_review": "551;1347;1039;454", "wc_reply_reviewers": "9;193;328;37", "wc_reply_authors": "1949;2310;998;1030", "reply_reviewers": "1;1;1;1", "reply_authors": "3;4;2;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 209.0, 95.04998684902591 ], "wc_strengths_avg": [ 132.0, 96.05987716002973 ], "wc_weaknesses_avg": [ 339.0, 211.51004704268777 ], "wc_questions_avg": [ 167.75, 17.06421694658152 ], "wc_review_avg": [ 847.75, 363.6367521304743 ], "wc_reply_reviewers_avg": [ 141.75, 128.36544511666682 ], "wc_reply_authors_avg": [ 1571.75, 572.2789420378842 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11402233474497562685&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 9, "openreview": "https://openreview.net/forum?id=62K7mALO2q", "pdf": "https://openreview.net/pdf?id=62K7mALO2q", "email": ";umich.edu;umich.edu;ntt-research.com;", "author_num": 5, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Michigan;NTT Research, Inc.", "aff_unique_dep": ";Physics & Informatics Lab", "aff_unique_url": "https://www.umich.edu;https://www.ntt-research.com", "aff_unique_abbr": "UM;NTT Research", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "639DcBewcJ", "title": "Low-Rank Robust Graph Contrastive Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Graph Neural Networks (GNNs) have been widely used to learn node representations and with outstanding performance on various tasks such as node classification. However, noise, which inevitably exists in real-world graph data, would considerably degrade the performance of GNNs revealed by recent studies. In this work, we propose a novel and robust method, Low-Rank Robust Graph Contrastive Learning (LR-RGCL). LR-RGCL performs transductive node classification in two steps. First, a robust GCL encoder named RGCL is trained by prototypical contrastive learning with Bayesian nonparametric Prototype Learning (BPL). Next, using the robust features produced by RGCL, a novel and provable low-rank transductive classification algorithm is used to classify the unlabeled nodes in the graph. Our low-rank transductive classification algorithm is inspired by the low frequency property of the graph data and its labels, and theoretical result on the generalization of our algorithm is provided. To the best of our knowledge, our theoretical result is among the first to demonstrate the advantage of low-rank learning in transductive classification.
Extensive experiments on public benchmarks demonstrate the superior performance of LR-RGCL and the robustness of the learned node representations. The code of LR-RGCL is available at \\url{https://anonymous.4open.science/r/LRR-GCL-3B3C/}.", "keywords": "Low Rank Robust Graph Contrastive Learning;Bayesian Nonparametric Method;Generation Bound;Transductive Learning", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Yancheng Wang;Yingzhen Yang", "authorids": "~Yancheng_Wang2;~Yingzhen_Yang1", "gender": "M;M", "homepage": ";http://yingzhenyang.com", "dblp": ";66/3838.html", "google_scholar": "https://scholar.google.com/citations?hl=en;", "orcid": ";", "linkedin": ";yingzhen-yang-9b869122", "or_profile": "~Yancheng_Wang2;~Yingzhen_Yang1", "aff": "Arizona State University;Arizona State University", "aff_domain": "asu.edu;asu.edu", "position": "PhD student;Assistant Professor", "bibtex": "@misc{\nwang2024lowrank,\ntitle={Low-Rank Robust Graph Contrastive Learning},\nauthor={Yancheng Wang and Yingzhen Yang},\nyear={2024},\nurl={https://openreview.net/forum?id=639DcBewcJ}\n}", "github": "", "project": "", "reviewers": "9AcT;wZD8;X4pk;FuBV", "site": "https://openreview.net/forum?id=639DcBewcJ", "pdf_size": 2367883, "rating": "1;3;3;6", "confidence": "5;4;4;4", "soundness": "1;2;2;3", "contribution": "1;2;1;2", "presentation": "1;1;3;2", "wc_summary": "41;85;22;68", "wc_strengths": "18;48;16;86", "wc_weaknesses": "169;73;142;226", "wc_questions": "2;318;67;133", "wc_review": "230;524;247;513", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 3.25, 1.7853571071357126 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.0, 0.7071067811865476 ], "contribution_avg": [ 1.5, 0.5 ], "presentation_avg": [ 1.75, 0.82915619758885 ], "wc_summary_avg": [ 54.0, 24.238399287081645 ], "wc_strengths_avg": [ 42.0, 28.39013913315678 ], "wc_weaknesses_avg": [ 152.5, 55.01136246267674 ], "wc_questions_avg": [ 130.0, 118.0105927448888 ], "wc_review_avg": [ 378.5, 140.18291622020138 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.7276068751089989, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:C6-2RMC3mtsJ:scholar.google.com/&scioq=Low-Rank+Robust+Graph+Contrastive+Learning&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Arizona State University", "aff_unique_dep": "", "aff_unique_url": "https://www.asu.edu", "aff_unique_abbr": "ASU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "63r6HyqyRm", "title": "A Vision-free Baseline for Multimodal Grammar Induction", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Past work has shown that paired vision-language signals substantially improve grammar induction in multimodal datasets such as MSCOCO. We investigate whether advancements in large language models (LLMs) that are only trained with text could provide strong assistance for grammar induction in multimodal settings. 
We find that our text-only approach, an LLM-based C-PCFG (LC-PCFG), outperforms previous multi-modal methods, and achieves state-of-the-art grammar induction performance for various multimodal datasets. Compared to image-aided grammar induction, LC-PCFG outperforms the prior state-of-the-art by $7.9$ Corpus-F1 points, with an $85$\\% reduction in parameter count and $1.7\\times$ faster training speed. Across three video-assisted grammar induction benchmarks, LC-PCFG outperforms prior state-of-the-art by up to $7.7$ Corpus-F1, with $8.8\\times$ faster training. These results shed light on the notion that text-only language models might include visually grounded cues that aid in grammar induction in multimodal contexts. Moreover, our results emphasize the importance of establishing a robust vision-free baseline when evaluating the benefit of multimodal approaches.", "keywords": "Grammar Induction;Multimodal;Language Models", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/26a52c375b7f08f3c661dd1c97c6c87f56a7a0cf.pdf", "author": "Boyi Li;Rodolfo Corona;Karttikeya Mangalam;Catherine Chen;Daniel Flaherty;Serge Belongie;Kilian Q Weinberger;Jitendra Malik;Trevor Darrell;Dan Klein", "authorids": "~Boyi_Li1;~Rodolfo_Corona1;~Karttikeya_Mangalam1;~Catherine_Chen2;~Daniel_Flaherty1;~Serge_Belongie1;~Kilian_Q_Weinberger1;~Jitendra_Malik2;~Trevor_Darrell2;~Dan_Klein1", "gender": "F;;M;;;M;M;M;;", "homepage": "https://sites.google.com/site/boyilics/home;https://rcorona.github.io/;http://karttikeya.github.io/;;;https://di.ku.dk/english/staff/?pure=en%2Fpersons%2Fserge-belongie(0ce65383-3761-4b17-948a-83b461e371e2)%2Fpublications.html;http://www.cs.cornell.edu/~kilian/;https://people.eecs.berkeley.edu/~malik/;;http://people.eecs.berkeley.edu/~klein/", "dblp": ";212/0412;200/8205;;;http://dblp.uni-trier.de/pers/hd/b/Belongie:Serge_J=;88/4801;58/2944;;", "google_scholar": ";J2Z-ChoAAAAJ;2l1fWEoAAAAJ;;;ORr4XJYAAAAJ;jsxk8vsAAAAJ;oY9R5YQAAAAJ;;", "orcid": ";;;;;0000-0002-0388-5217;0009-0008-9313-7239;0000-0003-3695-1580;;", "linkedin": ";;;;daniel-flaherty-37a551164/;sergebelongie;;;;dan-klein/", "or_profile": "~Boyi_Li1;~Rodolfo_Corona1;~Karttikeya_Mangalam1;~Catherine_Chen2;~Daniel_Flaherty1;~Serge_Belongie1;~Kilian_Q_Weinberger1;~Jitendra_Malik2;~Trevor_Darrell2;~Dan_Klein1", "aff": "University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;;;University of Copenhagen;ASAPP Inc.;University of California, Berkeley;;University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;berkeley.edu;;;ku.dk;asapp.com;berkeley.edu;;berkeley.edu", "position": "Postdoc;PhD student;PhD student;;;Full Professor;Principal Researcher;Full Professor;;Full Professor", "bibtex": "@misc{\nli2024a,\ntitle={A Vision-free Baseline for Multimodal Grammar Induction},\nauthor={Boyi Li and Rodolfo Corona and Karttikeya Mangalam and Catherine Chen and Daniel Flaherty and Serge Belongie and Kilian Q Weinberger and Jitendra Malik and Trevor Darrell and Dan Klein},\nyear={2024},\nurl={https://openreview.net/forum?id=63r6HyqyRm}\n}", "github": "", "project": "", "reviewers": "5YMo;gdiQ;qYkX", "site": "https://openreview.net/forum?id=63r6HyqyRm", "pdf_size": 650980, "rating": "1;3;3", "confidence": "4;5;2", "soundness": "1;2;1", "contribution": "2;2;1", "presentation": "3;3;2", "wc_summary": "37;114;46", "wc_strengths": "13;38;18", "wc_weaknesses": "449;248;149", "wc_questions": "29;2;27", "wc_review": "528;402;240", 
"wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 2.3333333333333335, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 1.247219128924647 ], "soundness_avg": [ 1.3333333333333333, 0.4714045207910317 ], "contribution_avg": [ 1.6666666666666667, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 65.66666666666667, 34.373762603991366 ], "wc_strengths_avg": [ 23.0, 10.801234497346433 ], "wc_weaknesses_avg": [ 282.0, 124.8118584109699 ], "wc_questions_avg": [ 19.333333333333332, 12.283683848458853 ], "wc_review_avg": [ 390.0, 117.881296226331 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.18898223650461357, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2350422626928522004&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;0;1;2;0;0", "aff_unique_norm": "University of California, Berkeley;University of Copenhagen;ASAPP Inc.", "aff_unique_dep": ";;", "aff_unique_url": "https://www.berkeley.edu;https://www.ku.dk;https://www.asapp.com", "aff_unique_abbr": "UC Berkeley;UCPH;ASAPP", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0;1;0;0;0", "aff_country_unique": "United States;Denmark" }, { "id": "648Mq6Neuo", "title": "Guide Your Anomaly with Language", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Anomaly detection is the task of identifying data that is different from what is considered normal.\nRecent advances in deep learning have improved the performance of anomaly detection and are used in many applications.\nHowever, it can be difficult to create a model that reflects the desired normality due to various issues, including lack of data and nuisance factors.\nTo address this, there have been studies that provide the desired knowledge to the model in various ways, but there are limitations, such as the need to understand deep learning. 
\nIn this work, we propose a method to guide the desired normality boundary in an image anomaly detection task using natural language.\nBy leveraging the robust generalization capabilities of the vision-language model, we present Language-Assisted Feature Transformation.\nLAFT transforms image features to suit the task through natural language using the shared image-text embedding space of CLIP.\nWe extensively analyze the effectiveness of the concept on a toy dataset and show that it works effectively on real-world datasets.", "keywords": "anomaly detection;vision-language model;language guidance;out-of-distribution detection", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "EungGu Yun;Heonjin Ha;Yeongwoo Nam;Bryan Dongik Lee", "authorids": "~EungGu_Yun1;~Heonjin_Ha1;~Yeongwoo_Nam2;~Bryan_Dongik_Lee1", "gender": "M;M;M;", "homepage": "https://yuneg11.github.io;https://github.com/Heonjin;;https://bryandlee.me", "dblp": ";;271/2576;", "google_scholar": "r7-847MAAAAJ;;8BYFKbYAAAAJ;", "orcid": "0000-0002-4648-1415;;;", "linkedin": "yuneg/;;;", "or_profile": "~EungGu_Yun1;~Heonjin_Ha1;~Yeongwoo_Nam2;~Bryan_Dongik_Lee1", "aff": "SAIGE;LG UPLUS;Saige Research;Saige", "aff_domain": "saige.ai;lguplus.co.kr;saigeresearch.ai;saige.ai", "position": "Researcher;Researcher;Researcher;Researcher", "bibtex": "@misc{\nyun2024guide,\ntitle={Guide Your Anomaly with Language},\nauthor={EungGu Yun and Heonjin Ha and Yeongwoo Nam and Bryan Dongik Lee},\nyear={2024},\nurl={https://openreview.net/forum?id=648Mq6Neuo}\n}", "github": "", "project": "", "reviewers": "e9uy;AWhP;rVqL;h8da", "site": "https://openreview.net/forum?id=648Mq6Neuo", "pdf_size": 796590, "rating": "3;3;3;5", "confidence": "5;4;5;3", "soundness": "2;1;2;3", "contribution": "2;1;2;2", "presentation": "1;2;2;3", "wc_summary": "48;100;115;33", "wc_strengths": "64;23;47;39", "wc_weaknesses": "282;175;312;159", "wc_questions": "2;4;5;93", "wc_review": "396;302;479;324", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.0, 0.7071067811865476 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 74.0, 34.32928778754374 ], "wc_strengths_avg": [ 43.25, 14.771171246722448 ], "wc_weaknesses_avg": [ 232.0, 66.1021936095921 ], "wc_questions_avg": [ 26.0, 38.6975451417787 ], "wc_review_avg": [ 375.25, 69.25812226735576 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Q94gyLMwpPkJ:scholar.google.com/&scioq=Guide+Your+Anomaly+with+Language&hl=en&as_sdt=0,7", "gs_version_total": 0, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "SAIGE;LG;Saige Research", "aff_unique_dep": ";LG UPLUS;", "aff_unique_url": ";https://www.lguplus.com;", "aff_unique_abbr": ";LG UPLUS;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1", "aff_country_unique": ";South Korea" }, { "title": "Compressed Context Memory for Online Language Model Interaction", "status": "Poster", "track": "main", "site": 
"https://iclr.cc/virtual/2024/poster/19404", "id": "64kSvC4iPg", "author_site": "Jang-Hyun Kim, Junyoung Yeom, Sangdoo Yun, Hyun Oh Song", "tldr": "", "abstract": "This paper presents a context key/value compression method for Transformer language models in online scenarios, where the context continually expands. As the context lengthens, the attention process demands increasing memory and computations, which in turn reduces the throughput of the language model. To address this challenge, we propose a compressed context memory system that continually compresses the accumulating attention key/value pairs into a compact memory space, facilitating language model inference in a limited memory space of computing environments. Our compression process involves integrating a lightweight conditional LoRA into the language model's forward pass during inference, without the need for fine-tuning the model's entire set of weights. We achieve efficient training by modeling the recursive compression process as a single parallelized forward computation. Through evaluations on conversation, personalization, and multi-task learning, we demonstrate that our approach achieves the performance level of a full context model with $5\\times$ smaller context memory size. We further demonstrate the applicability of our approach in a streaming setting with an unlimited context length, outperforming the sliding window approach. Codes are available at https://github.com/snu-mllab/context-memory.", "keywords": "context compression;efficient inference;natural language processing;transformer", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/449f3c12ca22ab9ada2f4e1f4bd472506f80ef3e.zip", "author": "Jang-Hyun Kim;Junyoung Yeom;Sangdoo Yun;Hyun Oh Song", "authorids": "~Jang-Hyun_Kim1;~Junyoung_Yeom1;~Sangdoo_Yun1;~Hyun_Oh_Song1", "gender": "M;M;M;M", "homepage": "https://yeomjy.com;https://sangdooyun.github.io/;https://mllab.snu.ac.kr/hyunoh;https://janghyun1230.github.io/", "dblp": ";124/3009.html;05/10781;", "google_scholar": "L0TXeqoAAAAJ;o0qtjzYAAAAJ;ScoZZPsAAAAJ;8JKsHJcAAAAJ", "orcid": ";;;", "linkedin": "yeomjy;;hyun-oh-song-5a39b03;", "or_profile": "~Junyoung_Yeom1;~Sangdoo_Yun1;~Hyun_Oh_Song1;~JangHyun_Kim1", "aff": "Seoul National University;NAVER;Seoul National University;Seoul National University", "aff_domain": "snu.ac.kr;navercorp.com;snu.ac.kr;snu.ac.kr", "position": "Undergrad student;Research Scientist;Associate Professor;PhD student", "bibtex": "@inproceedings{\nkim2024compressed,\ntitle={Compressed Context Memory for Online Language Model Interaction},\nauthor={Jang-Hyun Kim and Junyoung Yeom and Sangdoo Yun and Hyun Oh Song},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=64kSvC4iPg}\n}", "github": "", "project": "", "reviewers": "UgoG;Svka;JvdE;JGpv", "pdf_size": 572156, "rating": "5;6;6;6", "confidence": "4;4;4;4", "soundness": "2;3;3;3", "contribution": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "91;124;109;99", "wc_strengths": "34;57;153;107", "wc_weaknesses": "552;64;166;145", "wc_questions": "4;170;100;361", "wc_review": "681;415;528;712", "wc_reply_reviewers": "0;0;68;22", "wc_reply_authors": "1502;821;785;1292", "reply_reviewers": "0;0;1;1", "reply_authors": "3;2;2;3", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], 
"presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 105.75, 12.316147936753602 ], "wc_strengths_avg": [ 87.75, 45.996603135449035 ], "wc_weaknesses_avg": [ 231.75, 188.77814359718658 ], "wc_questions_avg": [ 158.75, 130.79635889427504 ], "wc_review_avg": [ 584.0, 119.88536190878351 ], "wc_reply_reviewers_avg": [ 22.5, 27.76238462380348 ], "wc_reply_authors_avg": [ 1100.0, 306.4041448805809 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=844548132790378571&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "openreview": "https://openreview.net/forum?id=64kSvC4iPg", "pdf": "https://openreview.net/pdf?id=64kSvC4iPg", "email": "snu.ac.kr;navercorp.com;snu.ac.kr;snu.ac.kr", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Seoul National University;NAVER Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.snu.ac.kr;https://www.naver.com", "aff_unique_abbr": "SNU;NAVER", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "64t9er38Zs", "title": "Learning Deep O($n$)-Equivariant Hyperspheres", "track": "main", "status": "Reject", "tldr": "", "abstract": "This paper presents an approach to learning (deep) $n$D features equivariant under orthogonal transformations, utilizing hyperspheres and regular $n$-simplexes. Our main contributions are theoretical and tackle major challenges in geometric deep learning such as equivariance and invariance under geometric transformations. Namely, we enrich the recently developed theory of steerable 3D spherical neurons---$\\textup{SO}(3)$-equivariant filter banks based on neurons with spherical decision surfaces---by extending said neurons to $n$D, which we call deep equivariant hyperspheres, and enabling their multi-layer construction. 
Using synthetic and real-world data in $n$D, we experimentally verify our theoretical contributions and find that our approach is superior to the baselines for small training data sets in all but one case.", "keywords": "spherical neurons;rotational equivariance;regular simplexes;geometric deep learning", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/f78ef0ff2009140bd2588ae97905477588036683.zip", "author": "Pavlo Melnyk;Michael Felsberg;M\u00e5rten Wadenb\u00e4ck;Andreas Robinson;Cuong Le", "authorids": "~Pavlo_Melnyk1;~Michael_Felsberg2;~M\u00e5rten_Wadenb\u00e4ck1;~Andreas_Robinson1;~Cuong_Le1", "gender": "M;;M;M;M", "homepage": "https://pavlomelnyk.com;https://liu.se/en/employee/micfe03;https://liu.se/en/employee/marwa32;;", "dblp": "232/3322;00/78;132/2319;158/5786;", "google_scholar": "RhThiI8AAAAJ;https://scholar.google.se/citations?hl=en;6WRQpCQAAAAJ;https://scholar.google.se/citations?user=_4Mg38AAAAAJ;aGtNlKgAAAAJ", "orcid": "0000-0002-6091-861X;0000-0002-6096-3648;0000-0002-0675-2794;;", "linkedin": ";https://linkedin.com/in/michael-felsberg-668a202;;;cuong-le-8811ba16b/", "or_profile": "~Pavlo_Melnyk1;~Michael_Felsberg2;~M\u00e5rten_Wadenb\u00e4ck1;~Andreas_Robinson1;~Cuong_Le1", "aff": "Link\u00f6ping University;Link\u00f6ping University;Link\u00f6ping University;Link\u00f6ping University;Link\u00f6ping University", "aff_domain": "liu.se;liu.se;liu.se;liu.se;liu.se", "position": "PhD student;Full Professor;Assistant Professor;Researcher;PhD student", "bibtex": "@misc{\nmelnyk2024learning,\ntitle={Learning Deep O(\\$n\\$)-Equivariant Hyperspheres},\nauthor={Pavlo Melnyk and Michael Felsberg and M{\\r{a}}rten Wadenb{\\\"a}ck and Andreas Robinson and Cuong Le},\nyear={2024},\nurl={https://openreview.net/forum?id=64t9er38Zs}\n}", "github": "", "project": "", "reviewers": "MJcE;gZ3c;sbi9;RNHY", "site": "https://openreview.net/forum?id=64t9er38Zs", "pdf_size": 539048, "rating": "5;6;6;6", "confidence": "3;4;2;5", "soundness": "4;3;3;3", "contribution": "2;2;2;3", "presentation": "1;3;2;3", "wc_summary": "57;69;169;107", "wc_strengths": "49;41;37;60", "wc_weaknesses": "390;176;394;239", "wc_questions": "132;17;101;197", "wc_review": "628;303;701;603", "wc_reply_reviewers": "301;60;0;0", "wc_reply_authors": "1645;525;333;510", "reply_reviewers": "1;1;0;0", "reply_authors": "3;2;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 100.5, 43.64344166080397 ], "wc_strengths_avg": [ 46.75, 8.78564169540279 ], "wc_weaknesses_avg": [ 299.75, 94.91147190935351 ], "wc_questions_avg": [ 111.75, 64.7509652437707 ], "wc_review_avg": [ 558.75, 151.9841685834416 ], "wc_reply_reviewers_avg": [ 90.25, 124.11763573320272 ], "wc_reply_authors_avg": [ 753.25, 520.3596712851602 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.2581988897471611, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6118510587473492375&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Link\u00f6ping University", "aff_unique_dep": "", "aff_unique_url": "https://www.liu.se", "aff_unique_abbr": "LiU", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Sweden" }, { "id": "658hDy9RwC", "title": "ASPEST: Bridging the Gap Between Active Learning and Selective Prediction", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Selective prediction aims to learn a reliable model that abstains from making predictions when uncertain. These predictions can then be deferred to a humans for further evaluation. As an everlasting challenge for machine learning, in many real-world scenarios, the distribution of test data is different from the training data. This results in more inaccurate predictions, and often increased dependence on humans, which can be difficult and expensive. Active learning aims to lower the overall labeling effort, and hence human dependence, by querying the most informative examples. Selective prediction and active learning have been approached from different angles, with the connection between them missing. In this work, we introduce a new learning paradigm, *active selective prediction*, which aims to query more informative samples from the shifted target domain while increasing accuracy and coverage. For this new paradigm, we propose a simple yet effective approach, ASPEST, that utilizes ensembles of model snapshots with self-training with their aggregated outputs as pseudo labels. Extensive experiments on numerous image, text and structured datasets, which suffer from domain shifts, demonstrate that ASPEST can significantly outperform prior work on selective prediction and active learning (e.g. on the MNIST$\\to$SVHN benchmark with the labeling budget of 100, ASPEST improves the AUACC metric from 79.36% to 88.84%) and achieves more optimal utilization of humans in the loop.", "keywords": "selective prediction;active learning;distribution shifts", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/a0dad8b1c94870ac1c0c2f014a248b8964163805.zip", "author": "Jiefeng Chen;Jinsung Yoon;Sayna Ebrahimi;Sercan O Arik;Somesh Jha;Tomas Pfister", "authorids": "~Jiefeng_Chen2;~Jinsung_Yoon1;~Sayna_Ebrahimi1;~Sercan_O_Arik1;~Somesh_Jha1;~Tomas_Pfister1", "gender": "M;M;F;M;M;M", "homepage": "https://jfc43.github.io/;https://sites.google.com/corp/view/jinsungyoon;https://saynaebrahimi.github.io/;https://www.sercanarik.com/;;http://tomas.pfister.fi", "dblp": "199/3381;173/5409.html;207/7584;;j/SomeshJha;14/8360", "google_scholar": "5mOfQfAAAAAJ;kiFd6A8AAAAJ;wRyjJfMAAAAJ;;BaI7l8QAAAAJ;ahSpJOAAAAAJ", "orcid": ";;;0000-0001-6333-1729;;0009-0004-4088-8718", "linkedin": "jiefeng-chen-aa1769122/;jinsung-yoon-bb7751b8;saynaebrahimi/;;;", "or_profile": "~Jiefeng_Chen2;~Jinsung_Yoon1;~Sayna_Ebrahimi1;~Sercan_O_Arik1;~Somesh_Jha1;~Tomas_Pfister1", "aff": "Amazon;Google;Google;Google;Department of Computer Science, University of Wisconsin, Madison;Google", "aff_domain": "amazon.com;google.com;google.com;google.com;cs.wisc.edu;google.com", "position": "Applied Scientist;Research Scientist;Research Scientist;Research Scientist;Full Professor;Head of Research @ Cloud AI", "bibtex": "@misc{\nchen2024aspest,\ntitle={{ASPEST}: Bridging the Gap Between Active Learning and Selective Prediction},\nauthor={Jiefeng Chen and Jinsung Yoon and Sayna Ebrahimi and Sercan O Arik and Somesh Jha and Tomas Pfister},\nyear={2024},\nurl={https://openreview.net/forum?id=658hDy9RwC}\n}", "github": "", "project": "", "reviewers": "ZEcg;JH3X;YVeK;Fmse", "site": 
"https://openreview.net/forum?id=658hDy9RwC", "pdf_size": 793971, "rating": "3;3;5;6", "confidence": "4;3;4;4", "soundness": "2;2;3;3", "contribution": "2;2;2;3", "presentation": "3;2;3;3", "wc_summary": "86;82;216;131", "wc_strengths": "61;27;73;47", "wc_weaknesses": "62;147;302;38", "wc_questions": "38;4;139;59", "wc_review": "247;260;730;275", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 128.75, 53.92297747713863 ], "wc_strengths_avg": [ 52.0, 17.11724276862369 ], "wc_weaknesses_avg": [ 137.25, 103.38127248201194 ], "wc_questions_avg": [ 60.0, 49.65380146574882 ], "wc_review_avg": [ 378.0, 203.4686708070803 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5555555555555555, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3280544099817102161&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;1;1;2;1", "aff_unique_norm": "Amazon;Google;University of Wisconsin-Madison", "aff_unique_dep": "Amazon.com, Inc.;Google;Department of Computer Science", "aff_unique_url": "https://www.amazon.com;https://www.google.com;https://www.wisc.edu", "aff_unique_abbr": "Amazon;Google;UW-Madison", "aff_campus_unique_index": "1;1;1;2;1", "aff_campus_unique": ";Mountain View;Madison", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Plug-and-Play Posterior Sampling under Mismatched Measurement and Prior Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19403", "id": "66arKkGiFy", "author_site": "Marien Renaud, Jiaming Liu, Valentin De Bortoli, Andres Almansa, Ulugbek Kamilov", "tldr": "", "abstract": "Posterior sampling has been shown to be a powerful Bayesian approach for solving imaging inverse problems. The recent plug-and-play unadjusted Langevin algorithm (PnP-ULA) has emerged as a promising method for Monte Carlo sampling and minimum mean squared error (MMSE) estimation by combining physical measurement models with deep-learning priors specified using image denoisers. However, the intricate relationship between the sampling distribution of PnP-ULA and the mismatched data-fidelity and denoiser has not been theoretically analyzed. We address this gap by proposing a posterior-$L_2$ pseudometric and using it to quantify an explicit error bound for PnP-ULA under mismatched posterior distribution. We numerically validate our theory on several inverse problems such as sampling from Gaussian mixture models and image deblurring. 
Our results suggest that the sensitivity of the sampling distribution of PnP-ULA to a mismatch in the measurement model and the denoiser can be precisely characterized.", "keywords": "Inverse problems;plug-and-play priors;posterior sampling;unadjusted Langevin algorithm", "primary_area": "optimization", "supplementary_material": "/attachment/bc84ec14339946dbddcf750e6d2f615cf202b8d3.zip", "author": "Marien Renaud;Jiaming Liu;Valentin De Bortoli;Andres Almansa;Ulugbek Kamilov", "authorids": "~Marien_Renaud1;~Jiaming_Liu3;~Valentin_De_Bortoli1;~Andres_Almansa1;~Ulugbek_Kamilov1", "gender": ";M;;M;Not Specified", "homepage": ";https://jiamingliu-jeremy.github.io/;https://vdeborto.github.io/;http://up5.fr/almansa;https://ukmlv.github.io", "dblp": ";33/5934-1;224/9338;80/6581;73/9223", "google_scholar": ";KEucBooAAAAJ;;e1pFlV0AAAAJ;https://scholar.google.com.tw/citations?user=3qYUSDwAAAAJ", "orcid": ";0000-0002-1042-4443;;0000-0001-8196-1329;0000-0001-6770-3278", "linkedin": ";;;;", "or_profile": "~Marien_Renaud1;~Jiaming_Liu3;~Valentin_De_Bortoli1;~Andres_Almansa1;~Ulugbek_Kamilov1", "aff": ";Washington University, St. Louis;University of Oxford;Universit\u00e9 Paris Cit\u00e9;Google", "aff_domain": ";wustl.edu;ox.ac.uk;parisdescartes.fr;google.com", "position": ";PhD student;Postdoc;Principal Researcher;Visiting Faculty Researcher", "bibtex": "@inproceedings{\nrenaud2024plugandplay,\ntitle={Plug-and-Play Posterior Sampling under Mismatched Measurement and Prior Models},\nauthor={Marien Renaud and Jiaming Liu and Valentin De Bortoli and Andres Almansa and Ulugbek Kamilov},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=66arKkGiFy}\n}", "github": "", "project": "", "reviewers": "Uuqz;WmLx;rc8y;eQV2", "pdf_size": 10303544, "rating": "5;6;6;6", "confidence": "4;3;2;3", "soundness": "2;3;3;4", "contribution": "2;3;3;2", "presentation": "2;3;3;2", "wc_summary": "71;71;44;70", "wc_strengths": "41;78;41;51", "wc_weaknesses": "274;282;92;114", "wc_questions": "5;124;15;133", "wc_review": "391;555;192;368", "wc_reply_reviewers": "0;16;0;41", "wc_reply_authors": "1352;865;773;887", "reply_reviewers": "0;1;0;1", "reply_authors": "3;2;2;3", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 64.0, 11.554220008291344 ], "wc_strengths_avg": [ 52.75, 15.138939857202683 ], "wc_weaknesses_avg": [ 190.5, 87.8905569444181 ], "wc_questions_avg": [ 69.25, 59.44062163201189 ], "wc_review_avg": [ 376.5, 128.63222768808757 ], "wc_reply_reviewers_avg": [ 14.25, 16.768646337734005 ], "wc_reply_authors_avg": [ 969.25, 225.08040230104442 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17100265567993470327&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=66arKkGiFy", "pdf": "https://openreview.net/pdf?id=66arKkGiFy", "email": ";wustl.edu;ox.ac.uk;parisdescartes.fr;google.com", "author_num": 5, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Washington University in St. 
Louis;University of Oxford;Universit\u00e9 Paris Cit\u00e9;Google", "aff_unique_dep": ";;;Google", "aff_unique_url": "https://wustl.edu;https://www.ox.ac.uk;https://www.universite-paris.fr;https://www.google.com", "aff_unique_abbr": "WUSTL;Oxford;UPC;Google", "aff_campus_unique_index": "0;2", "aff_campus_unique": "St. Louis;;Mountain View", "aff_country_unique_index": "0;1;2;0", "aff_country_unique": "United States;United Kingdom;France" }, { "id": "66e22qCU5i", "title": "Certified Copy: A Resistant Backdoor Attack", "track": "main", "status": "Reject", "tldr": "", "abstract": "The robustness, security, and safety of artificial intelligence systems have become a major concern in recent studies. One of the most significant threats to deep learning models is the backdoor attack, which has been thoroughly investigated. Despite numerous backdoor detection mechanisms developed for computer vision systems, our research shows that even simple backdoor attacks can bypass these defenses if the backdoor planting process and poisoning data are carefully crafted. To evade existing backdoor detection systems, we propose a new backdoored model called Certified Copy, which is trained using a novel cost function. This cost function controls the activation of neurons in the model to ensure that the activation generated by clean inputs is similar to that produced by poisoned input data. The model copies the corresponding clean model during training in all situations except when fed with poisoned inputs. We tested our model against six state-of-the-art defense mechanisms, including Neural Cleanse, TAO, ABS, TABOR, NNoculation, and STRIP. The results showed that most of these methods cannot detect the backdoored model. We conclude that deep learning models have a vast hypothesis space, which can be exploited by malicious attackers to hide malicious activation of neurons using poisoned data, leading to undetected backdoored models.", "keywords": "Backdoor attack;Deep Neural Network;Detection methods", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "", "author": "Omid Rajabi Rostami;Rui Ning;Chunsheng Xin;Jin-Hee Cho;Jiang Li;Hongyi Wu", "authorids": "~Omid_Rajabi_Rostami1;~Rui_Ning2;~Chunsheng_Xin1;~Jin-Hee_Cho1;~Jiang_Li3;~Hongyi_Wu1", "gender": "M;;M;F;M;M", "homepage": "https://github.com/omidrajabi6868;https://www.lions.odu.edu/~rning/;https://ww1.odu.edu/eng/programs/ccni/people;https://people.cs.vt.edu/~jicho/;https://fs.wp.odu.edu/jli/;http://www.u.arizona.edu/~mhwu/", "dblp": ";211/2892;06/6463;;41/3068-1;78/1033", "google_scholar": ";oN4NttEAAAAJ;TaP2oq8AAAAJ;wToVkEUAAAAJ;https://scholar.google.com/citations?hl=en;rFLksrwAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;jiang-li-416b0a6/;hongyi-\u201cmichael\u201d-wu-4b51a715/", "or_profile": "~Omid_Rajabi_Rostami1;~Rui_Ning2;~Chunsheng_Xin1;~Jin-Hee_Cho1;~Jiang_Li3;~Hongyi_Wu1", "aff": "Old Dominion University;Old Dominion University;Old Dominion University;Virginia Polytechnic Institute and State University;Old Dominion University;University of Arizona", "aff_domain": "odu.edu;odu.edu;odu.edu;vt.edu;odu.edu;arizona.edu", "position": "PhD student;Assistant Professor;Full Professor;Associate Professor;Full Professor;Full Professor", "bibtex": "@misc{\nrostami2024certified,\ntitle={Certified Copy: A Resistant Backdoor Attack},\nauthor={Omid Rajabi Rostami and Rui Ning and Chunsheng Xin and Jin-Hee Cho and Jiang Li and Hongyi Wu},\nyear={2024},\nurl={https://openreview.net/forum?id=66e22qCU5i}\n}", 
"github": "", "project": "", "reviewers": "Tjm9;aXtp;GNhR;eu6h;pLZf;Xd6Y;xbMP", "site": "https://openreview.net/forum?id=66e22qCU5i", "pdf_size": 999185, "rating": "1;3;3;3;3;3;5", "confidence": "4;4;4;4;5;3;4", "soundness": "2;3;3;3;3;2;2", "contribution": "1;2;2;2;1;2;3", "presentation": "2;2;3;3;1;3;3", "wc_summary": "27;88;62;75;16;90;81", "wc_strengths": "10;49;24;45;16;42;32", "wc_weaknesses": "104;149;285;122;46;298;23", "wc_questions": "8;80;47;23;12;48;120", "wc_review": "149;366;418;265;90;478;256", "wc_reply_reviewers": "0;0;0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0;0;0", "reply_reviewers": "0;0;0;0;0;0;0", "reply_authors": "0;0;0;0;0;0;0", "rating_avg": [ 3.0, 1.0690449676496976 ], "confidence_avg": [ 4.0, 0.5345224838248488 ], "soundness_avg": [ 2.5714285714285716, 0.4948716593053935 ], "contribution_avg": [ 1.8571428571428572, 0.6388765649999399 ], "presentation_avg": [ 2.4285714285714284, 0.7284313590846836 ], "wc_summary_avg": [ 62.714285714285715, 27.5873381189595 ], "wc_strengths_avg": [ 31.142857142857142, 13.922526160646191 ], "wc_weaknesses_avg": [ 146.71428571428572, 99.92813744417148 ], "wc_questions_avg": [ 48.285714285714285, 37.281608969395876 ], "wc_review_avg": [ 288.85714285714283, 130.46995118902018 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:2tZ5pzue08kJ:scholar.google.com/&scioq=Certified+Copy:+A+Resistant+Backdoor+Attack&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;1;0;2", "aff_unique_norm": "Old Dominion University;Virginia Tech;University of Arizona", "aff_unique_dep": ";;", "aff_unique_url": "https://www.odu.edu;https://www.vt.edu;https://www.arizona.edu", "aff_unique_abbr": "ODU;VT;UA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "66wQM45W28", "title": "CEDNet: A Cascade Encoder-Decoder Network for Dense Prediction", "track": "main", "status": "Reject", "tldr": "", "abstract": "Multi-scale features are essential for dense prediction tasks, such as object detection, instance segmentation, and semantic segmentation. The prevailing methods usually utilize a classification backbone to extract multi-scale features and then fuse these features using a lightweight module (e.g. the fusion module in FPN and BiFPN, two typical object detection methods). However, as these methods allocate most computational resources to the classification backbone, the multi-scale feature fusion in these methods is delayed, which may lead to inadequate feature fusion. While some methods perform feature fusion from early stages, they either fail to fully leverage high-level features to guide low-level feature learning or have complex structures, resulting in sub-optimal performance. We propose a streamlined cascade encoder-decoder network, dubbed CEDNet, tailored for dense prediction tasks. All stages in CEDNet share the same encoder-decoder structure and perform multi-scale feature fusion within the decoder. A hallmark of CEDNet is its ability to incorporate high-level features from early stages to guide low-level feature learning in subsequent stages, thereby enhancing the effectiveness of multi-scale feature fusion. We explored three well-known encoder-decoder structures: Hourglass, UNet, and FPN. 
When integrated into CEDNet, they performed much better than traditional methods that use a pre-designed classification backbone combined with a lightweight fusion module. Extensive experiments on object detection, instance segmentation, and semantic segmentation demonstrated the effectiveness of our method. The code will be made publicly available.", "keywords": "dense prediction tasks; encoder-decoder structure", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/037d1625ab4069131a37f36f47f579517f34b318.pdf", "author": "Gang Zhang;Ziyi Li;Chufeng Tang;Jianmin Li;Xiaolin Hu", "authorids": "~Gang_Zhang3;~Ziyi_Li3;~Chufeng_Tang1;~Jianmin_Li1;~Xiaolin_Hu1", "gender": "Not Specified;Not Specified;M;M;M", "homepage": ";https://leezeeyee.com;http://chufengt.github.io/;;http://www.xlhu.cn/", "dblp": ";;https://dblp.uni-trier.de/pid/250/9561;71/5930-1;60/6028-1", "google_scholar": "0QvcF6sAAAAJ;;GS1NtlQAAAAJ;PeF1aPkAAAAJ;PksdgoUAAAAJ", "orcid": ";;0000-0002-5794-1616;;0000-0002-4907-7354", "linkedin": ";;;;", "or_profile": "~Gang_Zhang3;~Ziyi_Li3;~Chufeng_Tang1;~Jianmin_Li1;~Xiaolin_Hu1", "aff": "Tsinghua University;Huazhong University of Science and Technology;;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;hust.edu.cn;;tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;MS student;;Associate Professor;Associate Professor", "bibtex": "@misc{\nzhang2024cednet,\ntitle={{CEDN}et: A Cascade Encoder-Decoder Network for Dense Prediction},\nauthor={Gang Zhang and Ziyi Li and Chufeng Tang and Jianmin Li and Xiaolin Hu},\nyear={2024},\nurl={https://openreview.net/forum?id=66wQM45W28}\n}", "github": "", "project": "", "reviewers": "XJYk;fhWB;Wmqn;sm4X", "site": "https://openreview.net/forum?id=66wQM45W28", "pdf_size": 257233, "rating": "3;3;5;5", "confidence": "5;5;4;5", "soundness": "3;2;2;3", "contribution": "1;2;2;3", "presentation": "2;3;2;3", "wc_summary": "39;58;29;61", "wc_strengths": "33;38;29;85", "wc_weaknesses": "368;244;83;196", "wc_questions": "5;183;5;38", "wc_review": "445;523;146;380", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 46.75, 13.273563952458284 ], "wc_strengths_avg": [ 46.25, 22.598395960775623 ], "wc_weaknesses_avg": [ 222.75, 102.21882165237477 ], "wc_questions_avg": [ 57.75, 73.5573755649289 ], "wc_review_avg": [ 373.5, 140.76665088010014 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10640832804271131810&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Tsinghua University;Huazhong University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.hust.edu.cn", "aff_unique_abbr": "THU;HUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "67t4ikhlvP", "title": "Agent-Centric State Discovery for Finite-Memory 
POMDPs", "track": "main", "status": "Reject", "tldr": "", "abstract": "Discovering an informative, or agent-centric, state representation that encodes only the relevant information while discarding the irrelevant is a key challenge towards scaling reinforcement learning algorithms and efficiently applying them to downstream tasks. Prior works studied this problem in high-dimensional Markovian environments, when the current observation may be a complex object but is sufficient to decode the informative state. In this work, we consider the problem of discovering the agent-centric state in the more challenging high-dimensional non-Markovian setting, when the state can be decoded from a sequence of past observations. We establish that generalized inverse models can be adapted for learning agent-centric state representation for this task. Our results include asymptotic theory as well as negative results for alternative intuitive algorithms, such as encoding with only a forward-running sequence model. We complement these findings with a thorough empirical study on the agent-centric state discovery abilities of the different alternatives we put forward. Particularly notable is our analysis of past actions, where we show that these can be a double-edged sword: making the algorithms more successful when used correctly and causing dramatic failure when used incorrectly.", "keywords": "pomdp;finitememory;exobmdp;block mdp;agent-centric states", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/e411a108ca705ec97c4c30b8ac422cdfc9ae06ec.zip", "author": "Lili Wu;Ben Evans;Riashat Islam;Raihan Seraj;Yonathan Efroni;Alex Lamb", "authorids": "~Lili_Wu1;~Ben_Evans1;~Riashat_Islam1;~Raihan_Seraj1;~Yonathan_Efroni2;~Alex_Lamb1", "gender": ";M;M;M;M;M", "homepage": ";https://riashat.github.io/;http://raihan-seraj.github.io/;https://sites.google.com/view/yonathan-efroni/;;https://bennevans.github.io/", "dblp": "91/1716;198/0459;;215/3475;;87/9175", "google_scholar": "x8fnPxAAAAAJ;https://scholar.google.ca/citations?user=2_4Rs44AAAAJ;https://scholar.google.ca/citations?user=gtWzuL0AAAAJ;pfTInEgAAAAJ;https://scholar.google.ca/citations?user=BFzFy1YAAAAJ;JPQom2sAAAAJ", "orcid": ";;;;;", "linkedin": "lili-wu-71456674;;http://linkedin.com/in/raihan-seraj/;;;bnevans/", "or_profile": "~Lili_Wu1;~Riashat_Islam1;~Raihan_Seraj1;~Yonathan_Efroni2;~Alex_Matthew_Lamb1;~Benjamin_Evans1", "aff": "Microsoft Research NYC;Saudi Data and AI Authority, Saudi Data and AI Authority;McGill University;Meta;;New York University", "aff_domain": "microsoft.com;sdaia.gov.sa;mcgill.ca;meta.com;;nyu.edu", "position": "Data and applied scientist;Researcher;PhD student;Researcher;;PhD student", "bibtex": "@misc{\nwu2024agentcentric,\ntitle={Agent-Centric State Discovery for Finite-Memory {POMDP}s},\nauthor={Lili Wu and Ben Evans and Riashat Islam and Raihan Seraj and Yonathan Efroni and Alex Lamb},\nyear={2024},\nurl={https://openreview.net/forum?id=67t4ikhlvP}\n}", "github": "", "project": "", "reviewers": "z5xp;38V6;HgfR;Yoch", "site": "https://openreview.net/forum?id=67t4ikhlvP", "pdf_size": 2673396, "rating": "1;1;5;8", "confidence": "4;3;3;3", "soundness": "2;2;3;4", "contribution": "1;1;3;3", "presentation": "2;1;2;3", "wc_summary": "113;52;108;72", "wc_strengths": "103;6;56;33", "wc_weaknesses": "747;93;239;330", "wc_questions": "54;109;225;131", "wc_review": "1017;260;628;566", "wc_reply_reviewers": "724;0;0;41", "wc_reply_authors": "908;281;380;329", "reply_reviewers": "2;0;0;1", "reply_authors": 
"2;1;1;1", "rating_avg": [ 3.75, 2.947456530637899 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "contribution_avg": [ 2.0, 1.0 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 86.25, 25.321680433968044 ], "wc_strengths_avg": [ 49.5, 35.59845502265512 ], "wc_weaknesses_avg": [ 352.25, 243.08370471917692 ], "wc_questions_avg": [ 129.75, 61.730766235322236 ], "wc_review_avg": [ 617.75, 269.33656918435713 ], "wc_reply_reviewers_avg": [ 191.25, 308.0384513335957 ], "wc_reply_authors_avg": [ 474.5, 252.71772791001425 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.538672317561831, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16438357080989642346&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Microsoft;Saudi Data and AI Authority;McGill University;Meta;New York University", "aff_unique_dep": "Microsoft Research;;;Meta Platforms, Inc.;", "aff_unique_url": "https://www.microsoft.com/en-us/research/group/microsoft-research-new-york-city;https://sdaia.gov.sa;https://www.mcgill.ca;https://meta.com;https://www.nyu.edu", "aff_unique_abbr": "MSR NYC;SDAIA;McGill;Meta;NYU", "aff_campus_unique_index": "0", "aff_campus_unique": "New York City;", "aff_country_unique_index": "0;1;2;0;0", "aff_country_unique": "United States;Saudi Arabia;Canada" }, { "id": "68k0KcHFrW", "title": "Stochastic Unrolled Federated Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Algorithm unrolling has emerged as a learning-based optimization paradigm that unfolds truncated iterative algorithms in trainable neural-network optimizers. We introduce Stochastic UnRolled Federated learning (SURF), a method that expands algorithm unrolling to a federated learning scenario. Our proposed method tackles two challenges of this expansion, namely the need to feed whole datasets to the unrolled optimizers to find a descent direction and the decentralized nature of federated learning. We circumvent the former challenge by feeding stochastic mini-batches to each unrolled layer and imposing descent constraints to mitigate the randomness induced by using mini-batches. We address the latter challenge by unfolding the distributed gradient descent (DGD) algorithm in a graph neural network (GNN)-based unrolled architecture, which preserves the decentralized nature of training in federated learning. We theoretically prove that our proposed unrolled optimizer converges to a near-optimal region infinitely often. 
Through extensive numerical experiments, we also demonstrate the effectiveness of the proposed framework in collaborative training of image classifiers.", "keywords": "Algorithm Unrolling;Learning to Optimize;GNNs;Federated Learning", "primary_area": "optimization", "supplementary_material": "", "author": "Samar Hadou;Navid NaderiAlizadeh;Alejandro Ribeiro", "authorids": "~Samar_Hadou1;~Navid_NaderiAlizadeh2;~Alejandro_Ribeiro1", "gender": "F;M;M", "homepage": ";https://alelab.seas.upenn.edu;https://www.seas.upenn.edu/~nnaderi/", "dblp": "304/2692.html;32/15;126/5064.html", "google_scholar": "https://scholar.google.com.eg/citations?user=FwL_RdEAAAAJ;7mrPM4kAAAAJ;roVp_WwAAAAJ", "orcid": ";0000-0003-4230-9906;0000-0002-4891-6726", "linkedin": ";;navid-naderi-alizadeh/", "or_profile": "~Samar_Hadou1;~Alejandro_Ribeiro1;~Navid_Naderializadeh1", "aff": "University of Pennsylvania;University of Pennsylvania;Duke University", "aff_domain": "seas.upenn.edu;upenn.edu;duke.edu", "position": "PhD student;Full Professor;Assistant Research Professor", "bibtex": "@misc{\nhadou2024stochastic,\ntitle={Stochastic Unrolled Federated Learning},\nauthor={Samar Hadou and Navid NaderiAlizadeh and Alejandro Ribeiro},\nyear={2024},\nurl={https://openreview.net/forum?id=68k0KcHFrW}\n}", "github": "", "project": "", "reviewers": "94iK;DuLq;WDdi", "site": "https://openreview.net/forum?id=68k0KcHFrW", "pdf_size": 509193, "rating": "5;6;6", "confidence": "4;4;2", "soundness": "4;3;3", "contribution": "3;2;2", "presentation": "4;3;3", "wc_summary": "65;134;49", "wc_strengths": "59;150;84", "wc_weaknesses": "219;305;87", "wc_questions": "9;68;84", "wc_review": "352;657;304", "wc_reply_reviewers": "170;0;0", "wc_reply_authors": "1511;605;355", "reply_reviewers": "1;0;0", "reply_authors": "4;1;1", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 82.66666666666667, 36.88119062912994 ], "wc_strengths_avg": [ 97.66666666666667, 38.38691906829143 ], "wc_weaknesses_avg": [ 203.66666666666666, 89.65613321029534 ], "wc_questions_avg": [ 53.666666666666664, 32.25247621845836 ], "wc_review_avg": [ 437.6666666666667, 156.32515970103967 ], "wc_reply_reviewers_avg": [ 56.666666666666664, 80.13876853447539 ], "wc_reply_authors_avg": [ 823.6666666666666, 496.61878963871493 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 1.4142135623730951 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15721589579304828873&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Pennsylvania;Duke University", "aff_unique_dep": ";", "aff_unique_url": "https://www.upenn.edu;https://www.duke.edu", "aff_unique_abbr": "UPenn;Duke", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "69eQ7n4mam", "title": "Autoencoders with Intrinsic Dimension Constraints for Learning Low Dimensional Image Representations", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Autoencoders have achieved great success in various computer vision applications. 
The autoencoder learns appropriate low-dimensional image representations through the self-supervised paradigm, i.e., reconstruction. Existing studies mainly focus on minimizing the pixel-level reconstruction error of an image, while mostly ignoring the preservation of the property that reveals the manifold structure of data, such as Intrinsic Dimension (ID). The learning process of most autoencoders is observed to involve dimensionality compression first, and then dimensionality expansion, which plays a crucial role in acquiring low-dimensional image representations. Motivated by the important role of ID, in this work, we propose a novel deep representation learning approach with autoencoder, which incorporates regularization of the global and local ID constraints into the reconstruction of data representations. This approach not only preserves the global manifold structure of the whole dataset but also maintains the local manifold structure of the feature maps of each point, which makes the learned low-dimensional features more discriminant and improves the performance of the downstream tasks. To the best of our knowledge, existing works are rare and limited in exploiting both global and local ID invariant properties on the regularization of DNNs. Numerical experimental results on benchmark datasets (Extended Yale B, Caltech101 and ImageNet) show that the resulting regularized learning models achieve better discriminative representations for downstream tasks including image classification and clustering.", "keywords": "Self-supervised Representation Learning;Intrinsic Dimension", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/280a5184344e7d6ea7b4298a7f9bf96cde229757.pdf", "author": "Jianzhang Zheng;Hao Shen;Xuan Tang;Mingsong Chen;peidong liang;Xian Wei", "authorids": "~Jianzhang_Zheng1;~Hao_Shen1;~Xuan_Tang3;~Mingsong_Chen1;~peidong_liang1;~Xian_Wei1", "gender": ";M;F;M;M;M", "homepage": ";;https://faculty.ecnu.edu.cn/_s15/tx2_21642/main.psp;https://faculty.ecnu.edu.cn/_s43/cms/main.psp;http://hitqz.com;https://www.researchgate.net/", "dblp": ";26/2210-2;;95/573.html;;139/0725", "google_scholar": ";Kce9W-8AAAAJ;mFj-I10AAAAJ;93A6b7YAAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": ";;;0000-0002-3922-0989;;", "linkedin": ";;;;;", "or_profile": "~Jianzhang_Zheng1;~Hao_Shen1;~Xuan_Tang3;~Mingsong_Chen1;~peidong_liang1;~Xian_Wei1", "aff": ";Fortiss GmbH;East China Normal University;East China Normal University;Fujian(Quanzhou)Advanced Manufacturing Institute;East China Normal University", "aff_domain": ";fortiss.org;ecnu.edu.cn;ecnu.edu.cn;hitqz.com;ecnu.edu.cn", "position": ";Principal Researcher;Associate Professor;Full Professor;Researcher;Principal Researcher", "bibtex": "@misc{\nzheng2024autoencoders,\ntitle={Autoencoders with Intrinsic Dimension Constraints for Learning Low Dimensional Image Representations},\nauthor={Jianzhang Zheng and Hao Shen and Xuan Tang and Mingsong Chen and peidong liang and Xian Wei},\nyear={2024},\nurl={https://openreview.net/forum?id=69eQ7n4mam}\n}", "github": "", "project": "", "reviewers": "dwAb;mtEJ;XqLA;3713;UAX2", "site": "https://openreview.net/forum?id=69eQ7n4mam", "pdf_size": 4120403, "rating": "3;3;5;5;6", "confidence": "4;4;3;4;4", "soundness": "3;2;3;2;3", "contribution": "1;1;2;2;3", "presentation": "2;2;3;3;3", "wc_summary": "191;33;83;29;156", "wc_strengths": "42;19;89;34;137", "wc_weaknesses": "327;123;510;29;127", "wc_questions": 
"64;39;325;269;5", "wc_review": "624;214;1007;361;425", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "0;0;0;0;0", "rating_avg": [ 4.4, 1.2 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "contribution_avg": [ 1.8, 0.7483314773547883 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 98.4, 65.15090175891658 ], "wc_strengths_avg": [ 64.2, 43.27308632394967 ], "wc_weaknesses_avg": [ 223.2, 173.2263259438357 ], "wc_questions_avg": [ 140.4, 130.43557796858954 ], "wc_review_avg": [ 526.2, 274.12945846807486 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.25000000000000006, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:JEQt4nYzxHkJ:scholar.google.com/&scioq=Autoencoders+with+Intrinsic+Dimension+Constraints+for+Learning+Low+Dimensional+Image+Representations&hl=en&as_sdt=0,44", "gs_version_total": 3, "aff_unique_index": "0;1;1;2;1", "aff_unique_norm": "fortiss GmbH;East China Normal University;Fujian Advanced Manufacturing Institute", "aff_unique_dep": ";;", "aff_unique_url": "https://www.fortiss.org;http://www.ecnu.edu.cn;", "aff_unique_abbr": "Fortiss;ECNU;", "aff_campus_unique_index": "1", "aff_campus_unique": ";Quanzhou", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "Germany;China" }, { "title": "DreamClean: Restoring Clean Image Using Deep Diffusion Prior", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19402", "id": "6ALuy19mPa", "author_site": "Jie Xiao, Ruili Feng, Zhiheng Liu, Zhantao Yang, Han Zhang, Yurui Zhu, Xueyang Fu, Kai Zhu, Yu Liu, Zheng-Jun Zha", "tldr": "", "abstract": "Image restoration poses a garners substantial interest due to the exponential surge in demands for recovering high-quality images from diverse mobile camera devices, adverse lighting conditions, suboptimal shooting environments, and frequent image compression for efficient transmission purposes. Yet this problem gathers significant challenges as people are blind to the type of restoration the images suffer, which, is usually the case in real-day scenarios and is most urgent to solve for this field. Current research, however, heavily relies on prior knowledge of the restoration type, either explicitly through rules or implicitly through the availability of degraded-clean image pairs to define the restoration process, and consumes considerable effort to collect image pairs of vast degradation types. This paper introduces DreamClean, a training-free method that needs no degradation prior knowledge but yields high-fidelity and generality towards various types of image degradation. DreamClean embeds the degraded image back to the latent of pre-trained diffusion models and re-sample it through a carefully designed diffusion process that mimics those generating clean images. Thanks to the rich image prior in diffusion models and our novel Variance Preservation Sampling (VPS) technique, DreamClean manages to handle various different degradation types at one time and reaches far more satisfied final quality than previous competitors. 
DreamClean relies on elegant theoretical supports to assure its convergence to clean image when VPS has appropriate parameters, and also enjoys superior experimental performance over various challenging tasks that could be overwhelming for previous methods when degradation prior is unavailable.", "keywords": "Image Restoration;Diffusion Models;Image Prior;Blind Restoration;Super-Resolution;Image Denoising;Colorization;JPEG Artifacts Correction", "primary_area": "generative models", "supplementary_material": "/attachment/090bf70903cc485c4c66bf265ac0de64a6c05823.pdf", "author": "Jie Xiao;Ruili Feng;Han Zhang;Zhiheng Liu;Zhantao Yang;Yurui Zhu;Xueyang Fu;Kai Zhu;Yu Liu;Zheng-Jun Zha", "authorids": "~Jie_Xiao3;~Ruili_Feng1;~Han_Zhang16;~Zhiheng_Liu1;~Zhantao_Yang1;~Yurui_Zhu1;~Xueyang_Fu1;~Kai_Zhu4;~Yu_Liu23;~Zheng-Jun_Zha2", "gender": "M;;M;M;M;M;;M;M;M", "homepage": "https://jiexiaou.github.io/;https://github.com/RuiLiFeng;https://github.com/bibona;https://Johanan528.github.io;;;;https://github.com/liuyuyuil;;https://xueyangfu.github.io/", "dblp": "15/3437-2;20/9594;26/4189-10;;285/8489.html;282/6550;75/4078-4;97/2274-63;23/1818;136/9389", "google_scholar": "https://scholar.google.com/citations?hl=en;;;LT5JwlcAAAAJ;Fz3X5FwAAAAJ;;https://scholar.google.com.hk/citations?hl=zh-CN;8zksQb4AAAAJ;;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0000-0002-5677-270X;;;;0000-0003-2765-295X;;;;;0000-0001-8036-4071", "linkedin": ";;;;;;;;;", "or_profile": "~Jie_Xiao3;~Ruili_Feng1;~Han_Zhang16;~Zhiheng_Liu1;~Zhantao_Yang1;~Yurui_Zhu1;~Kai_Zhu4;~Yu_Liu23;~Zheng-Jun_Zha2;~Xueyang_Fu2", "aff": "University of Science and Technology of China;University of Science and Technology of China;Shanghai Jiaotong University;University of Science and Technology of China;Shanghai Jiaotong University;University of Science and Technology of China;University of Science and Technology of China;Alibaba Group;University of Science and Technology of China;University of Science and Technology of China", "aff_domain": "mail.ustc.edu.cn;mail.ustc.edu.cn;sjtu.edu.cn;ustc.edu.cn;sjtu.edu.cn;ustc.edu.cn;ustc.edu.cn;alibaba-inc.com;ustc.edu.cn;ustc.edu.cn", "position": "PhD student;PhD student;PhD student;MS student;PhD student;PhD student;Postdoc;Researcher;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nxiao2024dreamclean,\ntitle={DreamClean: Restoring Clean Image Using Deep Diffusion Prior},\nauthor={Jie Xiao and Ruili Feng and Han Zhang and Zhiheng Liu and Zhantao Yang and Yurui Zhu and Xueyang Fu and Kai Zhu and Yu Liu and Zheng-Jun Zha},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=6ALuy19mPa}\n}", "github": "", "project": "", "reviewers": "HBur;aVXX;6gxh;NQCS", "pdf_size": 25702068, "rating": "6;6;8;8", "confidence": "4;5;3;5", "soundness": "2;3;3;4", "contribution": "3;3;4;4", "presentation": "3;3;3;3", "wc_summary": "91;41;193;101", "wc_strengths": "67;15;130;187", "wc_weaknesses": "137;55;115;76", "wc_questions": "101;3;55;22", "wc_review": "396;114;493;386", "wc_reply_reviewers": "78;0;11;0", "wc_reply_authors": "1532;267;421;257", "reply_reviewers": "1;0;1;0", "reply_authors": "4;2;1;2", "rating_avg": [ 7.0, 1.0 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 106.5, 54.87030162118666 ], "wc_strengths_avg": [ 99.75, 64.77412677913921 ], "wc_weaknesses_avg": [ 95.75, 
32.104322138927024 ], "wc_questions_avg": [ 45.25, 37.1777823437601 ], "wc_review_avg": [ 347.25, 141.0024379221863 ], "wc_reply_reviewers_avg": [ 22.25, 32.4990384473141 ], "wc_reply_authors_avg": [ 619.25, 530.9709855538248 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3194860137127022818&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "openreview": "https://openreview.net/forum?id=6ALuy19mPa", "pdf": "https://openreview.net/pdf?id=6ALuy19mPa", "email": "mail.ustc.edu.cn;mail.ustc.edu.cn;sjtu.edu.cn;ustc.edu.cn;sjtu.edu.cn;ustc.edu.cn;ustc.edu.cn;alibaba-inc.com;ustc.edu.cn;ustc.edu.cn", "author_num": 10, "aff_unique_index": "0;0;1;0;1;0;0;2;0;0", "aff_unique_norm": "University of Science and Technology of China;Shanghai Jiao Tong University;Alibaba Group", "aff_unique_dep": ";;", "aff_unique_url": "http://www.ustc.edu.cn;https://www.sjtu.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "USTC;SJTU;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Enhancing Tail Performance in Extreme Classifiers by Label Variance Reduction", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19401", "id": "6ARlSgun7J", "author_site": "Anirudh Buvanesh, Rahul Chand, Jatin Prakash, Bhawna Paliwal, Mudit Dhawan, Neelabh Madan, Deepesh Hada, Vidit Jain, Sonu Mehta, Yashoteja Prabhu, Manish Gupta, Ramachandran Ramjee, Manik Varma", "tldr": "", "abstract": "Extreme Classification (XC) architectures, which utilize a massive One-vs-All (OvA) classifier layer at the output, have demonstrated remarkable performance on problems with large label sets. Nonetheless, these architectures falter on tail labels with few representative samples. This phenomenon has been attributed to factors such as classifier over-fitting and missing label bias, and solutions involving regularization and loss re-calibration have been developed. This paper explores the impact of label variance - a previously unexamined factor - on the tail performance in extreme classifiers. It also develops a method to systematically reduce label variance in XC by transferring the knowledge from a specialized tail-robust teacher model to the OvA classifiers. For this purpose, it proposes a principled knowledge distillation framework, LEVER, which enhances the tail performance in extreme classifiers with formal guarantees on generalization. Comprehensive experiments are conducted on a diverse set of XC datasets, demonstrating that LEVER can enhance tail performance by around 5\\% and 6\\% points in PSP and coverage metrics, respectively, when integrated with leading extreme classifiers. Moreover, it establishes a new state-of-the-art when added to the top-performing Renee classifier. Extensive ablations and analyses substantiate the efficacy of our design choices. Another significant contribution is the release of two new XC datasets that are different from and more challenging than the available benchmark datasets, thereby encouraging more rigorous algorithmic evaluation in the future. 
Code for LEVER is available at: aka.ms/lever.", "keywords": "Extreme Classification;Extreme Multi-Label Learning", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/d1abe5a93a22abd8a77bcd20c51efe61b261e95f.zip", "author": "Anirudh Buvanesh;Rahul Chand;Jatin Prakash;Bhawna Paliwal;Mudit Dhawan;Neelabh Madan;Deepesh Hada;Vidit Jain;SONU MEHTA;Yashoteja Prabhu;Manish Gupta;Ramachandran Ramjee;Manik Varma", "authorids": "~Anirudh_Buvanesh2;~Rahul_Chand1;~Jatin_Prakash2;~Bhawna_Paliwal1;~Mudit_Dhawan2;~Neelabh_Madan2;~Deepesh_Hada1;~Vidit_Jain2;~SONU_MEHTA1;~Yashoteja_Prabhu1;~Manish_Gupta4;~Ramachandran_Ramjee1;~Manik_Varma1", "gender": ";M;M;F;M;M;M;;F;;;;M", "homepage": ";https://rahulschand.github.io;https://github.com/bicycleman15;;https://mudit-dhawan.github.io/;https://github.com/neelabh17;https://www.microsoft.com/en-us/research/people/deepeshhada/;;https://sonumehta.github.io/;;;https://www.microsoft.com/en-us/research/people/ramjee/;http://manikvarma.org", "dblp": ";;294/5983;302/2497;;317/5192;;68/5650;246/5368;;;97/117.html;07/918.html", "google_scholar": ";;NeBK8VMAAAAJ;7BxDLWcAAAAJ;XmqUp9cAAAAJ;;PHYW-PAAAAAJ;;NcstulUAAAAJ;;;https://scholar.google.co.in/citations?user=0P35aLUAAAAJ;https://scholar.google.gr/citations?user=2efybZkAAAAJ", "orcid": ";;0009-0003-5474-6529;;;;;0000-0002-7911-1074;0000-0003-1654-3766;;;0000-0003-0007-6040;0000-0003-4516-6613", "linkedin": ";;;;;;deepesh-hada/;jvidit/;sonumehta2403/;;;;", "or_profile": "~Anirudh_Buvanesh2;~Rahul_Chand1;~Jatin_Prakash2;~Bhawna_Paliwal1;~Mudit_Dhawan2;~Neelabh_Madan2;~Deepesh_Hada1;~Vidit_Jain2;~SONU_MEHTA1;~Yashoteja_Prabhu1;~Manish_Gupta4;~Ramachandran_Ramjee1;~Manik_Varma1", "aff": ";;Microsoft Research;Microsoft Research;Microsoft;Microsoft;Microsoft;Microsoft Research, India;Indian Institute of Technology Delhi;;;Microsoft;Microsoft Research", "aff_domain": ";;microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;cs.iitd.ac.in;;;microsoft.com;research.microsoft.com", "position": ";;Intern;Research Engineer;Intern;Intern;Researcher;Research SDE;PhD student;;;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nbuvanesh2024enhancing,\ntitle={Enhancing Tail Performance in Extreme Classifiers by Label Variance Reduction},\nauthor={Anirudh Buvanesh and Rahul Chand and Jatin Prakash and Bhawna Paliwal and Mudit Dhawan and Neelabh Madan and Deepesh Hada and Vidit Jain and SONU MEHTA and Yashoteja Prabhu and Manish Gupta and Ramachandran Ramjee and Manik Varma},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=6ARlSgun7J}\n}", "github": "", "project": "", "reviewers": "UBHy;Cq1J;Uo9h;Kx6v", "pdf_size": 655392, "rating": "5;6;6;8", "confidence": "2;4;3;5", "soundness": "3;3;2;2", "contribution": "2;3;2;3", "presentation": "1;3;2;3", "wc_summary": "66;146;203;89", "wc_strengths": "54;66;84;13", "wc_weaknesses": "171;251;461;39", "wc_questions": "47;87;7;362", "wc_review": "338;550;755;503", "wc_reply_reviewers": "13;180;245;20", "wc_reply_authors": "584;841;896;1069", "reply_reviewers": "1;1;1;1", "reply_authors": "1;3;3;3", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 126.0, 53.146025251188824 ], "wc_strengths_avg": [ 54.25, 
26.099568961957974 ], "wc_weaknesses_avg": [ 230.5, 153.1037230115584 ], "wc_questions_avg": [ 125.75, 139.3007088998473 ], "wc_review_avg": [ 536.5, 148.70188297395563 ], "wc_reply_reviewers_avg": [ 114.5, 100.68887724073598 ], "wc_reply_authors_avg": [ 847.5, 173.8454773642386 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": 0.9233805168766388, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14304141305725231718&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=6ARlSgun7J", "pdf": "https://openreview.net/pdf?id=6ARlSgun7J", "email": ";;microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;cs.iitd.ac.in;;;microsoft.com;research.microsoft.com", "author_num": 13, "aff_unique_index": "0;0;0;0;0;0;1;0;0", "aff_unique_norm": "Microsoft;Indian Institute of Technology Delhi", "aff_unique_dep": "Microsoft Research;", "aff_unique_url": "https://www.microsoft.com/en-us/research;https://www.iitd.ac.in", "aff_unique_abbr": "MSR;IIT Delhi", "aff_campus_unique_index": "1", "aff_campus_unique": ";Delhi", "aff_country_unique_index": "0;0;0;0;0;1;1;0;0", "aff_country_unique": "United States;India" }, { "id": "6AtXCnHCFy", "title": "FSN: Feature Shift Network for Load-Domain Domain Generalization", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Conventional deep learning methods for fault detection often assume that the training and the testing sets share the same fault pattern spaces and domain spaces. However, some fault patterns are rare, and many real-world faults have not appeared in the training set. As a result, it\u2019s hard for the trained model to achieve desirable performance on the testing set. \nIn this paper, we introduce a novel domain generalization, Load-Domain (LD) domain generalization, which is based on the analysis of the CWRU bearing dataset and its domain division method. For this scenario, we propose a feature shift model called FSN (Feature Shift Network). In the bearing dataset, domains are divided based on different operating conditions which have specific loads, so it\u2019s equivalent to load-based domain division. Moreover, the domain label corresponds to the actual load magnitude, making it unique as it contains physical information, which can boost detection accuracy on unknown domain beyond the training set. According to the knowledge above , FSN is trained for feature shift on adjacent source domains, and finally shifts target domain features into adjacent source domain feature space to achieve the purpose of domain generalization.\nExtensive experiments on CWRU demonstrate that FSN is better than the existed models in the LD domain generalization case. 
Furthermore, we have another test on MNIST, which also shows FSN can achieve the best performance.", "keywords": "Fault diagnosis;Deep learning;CNN;Domain Generalization;Load-domain Domain Generalization", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "", "author": "Heng Chen;Erkang Zhao;Lei Shi;Yuhui Zhou", "authorids": "~Heng_Chen1;~Erkang_Zhao1;~Lei_Shi17;~Yuhui_Zhou1", "gender": "M;M;M;M", "homepage": "https://gr.xjtu.edu.cn/web/hengchen;http://github.com/EkZzz;;https://zhou-yuhui.github.io/", "dblp": ";;;", "google_scholar": ";;;", "orcid": ";;0000-0003-0225-0460;", "linkedin": ";;;", "or_profile": "~Heng_Chen1;~Erkang_Zhao1;~Lei_Shi17;~Yuhui_Zhou1", "aff": "Xi'an Jiaotong University;Xi'an Jiaotong University;;Xi'an Jiaotong University", "aff_domain": "xjtu.edu.cn;xjtu.edu.cn;;xjtu.edu.cn", "position": "Associate Professor;MS student;;MS student", "bibtex": "@misc{\nchen2024fsn,\ntitle={{FSN}: Feature Shift Network for Load-Domain Domain Generalization},\nauthor={Heng Chen and Erkang Zhao and Lei Shi and Yuhui Zhou},\nyear={2024},\nurl={https://openreview.net/forum?id=6AtXCnHCFy}\n}", "github": "", "project": "", "reviewers": "gaFV;nQ8N;u8DP;xHmt;NDUe;2Hv6;4bLC;hzsf", "site": "https://openreview.net/forum?id=6AtXCnHCFy", "pdf_size": 490579, "rating": "3;3;3;3;3;3;5;6", "confidence": "5;3;4;4;4;3;3;4", "soundness": "1;2;2;2;2;2;2;2", "contribution": "1;2;2;2;2;1;2;2", "presentation": "2;2;2;1;2;1;2;4", "wc_summary": "59;65;104;51;55;52;82;142", "wc_strengths": "13;49;57;24;36;35;36;120", "wc_weaknesses": "39;280;140;91;161;88;80;58", "wc_questions": "73;36;16;2;21;48;29;50", "wc_review": "184;430;317;168;273;223;227;370", "wc_reply_reviewers": "0;0;0;0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0;0;0;0", "reply_reviewers": "0;0;0;0;0;0;0;0", "reply_authors": "0;0;0;0;0;0;0;0", "rating_avg": [ 3.625, 1.1110243021644486 ], "confidence_avg": [ 3.75, 0.6614378277661477 ], "soundness_avg": [ 1.875, 0.33071891388307384 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 2.0, 0.8660254037844386 ], "wc_summary_avg": [ 76.25, 30.05723706530592 ], "wc_strengths_avg": [ 46.25, 30.617601147052653 ], "wc_weaknesses_avg": [ 117.125, 72.01117534799721 ], "wc_questions_avg": [ 34.375, 20.969844420023723 ], "wc_review_avg": [ 274.0, 86.27572080255256 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.12757297666876868, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:vJ8oK8uqn9gJ:scholar.google.com/&scioq=FSN:+Feature+Shift+Network+for+Load-Domain+Domain+Generalization&hl=en&as_sdt=0,5", "gs_version_total": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Xi'an Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.xjtu.edu.cn", "aff_unique_abbr": "XJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "6CGBfHtFRM", "title": "Mean Field Langevin Actor-Critic: Faster Convergence and Global Optimality beyond Lazy Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "We study how deep reinforcement learning algorithms learn meaningful features when optimized for finding the optimal policy. 
\nIn particular, we focus on a version of the neural actor-critic algorithm where both the actor and critic are represented by over-parameterized neural networks in the mean-field regime, and are updated via temporal-difference (TD) and policy gradient respectively. \nSpecifically, for the critic neural network to perform policy evaluation, \nwe propose $\\textit{mean-field Langevin TD learning}$ method (MFLTD), an extension of the mean-field Langevin dynamics with proximal TD updates, and compare its effectiveness against existing methods through numerical experiments. \nIn addition, for the actor neural network to perform policy updates, \nwe propose $\\textit{mean-field Langevin policy gradient}$ (MFLPG), which implements policy gradient in the policy space through a version of Wasserstein gradient flow in the space of network parameters. \nWe prove that MFLTD finds the correct value function, and the sequence of actors created by MFLPG created by the algorithm converges linearly to the globally optimal policy of the Kullback Leibler divergence regularized objective. To our best knowledge, \nwe provide the first linear convergence guarantee for neural actor-critic algorithms with $\\textit{global optimality}$ and $\\textit{feature learning}$.", "keywords": "policy gradient method;temporal-difference learning;actor-critic;global optimality;linear convergence;neural network;mean field;feature learning", "primary_area": "learning theory", "supplementary_material": "/attachment/da8e31443087f67c5dd4dd3151c2a27965b2760d.pdf", "author": "Kakei Yamamoto;Kazusato Oko;Zhuoran Yang;Taiji Suzuki", "authorids": "~Kakei_Yamamoto1;~Kazusato_Oko1;~Zhuoran_Yang1;~Taiji_Suzuki1", "gender": "M;M;M;M", "homepage": ";;https://zhuoranyang.github.io/;http://ibis.t.u-tokyo.ac.jp/suzuki/", "dblp": "334/7773;;;08/312", "google_scholar": "https://scholar.google.com/citations?hl=ja;;;x8osrBsAAAAJ", "orcid": "0000-0002-6231-2750;;;", "linkedin": "kakei-yamamoto-bb37461a0/;kazusatooko/;;", "or_profile": "~Kakei_Yamamoto1;~Kazusato_Oko1;~Zhuoran_Yang1;~Taiji_Suzuki1", "aff": "Massachusetts Institute of Technology;The University of Tokyo;Yale University;The University of Tokyo", "aff_domain": "mit.edu;u-tokyo.ac.jp;yale.edu;tokyo.ac.jp", "position": "PhD student;MS student;Assistant Professor;Associate Professor", "bibtex": "@misc{\nyamamoto2024mean,\ntitle={Mean Field Langevin Actor-Critic: Faster Convergence and Global Optimality beyond Lazy Learning},\nauthor={Kakei Yamamoto and Kazusato Oko and Zhuoran Yang and Taiji Suzuki},\nyear={2024},\nurl={https://openreview.net/forum?id=6CGBfHtFRM}\n}", "github": "", "project": "", "reviewers": "1VQp;fWZz;GTpe;XKHS;Xd7S", "site": "https://openreview.net/forum?id=6CGBfHtFRM", "pdf_size": 489793, "rating": "5;5;5;5;6", "confidence": "2;5;3;3;3", "soundness": "2;3;3;3;2", "contribution": "2;2;2;2;3", "presentation": "3;3;2;3;2", "wc_summary": "73;118;160;116;128", "wc_strengths": "17;50;82;29;64", "wc_weaknesses": "37;151;579;56;155", "wc_questions": "74;378;2;23;209", "wc_review": "201;697;823;224;556", "wc_reply_reviewers": "38;48;0;0;0", "wc_reply_authors": "889;703;1171;686;908", "reply_reviewers": "1;1;0;0;0", "reply_authors": "2;2;2;1;2", "rating_avg": [ 5.2, 0.39999999999999997 ], "confidence_avg": [ 3.2, 0.9797958971132712 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "contribution_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 119.0, 27.885480092693403 ], "wc_strengths_avg": [ 48.4, 23.397435756937128 ], 
"wc_weaknesses_avg": [ 195.6, 197.6234803863144 ], "wc_questions_avg": [ 137.2, 140.32448111430878 ], "wc_review_avg": [ 500.2, 249.7401849923236 ], "wc_reply_reviewers_avg": [ 17.2, 21.301643129110957 ], "wc_reply_authors_avg": [ 871.4, 175.5797254810475 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.10206207261596577, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9644032660721430589&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Massachusetts Institute of Technology;University of Tokyo;Yale University", "aff_unique_dep": ";;", "aff_unique_url": "https://web.mit.edu;https://www.u-tokyo.ac.jp;https://www.yale.edu", "aff_unique_abbr": "MIT;UTokyo;Yale", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "United States;Japan" }, { "title": "Two-timescale Extragradient for Finding Local Minimax Points", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19400", "id": "6CIGhcJYJH", "author_site": "Jiseok Chae, Kyuwon Kim, Donghwan Kim", "tldr": "", "abstract": "Minimax problems are notoriously challenging to optimize. However, we present that the two-timescale extragradient method can be a viable solution. By utilizing dynamical systems theory, we show that it converges to points that satisfy the second-order necessary condition of local minimax points, under mild conditions that the two-timescale gradient descent ascent fails to work. This work provably improves upon all previous results on finding local minimax points, by eliminating a crucial assumption that the Hessian with respect to the maximization variable is nondegenerate.", "keywords": "Minimax optimization;Nonconvex-nonconcave optimization;Extragradient method;Dynamical systems", "primary_area": "optimization", "supplementary_material": "", "author": "Jiseok Chae;Kyuwon Kim;Donghwan Kim", "authorids": "~Jiseok_Chae1;~Kyuwon_Kim1;~Donghwan_Kim2", "gender": "M;M;M", "homepage": "https://jsch8q.github.io/;https://kaist-kyuwonkim.github.io/;http://mathsci.kaist.ac.kr/~donghwankim/", "dblp": "348/5478;119/9250;05/1032", "google_scholar": ";;https://scholar.google.com/citations?hl=en", "orcid": "0009-0009-0672-3422;0009-0002-6967-9907;", "linkedin": ";;", "or_profile": "~Jiseok_Chae1;~Kyuwon_Kim1;~Donghwan_Kim2", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.edu;kaist.edu;kaist.ac.kr", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nchae2024twotimescale,\ntitle={Two-timescale Extragradient for Finding Local Minimax Points},\nauthor={Jiseok Chae and Kyuwon Kim and Donghwan Kim},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=6CIGhcJYJH}\n}", "github": "", "project": "", "reviewers": "8GfH;aBEe;dqsM;yB9R", "pdf_size": 627451, "rating": "6;6;8;8", "confidence": "3;2;3;3", "soundness": "3;3;4;3", "contribution": "3;2;4;3", "presentation": "2;3;2;3", "wc_summary": "82;89;119;78", "wc_strengths": "71;67;125;21", "wc_weaknesses": "216;68;273;234", "wc_questions": "2;60;46;58", "wc_review": "371;284;563;391", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "631;337;703;420", 
"reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 1.0 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 92.0, 16.077935190813527 ], "wc_strengths_avg": [ 71.0, 36.851051545376556 ], "wc_weaknesses_avg": [ 197.75, 77.6929050814809 ], "wc_questions_avg": [ 41.5, 23.425413550244958 ], "wc_review_avg": [ 402.25, 101.15180423502093 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 522.75, 149.38938215281567 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12522265416593365909&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=6CIGhcJYJH", "pdf": "https://openreview.net/pdf?id=6CIGhcJYJH", "email": "kaist.edu;kaist.edu;kaist.ac.kr", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "DrS: Learning Reusable Dense Rewards for Multi-Stage Tasks", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19399", "id": "6CZ50WgfCG", "author_site": "Tongzhou Mu, Minghua Liu, Hao Su", "tldr": "", "abstract": "The success of many RL techniques heavily relies on human-engineered dense rewards, which typically demands substantial domain expertise and extensive trial and error. In our work, we propose **DrS** (**D**ense **r**eward learning from **S**tages), a novel approach for learning *reusable* dense rewards for multi-stage tasks in a data-driven manner. By leveraging the stage structures of the task, DrS learns a high-quality dense reward from sparse rewards and demonstrations if given. The learned rewards can be *reused* in unseen tasks, thus reducing the human effort for reward engineering. Extensive experiments on three physical robot manipulation task families with 1000+ task variants demonstrate that our learned rewards can be reused in unseen tasks, resulting in improved performance and sample efficiency of RL algorithms. The learned rewards even achieve comparable performance to human-engineered rewards on some tasks. 
See our [project page](https://sites.google.com/view/iclr24drs) for more details.", "keywords": "Reward Learning;Multi-stage Task", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/74e755c3aef53c3a0d8fbe89b8266993518bc85e.pdf", "author": "Tongzhou Mu;Minghua Liu;Hao Su", "authorids": "~Tongzhou_Mu1;~Minghua_Liu1;~Hao_Su1", "gender": "M;M;M", "homepage": "http://cseweb.ucsd.edu/~t3mu/;https://cseweb.ucsd.edu//~mil070/;http://ai.ucsd.edu/~haosu", "dblp": "183/0943;28/8907;09/4945-1", "google_scholar": "uVsZydYAAAAJ;6U3IGtEAAAAJ;1P8Zu04AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Tongzhou_Mu1;~Minghua_Liu1;~Hao_Su1", "aff": "University of California, San Diego;University of California, San Diego;University of California, San Diego", "aff_domain": "ucsd.edu;ucsd.edu;ucsd.edu", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nmu2024drs,\ntitle={DrS: Learning Reusable Dense Rewards for Multi-Stage Tasks},\nauthor={Tongzhou Mu and Minghua Liu and Hao Su},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=6CZ50WgfCG}\n}", "github": "", "project": "", "reviewers": "oxbF;35ZH;GMmw;ck68", "pdf_size": 1634213, "rating": "3;6;8;8", "confidence": "4;4;3;3", "soundness": "2;2;4;4", "contribution": "1;2;4;3", "presentation": "2;3;4;4", "wc_summary": "89;62;131;126", "wc_strengths": "61;41;92;130", "wc_weaknesses": "175;165;20;104", "wc_questions": "39;235;89;14", "wc_review": "364;503;332;374", "wc_reply_reviewers": "814;401;0;25", "wc_reply_authors": "3046;2491;735;672", "reply_reviewers": "3;2;0;1", "reply_authors": "6;7;1;1", "rating_avg": [ 6.25, 2.0463381929681126 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 1.0 ], "contribution_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 102.0, 28.222331583340168 ], "wc_strengths_avg": [ 81.0, 33.62290885690886 ], "wc_weaknesses_avg": [ 116.0, 61.72924752497798 ], "wc_questions_avg": [ 94.25, 85.63111292048002 ], "wc_review_avg": [ 393.25, 65.23563060168883 ], "wc_reply_reviewers_avg": [ 310.0, 331.51998431467143 ], "wc_reply_authors_avg": [ 1736.0, 1051.2162004078893 ], "reply_reviewers_avg": [ 1.5, 1.118033988749895 ], "reply_authors_avg": [ 3.75, 2.7726341266023544 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8551861104941366, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13580116662997845792&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=6CZ50WgfCG", "pdf": "https://openreview.net/pdf?id=6CZ50WgfCG", "email": "ucsd.edu;ucsd.edu;ucsd.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, San Diego", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsd.edu", "aff_unique_abbr": "UCSD", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "San Diego", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "6CetUU9FSt", "title": "Visual Encoders for Data-Efficient Imitation Learning in Modern Video Games", "track": "main", "status": "Reject", "tldr": "", "abstract": "Video games have served as useful benchmarks for the decision making community, but going beyond Atari games towards training agents in modern games has been prohibitively expensive for the vast majority of the research community. 
Recent progress in the research, development and open release of large vision models has the potential to amortize some of these costs across the community. However, it is currently unclear which of these models have learnt representations that retain information critical for sequential decision making. Towards enabling wider participation in the research of gameplaying agents in modern games, we present a systematic study of imitation learning with publicly available visual encoders compared to the typical, task-specific, end-to-end training approach in Minecraft, Minecraft Dungeons and Counter-Strike: Global Offensive.", "keywords": "Imitation Learning;Visual Encoders", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Lukas Sch\u00e4fer;Logan Jones;Anssi Kanervisto;Yuhan Cao;Tabish Rashid;Raluca Georgescu;David Bignell;Siddhartha Sen;Andrea Trevi\u00f1o Gavito;Sam Devlin", "authorids": "~Lukas_Sch\u00e4fer1;~Logan_Jones1;~Anssi_Kanervisto1;~Yuhan_Cao1;~Tabish_Rashid1;~Raluca_Georgescu1;~David_Bignell1;~Siddhartha_Sen1;~Andrea_Trevi\u00f1o_Gavito1;~Sam_Devlin2", "gender": "M;M;M;;M;F;M;;F;M", "homepage": "https://lukaschaefer.com/;;;https://yyyuhan.github.io/;;http://aka.ms/raluca;;http://sidsen.org;;", "dblp": ";;186/7786;;196/5069;;183/0963;;;64/7502", "google_scholar": "-yp0O_IAAAAJ;;https://scholar.google.fi/citations?user=iPimqbwAAAAJ;;d4BeWwcAAAAJ;;;;;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0002-7479-4574;;;;;;0000-0002-8032-5600;0000-0002-7769-3090", "linkedin": "lukas-schaefer/;loganbjones16/;;yuhan-cao-859633129/;;;;;andrea-trevino-gavito/;https://www.linkedin.com/pub/sam-devlin/83/810/b23", "or_profile": "~Lukas_Sch\u00e4fer1;~Logan_Jones1;~Anssi_Kanervisto1;~Yuhan_Cao1;~Tabish_Rashid1;~Raluca_Georgescu1;~David_Bignell1;~Siddhartha_Sen1;~Andrea_Trevi\u00f1o_Gavito1;~Sam_Devlin2", "aff": "University of Edinburgh;;Microsoft;;Microsoft;Microsoft;Microsoft Research;Microsoft Research;Microsoft;Microsoft Research", "aff_domain": "ed.ac.uk;;microsoft.com;;microsoft.com;microsoft.com;research.microsoft.com;research.microsoft.com;microsoft.com;microsoft.com", "position": "PhD student;;Researcher;;Researcher;Researcher;Researcher;Principal Researcher;Researcher;Principal Researcher", "bibtex": "@misc{\nsch{\\\"a}fer2024visual,\ntitle={Visual Encoders for Data-Efficient Imitation Learning in Modern Video Games},\nauthor={Lukas Sch{\\\"a}fer and Logan Jones and Anssi Kanervisto and Yuhan Cao and Tabish Rashid and Raluca Georgescu and David Bignell and Siddhartha Sen and Andrea Trevi{\\~n}o Gavito and Sam Devlin},\nyear={2024},\nurl={https://openreview.net/forum?id=6CetUU9FSt}\n}", "github": "", "project": "", "reviewers": "sq4s;bAy4;JgYZ;yJPq", "site": "https://openreview.net/forum?id=6CetUU9FSt", "pdf_size": 6610292, "rating": "1;3;3;3", "confidence": "5;5;5;4", "soundness": "2;2;2;2", "contribution": "1;2;2;2", "presentation": "3;3;3;2", "wc_summary": "147;73;55;232", "wc_strengths": "24;66;58;105", "wc_weaknesses": "64;523;310;291", "wc_questions": "104;67;21;3", "wc_review": "339;729;444;631", "wc_reply_reviewers": "0;295;183;77", "wc_reply_authors": "980;1221;990;1313", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;3;3", "rating_avg": [ 2.5, 0.8660254037844386 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 2.0, 0.0 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 126.75, 69.86549577581197 ], "wc_strengths_avg": [ 63.25, 
28.80429655450728 ], "wc_weaknesses_avg": [ 297.0, 162.4576867987477 ], "wc_questions_avg": [ 48.75, 39.52451770736741 ], "wc_review_avg": [ 535.75, 152.92706594975266 ], "wc_reply_reviewers_avg": [ 138.75, 111.17188268622601 ], "wc_reply_authors_avg": [ 1126.0, 144.74632983257297 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8913380107900966404&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;1;1;1;1;1;1", "aff_unique_norm": "University of Edinburgh;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.ed.ac.uk;https://www.microsoft.com", "aff_unique_abbr": "Edinburgh;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;1;1;1", "aff_country_unique": "United Kingdom;United States" }, { "id": "6CfJp9NG6Q", "title": "STUDY: Socially Aware Temporally Causal Decoder Recommender Systems", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recommender systems are widely used to help people find items that are tailored to their interests. These interests are often influenced by social networks, making it important to use social network information effectively in recommender systems, especially for demographic groups with interests that differ from the majority. This paper introduces STUDY, a Socially-aware Temporally caUsal Decoder recommender sYstem. The STUDY architecture is significantly more efficient to learn and train than existing methods and performs joint inference over socially-connected groups in a single forward pass of a modified transformer decoder network. We demonstrate the benefits of STUDY in the recommendation of books for students who have dyslexia or are struggling readers. Students with dyslexia often have difficulty engaging with reading material, making it critical to recommend books that are tailored to their interests. We worked with our non-profit partner Learning Ally to evaluate STUDY on a dataset of struggling readers. 
STUDY was able to generate recommendations that more accurately predicted student engagement, when compared with existing methods.", "keywords": "Recommender Systems; Transformers; Social Recommendations; Education", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/4719c6fd638fdabc3b1e3512120869a526db8e4b.pdf", "author": "Eltayeb Ahmed;Diana Mincu;Lauren Harrell;Katherine A Heller;Subhrajit Roy", "authorids": "~Eltayeb_Ahmed1;~Diana_Mincu1;~Lauren_Harrell1;~Katherine_A_Heller1;~Subhrajit_Roy1", "gender": "F;;F;M;M", "homepage": ";;;;https://uniquecreed.co.uk", "dblp": ";;32/4403;73/8829;", "google_scholar": "1Fg7f78AAAAJ;G2scKbkAAAAJ;;;4PAd9qgAAAAJ", "orcid": ";;;;", "linkedin": ";lauren-harrell-00829315;;subhrajit-roy-7aa18123/;", "or_profile": "~Diana_Mincu1;~Lauren_Harrell1;~Katherine_A_Heller1;~Subhrajit_Roy1;~Eltayeb_Khalid_Eltayeb_Ahmed1", "aff": "Google;Google;Google;Google;University of Oxford", "aff_domain": "google.com;google.com;google.com;google.com;eng.ox.ac.uk", "position": "Software Engineer;Data Scientist ;Researcher;Researcher;PhD student", "bibtex": "@misc{\nahmed2024study,\ntitle={{STUDY}: Socially Aware Temporally Causal Decoder Recommender Systems},\nauthor={Eltayeb Ahmed and Diana Mincu and Lauren Harrell and Katherine A Heller and Subhrajit Roy},\nyear={2024},\nurl={https://openreview.net/forum?id=6CfJp9NG6Q}\n}", "github": "", "project": "", "reviewers": "Trnv;5grQ;54vF;fvs5;GC3n", "site": "https://openreview.net/forum?id=6CfJp9NG6Q", "pdf_size": 319426, "rating": "1;3;5;5;5", "confidence": "5;3;5;4;3", "soundness": "3;2;3;3;3", "contribution": "2;2;2;3;2", "presentation": "2;2;2;2;2", "wc_summary": "64;34;24;84;73", "wc_strengths": "23;54;40;138;66", "wc_weaknesses": "227;88;105;267;86", "wc_questions": "139;198;39;78;4", "wc_review": "453;374;208;567;229", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "0;0;0;0;0", "rating_avg": [ 3.8, 1.6 ], "confidence_avg": [ 4.0, 0.8944271909999159 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "contribution_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 55.8, 22.999130418344084 ], "wc_strengths_avg": [ 64.2, 39.58989770130759 ], "wc_weaknesses_avg": [ 154.6, 76.78176866939182 ], "wc_questions_avg": [ 91.6, 69.56033352421478 ], "wc_review_avg": [ 366.2, 135.4745732600771 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.2795084971874737, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:4owZXOfIqdwJ:scholar.google.com/&scioq=STUDY:+Socially+Aware+Temporally+Causal+Decoder+Recommender+Systems&hl=en&as_sdt=0,33", "gs_version_total": 4, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Google;University of Oxford", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.ox.ac.uk", "aff_unique_abbr": "Google;Oxford", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "United States;United Kingdom" }, { "id": "6EQbYM0CIX", "title": "Conditional Generative Modeling for High-dimensional Marked Temporal Point Processes", "track": "main", "status": "Reject", "tldr": "", "abstract": "Point processes offer a versatile framework for sequential event 
modeling. However, the computational challenges and constrained representational power of the existing point process models have impeded their potential for wider applications. This limitation becomes especially pronounced when dealing with event data that is associated with multi-dimensional or high-dimensional marks such as texts or images. To address this challenge, this study proposes a novel event generative framework for modeling point processes with high-dimensional marks. We aim to capture the distribution of events without explicitly specifying the conditional intensity or probability density function. Instead, we use a conditional generator that takes the history of events as input and generates the high-quality subsequent event that is likely to occur given the prior observations. The proposed framework offers a host of benefits, including considerable representational power to capture intricate dynamics in multi- or even high-dimensional event space, as well as exceptional efficiency in learning the model and generating samples. Our numerical results demonstrate superior performance compared to other state-of-the-art baselines.", "keywords": "marked temporal point processes;conditional generative models", "primary_area": "generative models", "supplementary_material": "/attachment/d6acd13ff20d361216f4d5ab426a8e922304e740.pdf", "author": "Zheng Dong;Zekai Fan;Shixiang Zhu", "authorids": "~Zheng_Dong3;~Zekai_Fan1;~Shixiang_Zhu1", "gender": "M;;M", "homepage": "https://sites.google.com/view/zheng-dong/home;;https://sites.google.com/view/woodyzhu", "dblp": ";;133/3853", "google_scholar": "iqZN-q4AAAAJ;;v6_Gv6IAAAAJ", "orcid": "0000-0002-1505-8569;;0000-0002-2241-6096", "linkedin": "zheng-dong-23a264222/;;shixiang-zhu-26b956a0/", "or_profile": "~Zheng_Dong3;~Zekai_Fan1;~Shixiang_Zhu1", "aff": "Georgia Institute of Technology;;Carnegie Mellon University", "aff_domain": "gatech.edu;;cmu.edu", "position": "PhD student;;Assistant Professor", "bibtex": "@misc{\ndong2024conditional,\ntitle={Conditional Generative Modeling for High-dimensional Marked Temporal Point Processes},\nauthor={Zheng Dong and Zekai Fan and Shixiang Zhu},\nyear={2024},\nurl={https://openreview.net/forum?id=6EQbYM0CIX}\n}", "github": "", "project": "", "reviewers": "LQDX;GHUR;AoCo", "site": "https://openreview.net/forum?id=6EQbYM0CIX", "pdf_size": 6032456, "rating": "3;3;5", "confidence": "3;3;5", "soundness": "2;3;2", "contribution": "2;2;3", "presentation": "3;2;3", "wc_summary": "72;50;80", "wc_strengths": "168;107;71", "wc_weaknesses": "286;480;223", "wc_questions": "58;2;4", "wc_review": "584;639;378", "wc_reply_reviewers": "107;377;0", "wc_reply_authors": "1591;2190;1014", "reply_reviewers": "2;2;0", "reply_authors": "4;4;2", "rating_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 67.33333333333333, 12.684198393626966 ], "wc_strengths_avg": [ 115.33333333333333, 40.036094825655624 ], "wc_weaknesses_avg": [ 329.6666666666667, 109.36889665510738 ], "wc_questions_avg": [ 21.333333333333332, 25.940101944458295 ], "wc_review_avg": [ 533.6666666666666, 112.33976242730007 ], "wc_reply_reviewers_avg": [ 161.33333333333334, 158.63234922997964 ], "wc_reply_authors_avg": [ 1598.3333333333333, 480.127992194674 ], "reply_reviewers_avg": [ 
1.3333333333333333, 0.9428090415820634 ], "reply_authors_avg": [ 3.3333333333333335, 0.9428090415820634 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=875969137132098907&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Georgia Institute of Technology;Carnegie Mellon University", "aff_unique_dep": ";", "aff_unique_url": "https://www.gatech.edu;https://www.cmu.edu", "aff_unique_abbr": "Georgia Tech;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "6Ey8mAuLiw", "title": "On the Power of Multitask Representation Learning with Gradient Descent", "track": "main", "status": "Reject", "tldr": "", "abstract": "Representation learning, particularly multi-task representation learning, has gained widespread popularity in various deep learning applications, ranging from computer vision to natural language processing, due to its remarkable generalization performance. Despite its growing use, our understanding of the underlying mechanisms remains limited. In this paper, we provide a theoretical analysis elucidating why multi-task representation learning outperforms its single-task counterpart in scenarios involving over-parameterized two-layer convolutional neural networks trained by gradient descent. Our analysis is based on a data model that encompasses both task-shared and task-specific features, a setting commonly encountered in real-world applications. We also present experiments on synthetic and real-world data to illustrate and validate our theoretical findings.", "keywords": "representation learning;multi-task learning;gradient descent;generalization", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Qiaobo Li;Zixiang Chen;Yihe Deng;Yiwen Kou;Yuan Cao;Quanquan Gu", "authorids": "~Qiaobo_Li1;~Zixiang_Chen1;~Yihe_Deng1;~Yiwen_Kou1;~Yuan_Cao1;~Quanquan_Gu1", "gender": "M;M;F;F;M;M", "homepage": ";https://sites.google.com/view/zxchen;;https://evankou.github.io/;https://yuancaohku.github.io/;http://web.cs.ucla.edu/~qgu/", "dblp": ";137/3624;230/8011;323/9058;;50/4597", "google_scholar": ";6nrCHr0AAAAJ;7Lix1poAAAAJ;https://scholar.google.com/citations?hl=en;-VGnHI4AAAAJ;GU9HgNAAAAAJ", "orcid": ";;;;;", "linkedin": "qiaobo-li-581815251/;;;yiwen-kou-5a444916b/;;", "or_profile": "~Qiaobo_Li1;~Zixiang_Chen1;~Yihe_Deng1;~Yiwen_Kou1;~Yuan_Cao1;~Quanquan_Gu1", "aff": "Department of Computer Science, University of Illinois at Urbana-Champaign; University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles;University of Hong Kong;University of California, Los Angeles", "aff_domain": "cs.illinois.edu;cs.ucla.edu;ucla.edu;ucla.edu;hku.hk;cs.ucla.edu", "position": "PhD student;PhD student;PhD student;PhD student;Assistant Professor;Associate Professor", "bibtex": "@misc{\nli2024on,\ntitle={On the Power of Multitask Representation Learning with Gradient Descent},\nauthor={Qiaobo Li and Zixiang Chen and Yihe Deng and Yiwen Kou and Yuan Cao and Quanquan Gu},\nyear={2024},\nurl={https://openreview.net/forum?id=6Ey8mAuLiw}\n}", "github": "", "project": "", "reviewers": "vmJ4;yKNZ;rtpX;tYKP", "site": "https://openreview.net/forum?id=6Ey8mAuLiw", "pdf_size": 2129620, "rating": "3;6;6;6", "confidence": 
"3;3;2;4", "soundness": "2;3;3;3", "contribution": "2;3;2;3", "presentation": "2;3;3;3", "wc_summary": "48;71;73;67", "wc_strengths": "29;139;130;39", "wc_weaknesses": "149;161;269;79", "wc_questions": "19;103;192;156", "wc_review": "245;474;664;341", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "670;1616;1598;1211", "reply_reviewers": "0;0;0;0", "reply_authors": "2;3;3;2", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 64.75, 9.908960591303208 ], "wc_strengths_avg": [ 84.25, 50.47462233637811 ], "wc_weaknesses_avg": [ 164.5, 67.9760987406603 ], "wc_questions_avg": [ 117.5, 65.08648093114269 ], "wc_review_avg": [ 431.0, 157.18937623134713 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1273.75, 384.2931010309709 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:jnMKiirk250J:scholar.google.com/&scioq=On+the+Power+of+Multitask+Representation+Learning+with+Gradient+Descent&hl=en&as_sdt=0,47", "gs_version_total": 2, "aff_unique_index": "0;1;1;1;2;1", "aff_unique_norm": "University of Illinois Urbana-Champaign;University of California, Los Angeles;University of Hong Kong", "aff_unique_dep": "Department of Computer Science;;", "aff_unique_url": "https://illinois.edu;https://www.ucla.edu;https://www.hku.hk", "aff_unique_abbr": "UIUC;UCLA;HKU", "aff_campus_unique_index": "0;1;1;1;2;1", "aff_campus_unique": "Urbana-Champaign;Los Angeles;Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;1;0", "aff_country_unique": "United States;China" }, { "id": "6FAH0SgQzO", "title": "FedRC: Tackling Diverse Distribution Shifts Challenge in Federated Learning by Robust Clustering", "track": "main", "status": "Reject", "tldr": "", "abstract": "Federated Learning (FL) is a machine learning paradigm that safeguards privacy by retaining client data on edge devices. However, optimizing FL in practice can be challenging due to the diverse and heterogeneous nature of the learning system. Though recent research has focused on improving the optimization of FL when distribution shifts occur among clients, ensuring global performance when multiple types of distribution shifts occur simultaneously among clients---such as feature distribution shift, label distribution shift, and concept shift---remain under-explored.\n\nIn this paper, we identify the learning challenges posed by the simultaneous occurrence of diverse distribution shifts and propose a clustering principle to overcome these challenges. Through our research, we find that existing methods fail to address the clustering principle. Therefore, we propose a novel clustering algorithm framework, dubbed as FedRC, which adheres to our proposed clustering principle by incorporating a bi-level optimization problem and a novel objective function. 
Extensive experiments demonstrate that FedRC significantly outperforms other SOTA cluster-based FL methods.\n Our code will be publicly available.", "keywords": "Deep Learning;Distribution Shifts;Generalization;Federated Learning", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/ad3c1cf78d293f8a6ab0728857203c21e2c32267.zip", "author": "Yongxin Guo;Xiaoying Tang;Tao Lin", "authorids": "~Yongxin_Guo1;~Xiaoying_Tang2;~Tao_Lin1", "gender": "M;F;M", "homepage": "https://gyxxyg.github.io/yongxinguo/;https://sse.cuhk.edu.cn/en/faculty/tangxiaoying;https://lins-lab.github.io/", "dblp": ";134/9714-2;64/4492-4.html", "google_scholar": "5Cl1GZwAAAAJ;https://scholar.google.com/citations?hl=zh-TW;QE9pa_cAAAAJ", "orcid": "0009-0001-8652-0722;0000-0003-3955-1195;0000-0002-3246-6935", "linkedin": ";;", "or_profile": "~Yongxin_Guo1;~Xiaoying_Tang2;~Tao_Lin1", "aff": "Tencent;The Chinese University of Hong Kong, Shenzhen;Westlake University", "aff_domain": "tencent.com;cuhk.edu.cn;westlake.edu", "position": "Intern;Assistant Professor;Assistant Professor", "bibtex": "@misc{\nguo2024fedrc,\ntitle={Fed{RC}: Tackling Diverse Distribution Shifts Challenge in Federated Learning by Robust Clustering},\nauthor={Yongxin Guo and Xiaoying Tang and Tao Lin},\nyear={2024},\nurl={https://openreview.net/forum?id=6FAH0SgQzO}\n}", "github": "", "project": "", "reviewers": "gsbU;kcSi;xiXR", "site": "https://openreview.net/forum?id=6FAH0SgQzO", "pdf_size": 3506410, "rating": "5;8;8", "confidence": "4;4;4", "soundness": "2;4;4", "contribution": "3;3;3", "presentation": "3;3;4", "wc_summary": "79;71;125", "wc_strengths": "229;65;62", "wc_weaknesses": "188;74;101", "wc_questions": "171;1;185", "wc_review": "667;211;473", "wc_reply_reviewers": "399;0;0", "wc_reply_authors": "3257;510;1515", "reply_reviewers": "2;0;0", "reply_authors": "7;1;3", "rating_avg": [ 7.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.3333333333333335, 0.9428090415820634 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 91.66666666666667, 23.79542439676633 ], "wc_strengths_avg": [ 118.66666666666667, 78.02706083290734 ], "wc_weaknesses_avg": [ 121.0, 48.641546028061235 ], "wc_questions_avg": [ 119.0, 83.63412381717565 ], "wc_review_avg": [ 450.3333333333333, 186.84991005855179 ], "wc_reply_reviewers_avg": [ 133.0, 188.09040379562165 ], "wc_reply_authors_avg": [ 1760.6666666666667, 1134.8322440881832 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 3.6666666666666665, 2.494438257849294 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15939733531129246781&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;2", "aff_unique_norm": "Tencent;Chinese University of Hong Kong;Westlake University", "aff_unique_dep": "Tencent Holdings Limited;;", "aff_unique_url": "https://www.tencent.com;https://www.cuhk.edu.cn;https://www.westlake.edu.cn", "aff_unique_abbr": "Tencent;CUHK;WU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "6FvBXs8t8K", "title": "Learn from the Past: A Proxy based Adversarial Defense Framework to Boost Robustness", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "In light of the vulnerability 
of deep learning models to adversarial samples and the ensuing security issues, a range of methods, including Adversarial Training (AT) as a prominent representative, aimed at enhancing model robustness against various adversarial attacks, have seen rapid development. However, existing methods essentially assist the current state of target model to defend against parameter-oriented adversarial attacks with explicit or implicit computation burdens, which also suffers from unstable convergence behavior due to inconsistency of optimization trajectories. Diverging from previous work, this paper reconsiders the update rule of target model and corresponding deficiency to defend based on its current state. By introducing the historical state of the target model as a proxy, which is endowed with much prior information for defense, we formulate a two-stage update rule, resulting in a general adversarial defense framework, which we refer to as 'LAST' ($\\textbf{L}$earn from the P$\\textbf{ast}$). Besides, we devise a Self Distillation (SD) based defense objective to constrain the update process of the proxy model without the introduction of larger teacher models. Experimentally, we demonstrate consistent and significant performance enhancements by refining a series of single-step and multi-step AT methods (e.g., up to $\\bf 9.2$% and $\\bf 20.5$% improvement of Robust Accuracy (RA) on CIFAR10 and CIFAR100 datasets, respectively) across various datasets, backbones and attack modalities, and validate its ability to enhance training stability and ameliorate catastrophic overfitting issues meanwhile.", "keywords": "Adversarial training;adversarial defense framework;two-stage;proxy model;self distillation", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/415f2064094e2b00e3dd91de7e900bd34e3ed5c8.zip", "author": "Yaohua Liu;Jiaxin Gao;Zhu Liu;Xianghao Jiao;Xin Fan;Risheng Liu", "authorids": "~Yaohua_Liu1;~Jiaxin_Gao1;~Zhu_Liu3;~Xianghao_Jiao1;~Xin_Fan1;~Risheng_Liu1", "gender": "M;;M;M;M;", "homepage": ";;https://scholar.google.com/citations?user=WDjOXbIAAAAJ&hl=zh-CN;;http://dutmedia.org;https://rsliu.tech/", "dblp": "180/2847;;14/191-4.html;348/5634;87/3021-1;82/8066", "google_scholar": "6l4tf4kAAAAJ;;WDjOXbIAAAAJ;vMJatVIAAAAJ;ZJXY-LYAAAAJ;DzuhImQAAAAJ", "orcid": "0000-0002-9057-1645;;0000-0003-0975-2711;0009-0004-1032-169X;;", "linkedin": "%E8%80%80%E5%8D%8E-%E5%88%98-5786b2211/;;;;xin-fan-ab78464/;", "or_profile": "~Yaohua_Liu1;~Jiaxin_Gao1;~Zhu_Liu3;~Xianghao_Jiao1;~Xin_Fan1;~Risheng_Liu1", "aff": "Dalian University of Technology;;Dalian University of Technology;Dalian University of Technology;Dalian University of Technology;Dalian University of Technology", "aff_domain": "dlut.edu.cn;;dlut.edu.cn;dlut.edu.cn;dlut.edu.cn;dlut.edu.cn", "position": "PhD student;;PhD student;MS student;Full Professor;Full Professor", "bibtex": "@misc{\nliu2024learn,\ntitle={Learn from the Past: A Proxy based Adversarial Defense Framework to Boost Robustness},\nauthor={Yaohua Liu and Jiaxin Gao and Zhu Liu and Xianghao Jiao and Xin Fan and Risheng Liu},\nyear={2024},\nurl={https://openreview.net/forum?id=6FvBXs8t8K}\n}", "github": "", "project": "", "reviewers": "MeBp;9nzt;atuW;EcMM", "site": "https://openreview.net/forum?id=6FvBXs8t8K", "pdf_size": 4521833, "rating": "3;3;6;6", "confidence": "5;4;4;4", "soundness": "2;2;3;3", "contribution": "2;1;3;3", "presentation": "2;3;3;3", "wc_summary": "98;48;75;17", "wc_strengths": "34;26;63;30", "wc_weaknesses": 
"263;456;35;10", "wc_questions": "73;4;2;3", "wc_review": "468;534;175;60", "wc_reply_reviewers": "190;0;0;0", "wc_reply_authors": "1909;3025;531;471", "reply_reviewers": "1;0;0;0", "reply_authors": "4;6;2;2", "rating_avg": [ 4.5, 1.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 59.5, 30.25309901481169 ], "wc_strengths_avg": [ 38.25, 14.566657131957214 ], "wc_weaknesses_avg": [ 191.0, 182.00686800228172 ], "wc_questions_avg": [ 20.5, 30.319135871591065 ], "wc_review_avg": [ 309.25, 197.39728341595787 ], "wc_reply_reviewers_avg": [ 47.5, 82.27241335952168 ], "wc_reply_authors_avg": [ 1484.0, 1059.443721959784 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.5, 1.6583123951777 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5773502691896258, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:_6lK14kmeHMJ:scholar.google.com/&scioq=Learn+from+the+Past:+A+Proxy+based+Adversarial+Defense+Framework+to+Boost+Robustness&hl=en&as_sdt=0,5", "gs_version_total": 2, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Dalian University of Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.dlut.edu.cn/", "aff_unique_abbr": "DUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "6GySuKTJcd", "title": "Energy-Guided Continuous Entropic Barycenter Estimation for General Costs", "track": "main", "status": "Reject", "tldr": "", "abstract": "Optimal transport (OT) barycenters are a mathematically grounded way of averaging probability distributions while capturing their geometric properties. In a nutshell, the task is to take the average of a collection of probability distributions w.r.t. given OT discrepancies. We propose a novel algorithm for approximating the continuous Entropic OT (EOT) barycenter for arbitrary OT cost functions. Our approach builds upon the dual reformulation of the EOT problem based on weak OT, which has recently gained the attention of the ML community. Beyond its novelty, our method enjoys several advantageous properties: (i) we establish quality bounds for the recovered solution; (ii) this approach seemlessly interconnects with the Energy-Based Models (EBMs) learning procedure, enabling the use of well-tuned algorithms for the problem of interest; (iii) it provides an intuitive optimization scheme avoiding min-max, reinforce and other intricate technical tricks. For validation, we consider several low-dimensional scenarios and image-space setups, including *non-Euclidean* cost functions. 
Furthermore, we investigate the practical task of learning the barycenter on an image manifold generated by a pretrained generative model, opening up new directions for real-world applications.", "keywords": "energy-based model;generative model;optimal transport;entropic optimal transport barycenters;general optimal transport cost", "primary_area": "generative models", "supplementary_material": "/attachment/5c767d87daa2d7525106cf5a8715918e94d2abd9.zip", "author": "Alexander Kolesov;Petr Mokrov;Igor Udovichenko;Milena Gazdieva;Gudmund Pammer;Evgeny Burnaev;Alexander Korotin", "authorids": "~Alexander_Kolesov1;~Petr_Mokrov1;~Igor_Udovichenko1;~Milena_Gazdieva1;~Gudmund_Pammer1;~Evgeny_Burnaev1;~Alexander_Korotin2", "gender": "M;M;;F;M;M;", "homepage": "https://github.com/Kolessov;https://github.com/PetrMokrov;;;https://people.math.ethz.ch/~gpammer/;http://faculty.skoltech.ru/people/evgenyburnaev;", "dblp": "287/4380;;;309/6585;;144/7845;", "google_scholar": "WyAI_wUAAAAJ;CRsi4IkAAAAJ;;h52_Zx8AAAAJ;ipItetYAAAAJ;https://scholar.google.ru/citations?user=pCRdcOwAAAAJ;", "orcid": ";;;0000-0003-0047-1577;0000-0003-2494-8739;0000-0001-8424-0690;", "linkedin": ";;;;;;", "or_profile": "~Alexander_Kolesov1;~Petr_Mokrov1;~Igor_Udovichenko1;~Milena_Gazdieva1;~Gudmund_Pammer1;~Evgeny_Burnaev1;~Alexander_Korotin2", "aff": "The Skolkovo Institute of Science and Technology;Skolkovo Institute of Science and Technology;;Skolkovo Institute of Science and Technology;ETHZ - ETH Zurich;Skolkovo Institute of Science and Technology;", "aff_domain": "skoltech.ru;skolkovotech.ru;;skoltech.ru;ethz.ch;skoltech.ru;", "position": "PhD student;PhD student;;PhD student;Postdoc;Full Professor;", "bibtex": "@misc{\nkolesov2024energyguided,\ntitle={Energy-Guided Continuous Entropic Barycenter Estimation for General Costs},\nauthor={Alexander Kolesov and Petr Mokrov and Igor Udovichenko and Milena Gazdieva and Gudmund Pammer and Evgeny Burnaev and Alexander Korotin},\nyear={2024},\nurl={https://openreview.net/forum?id=6GySuKTJcd}\n}", "github": "", "project": "", "reviewers": "BAkA;97mW;16og;7U49", "site": "https://openreview.net/forum?id=6GySuKTJcd", "pdf_size": 10770669, "rating": "5;6;6;8", "confidence": "3;4;3;3", "soundness": "3;3;3;4", "contribution": "2;2;3;3", "presentation": "3;3;3;4", "wc_summary": "79;94;64;70", "wc_strengths": "91;123;24;81", "wc_weaknesses": "110;307;74;42", "wc_questions": "264;49;134;3", "wc_review": "544;573;296;196", "wc_reply_reviewers": "148;29;64;14", "wc_reply_authors": "1355;1534;1248;176", "reply_reviewers": "1;1;1;1", "reply_authors": "3;3;2;1", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 76.75, 11.299889379989523 ], "wc_strengths_avg": [ 79.75, 35.73076405564258 ], "wc_weaknesses_avg": [ 133.25, 103.15855514691934 ], "wc_questions_avg": [ 112.5, 99.29375609775269 ], "wc_review_avg": [ 402.25, 160.52784026454725 ], "wc_reply_reviewers_avg": [ 63.75, 51.91519527074901 ], "wc_reply_authors_avg": [ 1078.25, 530.8410190443086 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.13245323570650439, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16650540745146647880&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0;1;0", 
"aff_unique_norm": "Skolkovo Institute of Science and Technology;ETH Zurich", "aff_unique_dep": ";", "aff_unique_url": "https://www.skoltech.ru;https://www.ethz.ch", "aff_unique_abbr": "Skoltech;ETHZ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "Russian Federation;Switzerland" }, { "title": "Personalize Segment Anything Model with One Shot", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19398", "id": "6Gzkhoc6YS", "author_site": "Renrui Zhang, Zhengkai Jiang, Ziyu Guo, Shilin Yan, Junting Pan, Hao Dong, Yu Qiao, Gao Peng, Hongsheng Li", "tldr": "", "abstract": "Driven by large-data pre-training, Segment Anything Model (SAM) has been demonstrated as a powerful promptable framework, revolutionizing the segmentation field. Despite the generality, customizing SAM for specific visual concepts without man-powered prompting is under-explored, e.g., automatically segmenting your pet dog in numerous images. In this paper, we introduce a training-free Personalization approach for SAM, termed PerSAM. Given only one-shot data, i.e., a single image with a reference mask, we first obtain a positive-negative location prior for the target concept in new images. Then, aided by target visual semantics, we empower SAM for personalized object segmentation via two proposed techniques: target-guided attention and target-semantic prompting. In this way, we can effectively customize the general-purpose SAM for private use without any training. To further alleviate the ambiguity of segmentation scales, we present an efficient one-shot fine-tuning variant, PerSAM-F. Freezing the entire SAM, we introduce a scale-aware fine-tuning to aggregate multi-scale masks, which only tunes 2 parameters within 10 seconds for improved performance. To demonstrate our efficacy, we construct a new dataset, PerSeg, for the evaluation of personalized object segmentation, and also test our methods on various one-shot image and video segmentation benchmarks. Besides, we propose to leverage PerSAM to improve DreamBooth for personalized text-to-image synthesis. By mitigating the disturbance of training-set backgrounds, our approach showcases better target appearance generation and higher fidelity to the input text prompt. 
Code is released at https://github.com/ZrrSkywalker/Personalize-SAM.", "keywords": "Segment Anything Model (SAM);one-shot learning;text-to-image generation", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "/attachment/9aa69c2ce9ae5d1634a5b7c812006ea90b1c64f0.pdf", "author": "Renrui Zhang;Zhengkai Jiang;Ziyu Guo;Shilin Yan;Junting Pan;Hao Dong;Yu Qiao;Peng Gao;Hongsheng Li", "authorids": "~Renrui_Zhang1;~Zhengkai_Jiang1;~Ziyu_Guo2;~Shilin_Yan1;~Junting_Pan2;~Hao_Dong3;~Yu_Qiao1;~Peng_Gao3;~Hongsheng_Li3", "gender": "M;;F;M;M;M;;;M", "homepage": ";;;https://scholar.google.com/citations?user=2VhjOykAAAAJ&hl=en;https://junting.github.io/;https://zsdonghao.github.io;;;http://www.ee.cuhk.edu.hk/~hsli", "dblp": "244/1748;;;166/3197.html;166/1669;14/1525-3.html;;;27/7402-1", "google_scholar": "YlL3xN4AAAAJ;;S9GLetwAAAAJ;2VhjOykAAAAJ;8Xt3TnAAAAAJ;xLFL4sMAAAAJ;;;BN2Ze-QAAAAJ", "orcid": ";;;;;0000-0003-2261-9122;;;", "linkedin": ";;;;junting-pan/;;;;", "or_profile": "~Renrui_Zhang1;~Zhengkai_Jiang1;~Ziyu_Guo2;~Shilin_Yan1;~Junting_Pan2;~Hao_Dong3;~Yu_Qiao1;~Peng_Gao3;~Hongsheng_Li3", "aff": "MMLab of CUHK & Shanghai AI Laboratory;;Department of Computer Science and Engineering, The Chinese University of Hong Kong;Fudan University;The Chinese University of Hong Kong;Peking University;;;The Chinese University of Hong Kong", "aff_domain": "pjlab.org.cn;;cse.cuhk.edu.hk;fudan.edu.cn;cuhk.edu.hk;pku.edu.cn;;;cuhk.edu.hk", "position": "PhD student;;PhD student;MS student;Postdoc;Assistant Professor;;;Associate Professor", "bibtex": "@inproceedings{\nzhang2024personalize,\ntitle={Personalize Segment Anything Model with One Shot},\nauthor={Renrui Zhang and Zhengkai Jiang and Ziyu Guo and Shilin Yan and Junting Pan and Hao Dong and Yu Qiao and Peng Gao and Hongsheng Li},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=6Gzkhoc6YS}\n}", "github": "", "project": "", "reviewers": "Fp1Y;XA1U;inm4", "pdf_size": 8743264, "rating": "6;6;8", "confidence": "5;4;5", "soundness": "3;3;4", "contribution": "2;3;4", "presentation": "3;3;4", "wc_summary": "74;223;59", "wc_strengths": "62;252;95", "wc_weaknesses": "112;111;64", "wc_questions": "35;58;87", "wc_review": "283;644;305", "wc_reply_reviewers": "24;23;26", "wc_reply_authors": "934;591;616", "reply_reviewers": "1;1;1", "reply_authors": "3;2;2", "rating_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 118.66666666666667, 74.02852303147904 ], "wc_strengths_avg": [ 136.33333333333334, 82.89082511888085 ], "wc_weaknesses_avg": [ 95.66666666666667, 22.395436042987765 ], "wc_questions_avg": [ 60.0, 21.275964529643932 ], "wc_review_avg": [ 410.6666666666667, 165.2358583627927 ], "wc_reply_reviewers_avg": [ 24.333333333333332, 1.247219128924647 ], "wc_reply_authors_avg": [ 713.6666666666666, 156.13313407331435 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 233, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3983746222204517152&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "openreview": 
"https://openreview.net/forum?id=6Gzkhoc6YS", "pdf": "https://openreview.net/pdf?id=6Gzkhoc6YS", "email": "pjlab.org.cn;;cse.cuhk.edu.hk;fudan.edu.cn;cuhk.edu.hk;pku.edu.cn;;;cuhk.edu.hk", "author_num": 9, "aff_unique_index": "0;0;1;0;2;0", "aff_unique_norm": "Chinese University of Hong Kong;Fudan University;Peking University", "aff_unique_dep": "MMLab;;", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.fudan.edu.cn;http://www.pku.edu.cn", "aff_unique_abbr": "CUHK;Fudan;Peking U", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "6HABsUI6UF", "title": "Knowledge Accumulation in Continually Learned Representations and the Issue of Feature Forgetting", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "While it is established that neural networks suffer from catastrophic forgetting ``at the output level'', it is debated whether this is also the case at the level of representations. Some studies ascribe a certain level of innate robustness to representations, that they only forget minimally and no critical information, while others claim that representations are also severely affected by forgetting. To settle this debate, we first discuss how this apparent disagreement might stem from the coexistence of two phenomena that affect the quality of continually learned representations: knowledge accumulation and feature forgetting. We then show that, even though it is true that feature forgetting can be small in absolute terms, newly learned information is forgotten just as catastrophically at the level of representations as it is at the output level. Next we show that this feature forgetting is problematic as it substantially slows down knowledge accumulation. We further show that representations that are continually learned through both supervised and self-supervised learning suffer from feature forgetting. 
Finally, we study how feature forgetting and knowledge accumulation are affected by different types of continual learning methods.", "keywords": "continual learning;catastrophic forgetting;knowledge accumulation;conceptual analysis", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "/attachment/4358574358669e6f26ba2c5bdc9c8be3cb00bb89.pdf", "author": "Timm Hess;Eli Verwimp;Gido M van de Ven;Tinne Tuytelaars", "authorids": "~Timm_Hess1;~Eli_Verwimp1;~Gido_M_van_de_Ven1;~Tinne_Tuytelaars1", "gender": "M;M;;", "homepage": ";;;", "dblp": ";290/1282;;", "google_scholar": ";HIh4i18AAAAJ;;", "orcid": ";0000-0001-5469-3103;;", "linkedin": "timm-hess-2b8664137/;;;", "or_profile": "~Timm_Hess1;~Eli_Verwimp1;~Gido_M_van_de_Ven1;~Tinne_Tuytelaars1", "aff": "KU Leuven;Department of Electrical Engineering, KU Leuven, Belgium, KU Leuven;;", "aff_domain": "esat.kuleuven.be;esat.kuleuven.be;;", "position": "PhD student;PhD student;;", "bibtex": "@misc{\nhess2024knowledge,\ntitle={Knowledge Accumulation in Continually Learned Representations and the Issue of Feature Forgetting},\nauthor={Timm Hess and Eli Verwimp and Gido M van de Ven and Tinne Tuytelaars},\nyear={2024},\nurl={https://openreview.net/forum?id=6HABsUI6UF}\n}", "github": "", "project": "", "reviewers": "BtF4;acbx;1Fn7;gcjs", "site": "https://openreview.net/forum?id=6HABsUI6UF", "pdf_size": 425328, "rating": "3;3;3;6", "confidence": "4;5;4;5", "soundness": "2;2;3;3", "contribution": "1;1;2;3", "presentation": "2;1;2;2", "wc_summary": "53;88;52;96", "wc_strengths": "15;21;31;47", "wc_weaknesses": "141;460;198;53", "wc_questions": "1;1;2;92", "wc_review": "210;570;283;288", "wc_reply_reviewers": "27;0;16;0", "wc_reply_authors": "383;593;175;99", "reply_reviewers": "1;0;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 1.75, 0.82915619758885 ], "presentation_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_avg": [ 72.25, 19.954636052807377 ], "wc_strengths_avg": [ 28.5, 12.114041439585717 ], "wc_weaknesses_avg": [ 213.0, 151.6723442160765 ], "wc_questions_avg": [ 24.0, 39.26194085880116 ], "wc_review_avg": [ 337.75, 137.59791967904167 ], "wc_reply_reviewers_avg": [ 10.75, 11.431863365173676 ], "wc_reply_authors_avg": [ 312.5, 192.444147741624 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2275169797400754065&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1", "aff_unique_norm": "Katholieke Universiteit Leuven;KU Leuven", "aff_unique_dep": ";Department of Electrical Engineering", "aff_unique_url": "https://www.kuleuven.be;https://www.kuleuven.be", "aff_unique_abbr": "KU Leuven;KU Leuven", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Belgium" }, { "id": "6Hv6ADDy0L", "title": "Physics-aware Hand Object Interaction Denoising", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "The credibility and practicality of a reconstructed hand-object interaction sequence depend largely on its physical plausibility. However, due to high occlusions during hand-object interaction, physical plausibility remains a challenging criterion for purely vision-based tracking methods. 
To address this issue and enhance the results of existing hand trackers, this paper proposes a novel physically-aware hand motion de-noising method. Specifically, we introduce two learned loss terms that explicitly capture two crucial aspects of physical plausibility: grasp credibility and manipulation feasibility. These terms are used to train a physically-aware de-noising network. Qualitative and quantitative experiments demonstrate that our approach significantly improves both fine-grained physical plausibility and overall pose accuracy, surpassing current state-of-the-art de-noising methods.", "keywords": "physcis-aware neural network;hand motion denoising;hand object interaction", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/1a5f5cff33d6898be83b725ff59bac20fcd8c182.zip", "author": "Haowen Luo;Yunze Liu;Li Yi", "authorids": "~Haowen_Luo1;~Yunze_Liu2;~Li_Yi2", "gender": ";M;M", "homepage": ";https://yunzeliu.github.io;https://ericyi.github.io/", "dblp": ";282/1192;26/4239-1", "google_scholar": ";xYVEg0cAAAAJ;UyZL660AAAAJ", "orcid": ";0009-0002-3148-8822;", "linkedin": ";;", "or_profile": "~Haowen_Luo1;~Yunze_Liu2;~Li_Yi2", "aff": ";IIIS, Tsinghua University;Tsinghua University", "aff_domain": ";mails.tsinghua.edu.cn;tsinghua.edu.cn", "position": ";PhD student;Assistant Professor", "bibtex": "@misc{\nluo2024physicsaware,\ntitle={Physics-aware Hand Object Interaction Denoising},\nauthor={Haowen Luo and Yunze Liu and Li Yi},\nyear={2024},\nurl={https://openreview.net/forum?id=6Hv6ADDy0L}\n}", "github": "", "project": "", "reviewers": "CERf;Xkao;R2Kj;ZPXX", "site": "https://openreview.net/forum?id=6Hv6ADDy0L", "pdf_size": 3367826, "rating": "3;3;6;8", "confidence": "4;4;4;3", "soundness": "2;2;3;3", "contribution": "2;2;2;3", "presentation": "2;2;2;3", "wc_summary": "51;123;57;62", "wc_strengths": "47;26;26;124", "wc_weaknesses": "100;307;117;478", "wc_questions": "15;5;55;9", "wc_review": "213;461;255;673", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 5.0, 2.1213203435596424 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 73.25, 28.98598799420161 ], "wc_strengths_avg": [ 55.75, 40.32601517631019 ], "wc_weaknesses_avg": [ 250.5, 154.45144868210204 ], "wc_questions_avg": [ 21.0, 19.949937343260004 ], "wc_review_avg": [ 400.5, 183.1959333609783 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8164965809277261, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2704536020240165749&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "Institute for Interdisciplinary Information Sciences", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "6HwamHLDa6", "title": "A Multi-In-Single-Out Network for Video Frame Interpolation without optical flow", "track": "main", "status": "Reject", "tldr": "", "abstract": "In general, deep 
learning-based video frame interpolation (VFI) methods have predominantly focused on estimating motion vectors between two input frames and warping them to the target time. While this approach has shown impressive performance for linear motion between two input frames, it exhibits limitations when dealing with occlusions and nonlinear movements. Recently, generative models have been applied to VFI to address these issues. However, as VFI is not a task focused on generating plausible images, but rather on predicting accurate intermediate frames between two given frames, performance limitations still persist. In this paper, we propose a multi-in-single-out (MISO) based VFI method that does not rely on motion vector estimation, allowing it to effectively model occlusions and nonlinear motion. Additionally, we introduce a novel motion perceptual loss that enables MISO-VFI to better capture the spatio-temporal correlations within the video frames. Our MISO-VFI method achieves state-of-the-art results on VFI benchmarks Vimeo90K, Middlebury, and UCF101, with a significant performance gap compared to existing approaches.", "keywords": "Video Frame Interpolation;Video Synthesis", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/e5c88df169fad3ee2b360b46dbdfd5138a18ddfd.zip", "author": "Jaemin Lee;Minseok Seo;Sangwoo Lee;Hyobin Park;Dong-Geol Choi", "authorids": "~Jaemin_Lee2;~Minseok_Seo1;~Sangwoo_Lee2;~Hyobin_Park1;~Dong-Geol_Choi2", "gender": "M;M;M;;F", "homepage": "https://sites.google.com/view/j911/home;https://sites.google.com/view/minseokcv/%ED%99%88;;;https://github.com/hbp001", "dblp": "45/3500;;31/5983;62/6155.html;", "google_scholar": "h1R6SZMAAAAJ;https://scholar.google.co.kr/citations?user=pOygDIIAAAAJ;PLgXBtEAAAAJ;;https://scholar.google.com/citations?view_op=list_works", "orcid": "0000-0002-2154-300X;;0000-0001-5432-4770;;", "linkedin": ";;;;", "or_profile": "~Jaemin_Lee2;~Minseok_Seo1;~Sangwoo_Lee2;~Dong-Geol_Choi2;~Hyobin_Park2", "aff": "Testworks Inc.;SI Analytics;ANTLAB;Hanbat National University;Hanbat National University", "aff_domain": "testworks.co.kr;si-analytics.ai;antlab.co.kr;hanbat.ac.kr;hanbat.ac.kr", "position": "Researcher;Researcher;Researcher;Full Professor;MS student", "bibtex": "@misc{\nlee2024a,\ntitle={A Multi-In-Single-Out Network for Video Frame Interpolation without optical flow},\nauthor={Jaemin Lee and Minseok Seo and Sangwoo Lee and Hyobin Park and Dong-Geol Choi},\nyear={2024},\nurl={https://openreview.net/forum?id=6HwamHLDa6}\n}", "github": "", "project": "", "reviewers": "a9Au;CFmx;rS3w;WQaG", "site": "https://openreview.net/forum?id=6HwamHLDa6", "pdf_size": 2566335, "rating": "5;5;5;5", "confidence": "3;3;5;5", "soundness": "2;3;2;2", "contribution": "2;2;1;1", "presentation": "2;3;3;2", "wc_summary": "72;74;54;94", "wc_strengths": "32;12;18;38", "wc_weaknesses": "113;131;121;110", "wc_questions": "35;211;10;3", "wc_review": "252;428;203;245", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "469;591;422;363", "reply_reviewers": "0;0;0;0", "reply_authors": "2;2;2;2", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 4.0, 1.0 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 1.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 73.5, 14.168627315304754 ], "wc_strengths_avg": [ 25.0, 10.44030650891055 ], "wc_weaknesses_avg": [ 118.75, 8.13557004763649 ], "wc_questions_avg": [ 64.75, 85.27125834652611 ], "wc_review_avg": [ 
282.0, 86.35102778774552 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 461.25, 83.79849342321137 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:AIUSxowBXEMJ:scholar.google.com/&scioq=A+Multi-In-Single-Out+Network+for+Video+Frame+Interpolation+without+optical+flow&hl=en&as_sdt=0,5", "gs_version_total": 3, "aff_unique_index": "0;1;2;3;3", "aff_unique_norm": "Testworks Inc.;SI Analytics;ANTLAB;Hanbat National University", "aff_unique_dep": ";;;", "aff_unique_url": ";;;https://www.hanbat.ac.kr", "aff_unique_abbr": ";;;HNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;2;2", "aff_country_unique": "United States;;South Korea" }, { "id": "6I7UsvlDPj", "title": "LaMPP: Language Models as Probabilistic Priors for Perception and Action", "track": "main", "status": "Reject", "tldr": "", "abstract": "Language models trained on large text corpora encode rich distributional information about real-world environments and action sequences. This information plays a crucial role in current approaches to language processing tasks like question answering and instruction generation. We describe how to leverage language models for *non-linguistic* perception and control tasks. Our approach casts labeling and decision-making as inference in probabilistic graphical models in which language models parameterize prior distributions over labels, decisions and parameters, making it possible to integrate uncertain observations and incomplete background knowledge in a principled way. Applied to semantic segmentation, household navigation, and activity recognition tasks, this approach improves predictions on rare, out-of-distribution, and structurally novel inputs.", "keywords": "Machine Learning;Natural Language Processing;Language Models;Priors", "primary_area": "neurosymbolic & hybrid AI systems (physics-informed, logic & formal reasoning, etc.)", "supplementary_material": "/attachment/f9fbed4f5968a730146faac010dc312e3d283f5d.zip", "author": "Belinda Z. Li;William Chen;Pratyusha Sharma;Jacob Andreas", "authorids": "~Belinda_Z._Li1;~William_Chen1;~Pratyusha_Sharma1;~Jacob_Andreas1", "gender": "M;F;M;F", "homepage": ";https://pratyushasharma.github.io/;http://web.mit.edu/jda/www;https://belindal.github.io/", "dblp": ";228/7904;97/8154;263/9914", "google_scholar": "xUeq5EAAAAAJ;RGiCLUgAAAAJ;dnZ8udEAAAAJ;700fyvEAAAAJ", "orcid": ";;;", "linkedin": "william-chen-a3956516b/;;;", "or_profile": "~William_Chen1;~Pratyusha_Sharma1;~Jacob_Andreas1;~Belinda_Zou_Li1", "aff": "University of California, Berkeley;Massachusetts Institute of Technology;Microsoft;Massachusetts Institute of Technology", "aff_domain": "berkeley.edu;mit.edu;microsoft.com;mit.edu", "position": "PhD student;PhD student;Researcher;PhD student", "bibtex": "@misc{\nli2024lampp,\ntitle={La{MPP}: Language Models as Probabilistic Priors for Perception and Action},\nauthor={Belinda Z. 
Li and William Chen and Pratyusha Sharma and Jacob Andreas},\nyear={2024},\nurl={https://openreview.net/forum?id=6I7UsvlDPj}\n}", "github": "", "project": "", "reviewers": "C44C;xpjw;WR1X", "site": "https://openreview.net/forum?id=6I7UsvlDPj", "pdf_size": 39725166, "rating": "5;6;6", "confidence": "4;3;3", "soundness": "3;2;3", "contribution": "2;2;3", "presentation": "3;3;3", "wc_summary": "48;148;89", "wc_strengths": "9;46;45", "wc_weaknesses": "176;116;81", "wc_questions": "4;99;15", "wc_review": "237;409;230", "wc_reply_reviewers": "0;38;0", "wc_reply_authors": "809;1501;495", "reply_reviewers": "0;1;0", "reply_authors": "2;3;2", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 95.0, 41.04469108991645 ], "wc_strengths_avg": [ 33.333333333333336, 17.21110752456745 ], "wc_weaknesses_avg": [ 124.33333333333333, 39.228674319799396 ], "wc_questions_avg": [ 39.333333333333336, 42.42902570437156 ], "wc_review_avg": [ 292.0, 82.7808351411525 ], "wc_reply_reviewers_avg": [ 12.666666666666666, 17.913371790059205 ], "wc_reply_authors_avg": [ 935.0, 420.250718817549 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9999999999999997, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4920872563659380315&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "University of California, Berkeley;Massachusetts Institute of Technology;Microsoft", "aff_unique_dep": ";;Microsoft Corporation", "aff_unique_url": "https://www.berkeley.edu;https://web.mit.edu;https://www.microsoft.com", "aff_unique_abbr": "UC Berkeley;MIT;Microsoft", "aff_campus_unique_index": "0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "6INCxtPVXd", "title": "Mode-Aware Continual Learning for Conditional Generative Adversarial Networks", "track": "main", "status": "Reject", "tldr": "", "abstract": "The main challenge in continual learning for generative models is to effectively learn new target modes with limited samples while preserving previously learned ones. To this end, we introduce a new continual learning approach for generative modeling in conjunction with a mode-affinity score specifically designed for conditional generative adversarial networks. First, the generator produces samples of existing modes for subsequent replay. The discriminator is then used to compute the mode similarity measure, which identifies a set of closest existing modes to the target. Subsequently, a label for the target mode is generated and given as a weighted average of the labels within this set. We extend the continual learning model by training it on the target data with the newly-generated label, while performing memory replay to mitigate the risk of catastrophic forgetting. 
Experimental results on benchmark datasets demonstrate the gains of our approach over the state-of-the-art methods, even when using fewer training samples.", "keywords": "continual learning;generative model;mode affinity", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "", "author": "Cat Phuoc Le;Juncheng Dong;Ahmed Aloui;Vahid Tarokh", "authorids": "~Cat_Phuoc_Le1;~Juncheng_Dong1;~Ahmed_Aloui1;~Vahid_Tarokh1", "gender": "M;;M;", "homepage": "https://scholars.duke.edu/person/cat.le;;;", "dblp": "251/5583;;116/6738;", "google_scholar": "gSzKGdQAAAAJ;;;", "orcid": "0000-0002-9121-9395;;;", "linkedin": "catphuocle/;;ahmed-aloui-b06547153/;", "or_profile": "~Cat_Phuoc_Le1;~Juncheng_Dong1;~Ahmed_Aloui1;~Vahid_Tarokh1", "aff": "Duke University;;Duke University, Duke University;", "aff_domain": "duke.edu;;ece.duke.edu;", "position": "Postdoc;;PhD student;", "bibtex": "@misc{\nle2024modeaware,\ntitle={Mode-Aware Continual Learning for Conditional Generative Adversarial Networks},\nauthor={Cat Phuoc Le and Juncheng Dong and Ahmed Aloui and Vahid Tarokh},\nyear={2024},\nurl={https://openreview.net/forum?id=6INCxtPVXd}\n}", "github": "", "project": "", "reviewers": "M2Vt;DKw7;cors;y3EF", "site": "https://openreview.net/forum?id=6INCxtPVXd", "pdf_size": 9176444, "rating": "3;3;5;5", "confidence": "5;4;4;5", "soundness": "2;2;3;2", "contribution": "1;1;3;2", "presentation": "1;1;3;2", "wc_summary": "53;73;93;108", "wc_strengths": "16;30;63;46", "wc_weaknesses": "294;273;167;169", "wc_questions": "5;1;82;20", "wc_review": "368;377;405;343", "wc_reply_reviewers": "0;0;137;0", "wc_reply_authors": "447;432;462;317", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;2;1", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 1.75, 0.82915619758885 ], "presentation_avg": [ 1.75, 0.82915619758885 ], "wc_summary_avg": [ 81.75, 20.72890493972125 ], "wc_strengths_avg": [ 38.75, 17.5695048308141 ], "wc_weaknesses_avg": [ 225.75, 58.22961016527588 ], "wc_questions_avg": [ 27.0, 32.53459696999488 ], "wc_review_avg": [ 373.25, 22.16275028059469 ], "wc_reply_reviewers_avg": [ 34.25, 59.322740159234044 ], "wc_reply_authors_avg": [ 414.5, 57.282196186948 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2098866224242859711&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Duke University", "aff_unique_dep": "", "aff_unique_url": "https://www.duke.edu", "aff_unique_abbr": "Duke", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Conserve-Update-Revise to Cure Generalization and Robustness Trade-off in Adversarial Training", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19397", "id": "6IjN7oxjXt", "author_site": "Shruthi Gowda, Bahram Zonooz, Elahe Arani", "tldr": "", "abstract": "Adversarial training improves the robustness of neural networks against adversarial attacks, albeit at the expense of the trade-off between standard and robust generalization. 
To unveil the underlying factors driving this phenomenon, we examine the layer-wise learning capabilities of neural networks during the transition from a standard to an adversarial setting. Our empirical findings demonstrate that selectively updating specific layers while preserving others can substantially enhance the network's learning capacity. We, therefore, propose CURE, a novel training framework that leverages a gradient prominence criterion to perform selective conservation, updating, and revision of weights. Importantly, CURE is designed to be dataset- and architecture-agnostic, ensuring its applicability across various scenarios. It effectively tackles both memorization and overfitting issues, thus enhancing the trade-off between robustness and generalization and additionally, this training approach also aids in mitigating \"robust overfitting\". Furthermore, our study provides valuable insights into the mechanisms of selective adversarial training and offers a promising avenue for future research.", "keywords": "Adversarial training;Adversarial Robustness;Generalization;Robustness;Robust overfitting;Selective training", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Shruthi Gowda;Bahram Zonooz;Elahe Arani", "authorids": "~Shruthi_Gowda1;~Bahram_Zonooz1;~Elahe_Arani1", "gender": "F;M;F", "homepage": ";https://sites.google.com/view/bahramzonooz;https://sites.google.com/view/elahe-arani", "dblp": ";250/9573;", "google_scholar": "e9CgwRgAAAAJ;;e_I_v6cAAAAJ", "orcid": ";;0000-0002-0952-7007", "linkedin": "shruthi-gowda-19811b15/;;elahe-arani-630870b2/", "or_profile": "~Shruthi_Gowda1;~Bahram_Zonooz1;~Elahe_Arani1", "aff": "Eindhoven University of Technology;Eindhoven University of Technology;Wayve Technologies Ltd", "aff_domain": "tue.nl;tue.nl;wayve.ai", "position": "PhD student;Assistant Professor;Head of AI Research", "bibtex": "@inproceedings{\ngowda2024conserveupdaterevise,\ntitle={Conserve-Update-Revise to Cure Generalization and Robustness Trade-off in Adversarial Training},\nauthor={Shruthi Gowda and Bahram Zonooz and Elahe Arani},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=6IjN7oxjXt}\n}", "github": "", "project": "", "reviewers": "PH92;DhtN;i2U9;HvAu", "pdf_size": 2312333, "rating": "5;5;6;6", "confidence": "3;1;4;3", "soundness": "2;2;3;3", "contribution": "2;2;2;4", "presentation": "3;3;3;4", "wc_summary": "119;41;105;75", "wc_strengths": "107;8;42;52", "wc_weaknesses": "399;176;19;286", "wc_questions": "101;36;42;78", "wc_review": "726;261;208;491", "wc_reply_reviewers": "507;0;11;173", "wc_reply_authors": "3007;1129;873;2034", "reply_reviewers": "1;0;1;1", "reply_authors": "5;3;2;4", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 2.75, 1.0897247358851685 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 85.0, 29.966648127543394 ], "wc_strengths_avg": [ 52.25, 35.56947427219019 ], "wc_weaknesses_avg": [ 220.0, 140.29789734703795 ], "wc_questions_avg": [ 64.25, 26.61179249881526 ], "wc_review_avg": [ 421.5, 205.48296766398911 ], "wc_reply_reviewers_avg": [ 172.75, 204.77350292457274 ], "wc_reply_authors_avg": [ 1760.75, 838.8999865895814 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.5, 1.118033988749895 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 
3, 0 ], "corr_rating_confidence": 0.6882472016116854, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1547884356502405058&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=6IjN7oxjXt", "pdf": "https://openreview.net/pdf?id=6IjN7oxjXt", "email": "tue.nl;tue.nl;wayve.ai", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Eindhoven University of Technology;Wayve Technologies", "aff_unique_dep": ";", "aff_unique_url": "https://www.tue.nl;https://www.wayvetechnologies.com", "aff_unique_abbr": "TU/e;Wayve", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Netherlands;United Kingdom" }, { "title": "Principled Federated Domain Adaptation: Gradient Projection and Auto-Weighting", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19396", "id": "6J3ehSUrMU", "author_site": "Enyi Jiang, Yibo Jacky Zhang, Sanmi Koyejo", "tldr": "", "abstract": "Federated Domain Adaptation (FDA) describes the federated learning (FL) setting where source clients and a server work collaboratively to improve the performance of a target client where limited data is available. The domain shift between the source and target domains, coupled with limited data of the target client, makes FDA a challenging problem, e.g., common techniques such as federated averaging and fine-tuning fail due to domain shift and data scarcity. \nTo theoretically understand the problem, we introduce new metrics that characterize the FDA setting and a theoretical framework with novel theorems for analyzing the performance of server aggregation rules. Further, we propose a novel lightweight aggregation rule, Federated Gradient Projection ($\\texttt{FedGP}$), which significantly improves the target performance with domain shift and data scarcity. Moreover, our theory suggests an $\\textit{auto-weighting scheme}$ that finds the optimal combinations of the source and target gradients. This scheme improves both $\\texttt{FedGP}$ and a simpler heuristic aggregation rule. 
Extensive experiments verify the theoretical insights and illustrate the effectiveness of the proposed methods in practice.", "keywords": "federated domain adaptation;federated learning;domain adaptation;machine learning", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/16734c7844700ed71efbac5551d4b1fcf363e141.pdf", "author": "Enyi Jiang;Yibo Jacky Zhang;Sanmi Koyejo", "authorids": "~Enyi_Jiang1;~Yibo_Jacky_Zhang1;~Sanmi_Koyejo1", "gender": "F;M;", "homepage": "https://enyijiang.github.io/;https://cs.stanford.edu/~sanmi/;https://yiboz.me/", "dblp": "268/2122;14/8885;251/9129.html", "google_scholar": "h6YC9nwAAAAJ;EaaOeJwAAAAJ;", "orcid": ";0000-0002-4023-419X;", "linkedin": "enyi-jiang-16a561171/;sanmi-koyejo-984754/;", "or_profile": "~Enyi_Jiang1;~Oluwasanmi_O_Koyejo1;~Jacky_Yibo_Zhang1", "aff": "University of Illinois, Urbana Champaign;Google;University of Illinois, Urbana Champaign", "aff_domain": "uiuc.edu;google.com;illinois.edu", "position": "PhD student;Research Scientist;PhD student", "bibtex": "@inproceedings{\njiang2024principled,\ntitle={Principled Federated Domain Adaptation: Gradient Projection and Auto-Weighting},\nauthor={Enyi Jiang and Yibo Jacky Zhang and Sanmi Koyejo},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=6J3ehSUrMU}\n}", "github": "", "project": "", "reviewers": "nVHX;d97G;tJ7A;28v7", "pdf_size": 975660, "rating": "5;6;6;6", "confidence": "5;3;4;3", "soundness": "3;3;3;3", "contribution": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "50;59;101;154", "wc_strengths": "22;34;92;67", "wc_weaknesses": "54;68;197;334", "wc_questions": "22;106;32;24", "wc_review": "148;267;422;579", "wc_reply_reviewers": "0;72;0;30", "wc_reply_authors": "582;932;466;1080", "reply_reviewers": "0;2;0;1", "reply_authors": "1;3;1;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 91.0, 41.15215668710451 ], "wc_strengths_avg": [ 53.75, 27.55335732719336 ], "wc_weaknesses_avg": [ 163.25, 113.25055187503503 ], "wc_questions_avg": [ 46.0, 34.84250278036869 ], "wc_review_avg": [ 354.0, 162.21436434545492 ], "wc_reply_reviewers_avg": [ 25.5, 29.508473359359 ], "wc_reply_authors_avg": [ 765.0, 250.00199999200007 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4689861876947239151&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=6J3ehSUrMU", "pdf": "https://openreview.net/pdf?id=6J3ehSUrMU", "email": "uiuc.edu;google.com;illinois.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Illinois Urbana-Champaign;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://illinois.edu;https://www.google.com", "aff_unique_abbr": "UIUC;Google", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Urbana-Champaign;Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "6J7WG1jyog", "title": "AceGPT, Localizing Large Language Models in Arabic", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "This 
paper underscores the critical necessity and methodology for developing a localized Large Language Model (LLM) specifically for Arabic, a language imbued with unique cultural characteristics inadequately addressed by current mainstream models like ChatGPT. Significant concerns emerge when addressing cultural sensitivity and local values. To address this, the paper proposes a comprehensive solution that includes further pre-training with Arabic texts, Supervised Fine-Tuning (SFT) utilizing native Arabic instructions, and GPT-4 responses in Arabic, alongside Reinforcement Learning with AI Feedback (RLAIF) employing a reward model attuned to local culture and values. The goal is to cultivate culturally cognizant and value-aligned Arabic LLMs capable of accommodating the diverse, application-specific needs of Arabic-speaking communities.\n Comprehensive evaluations reveal that the resulting model, dubbed AceGPT, sets the state-of-the-art standard for open Arabic LLMs across various benchmarks, including the instruction-following benchmark (i.e., Arabic Vicuna-80 and Arabic AlpacaEval), knowledge benchmark (i.e., Arabic MMLU and EXAMs), and the newly introduced Arabic cultural \\& value alignment benchmark. Notably, AceGPT outperforms ChatGPT in the popular Vicuna-80 benchmark when evaluated with GPT-4, despite the benchmark's limited scale.", "keywords": "AceGPT;Arabic;Large Language Model;Localization", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Huang Huang;Fei Yu;Jianqing Zhu;Xuening Sun;Hao Cheng;Song Dingjie;Zhihong Chen;Mosen Alharthi;Bang An;Ziche Liu;Zhang Zhiyi;Junying Chen;Jianquan Li;Benyou Wang;Lian Zhang;Ruoyu Sun;Xiang Wan;Haizhou Li;Jinchao Xu", "authorids": "~Huang_Huang2;~Fei_Yu3;~Jianqing_Zhu2;~Xuening_Sun2;~Hao_Cheng18;~Song_Dingjie1;~Zhihong_Chen2;~Mosen_Alharthi1;~Bang_An3;~Ziche_Liu1;~Zhang_Zhiyi2;~Junying_Chen2;~Jianquan_Li1;~Benyou_Wang2;~Lian_Zhang2;~Ruoyu_Sun1;~Xiang_Wan1;~Haizhou_Li3;~Jinchao_Xu1", "gender": "M;F;M;M;M;M;M;M;;M;M;M;M;M;;M;M;M;M", "homepage": ";;https://github.com/614479467;https://markch00.github.io/;https://bbsngg.github.io/;;https://scholar.google.com/citations?hl=en&user=eMfvcJ4AAAAJ;https://cemse.kaust.edu.sa/amcs/people/person/bang;https://zicheliu.com/;https://github.com/zhangzhiyi23;;;https://wabyking.github.io/old.html;;https://ruoyus.github.io/;http://www.sribd.cn/teacher/28;https://colips.org/~eleliha/;https://www.personal.psu.edu/jxx1/;", "dblp": ";;;;262/3284;78/3726;;;;;;;169/1793;;30/9879-1;;36/4118;;", "google_scholar": "0JhMor8AAAAJ;EsCgPkQAAAAJ;;;YLQ8DCsAAAAJ;y55sF8cAAAAJ;;;cQ0xpOkAAAAJ;;https://scholar.google.com.hk/citations?user=I0raPTYAAAAJ;https://scholar.google.com/citations?hl=en;Jk4vJU8AAAAJ;;PsfzbCMAAAAJ;;https://scholar.google.com.sg/citations?user=z8_x7C8AAAAJ;pBHiYxcAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;;;;;;;;;0000-0002-1501-9914;;;;0000-0001-9158-9401;;", "linkedin": ";;;;;;;;;;;;;lian-zhang-47027a187/;;;haizhou-li-4ba74b6/;;", "or_profile": "~Huang_Huang2;~Fei_Yu3;~Xuening_Sun2;~Hao_Cheng18;~Song_Dingjie1;~Zhihong_Chen2;~Mosen_Alharthi1;~Bang_An3;~Ziche_Liu1;~Zhang_Zhiyi2;~Junying_Chen2;~Jianquan_Li1;~Benyou_Wang2;~Lian_Zhang2;~Ruoyu_Sun1;~Xiang_Wan1;~Haizhou_Li3;~Jinchao_Xu1;~Zhu_Jianqing2", "aff": "Shenzhen Research Institute of Big Data;The Chinese University of Hong Kong, Shenzhen;Bupt;The Chinese University of Hong Kong, Shenzhen;The Chinese University of Hong Kong, Shenzhen;The Chinese 
University of Hong Kong, Shenzhen;King Abdullah University of Science and Technology;;The Chinese University of Hong Kong, Shenzhen;;Chinese University of Hong Kong, Shenzhen;;The Chinese University of Hong Kong, Shenzhen;Shenzhen Research Institute of Big Data;The Chinese University of Hong Kong;Shenzhen Research Institute of Big Data;National University of Singapore;Pennsylvania State University;King Abdullah University of Science and Technology", "aff_domain": "sribd.cn;link.cuhk.edu.cn;cs.umass.edu;cuhk.edu.cn;cuhk.edu.cn;cuhk.edu.cn;kaust.edu.sa;;cuhk.edu.cn;;cuhk.hk;;cuhk.edu.cn;sribd.cn;cuhk.edu.cn;sribd.cn;nus.edu.sg;psu.edu;kaust.edu.sa", "position": "Researcher;PhD student;MS student;MS student;Researcher;PhD student;PhD student;;Undergrad student;;PhD student;;Assistant Professor;Researcher;Associate Professor;Principal Researcher;Full Professor;Full Professor;Postdoc", "bibtex": "@misc{\nhuang2024acegpt,\ntitle={Ace{GPT}, Localizing Large Language Models in Arabic},\nauthor={Huang Huang and Fei Yu and Jianqing Zhu and Xuening Sun and Hao Cheng and Song Dingjie and Zhihong Chen and Mosen Alharthi and Bang An and Ziche Liu and Zhang Zhiyi and Junying Chen and Jianquan Li and Benyou Wang and Lian Zhang and Ruoyu Sun and Xiang Wan and Haizhou Li and Jinchao Xu},\nyear={2024},\nurl={https://openreview.net/forum?id=6J7WG1jyog}\n}", "github": "", "project": "", "reviewers": "P1tY;CfmQ;vrrL", "site": "https://openreview.net/forum?id=6J7WG1jyog", "pdf_size": 636383, "rating": "3;3;5", "confidence": "4;4;3", "soundness": "3;3;3", "contribution": "2;2;2", "presentation": "3;3;3", "wc_summary": "96;98;38", "wc_strengths": "38;81;36", "wc_weaknesses": "169;158;39", "wc_questions": "52;229;21", "wc_review": "355;566;134", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 77.33333333333333, 27.824849006278942 ], "wc_strengths_avg": [ 51.666666666666664, 20.75786330258702 ], "wc_weaknesses_avg": [ 122.0, 58.86141916966211 ], "wc_questions_avg": [ 100.66666666666667, 91.62362625921813 ], "wc_review_avg": [ 351.6666666666667, 176.37901109699973 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 3, 0 ], "authors#_avg": [ 19, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8712652082820375035&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;2;1;1;1;3;1;1;1;0;1;0;4;5;3", "aff_unique_norm": "Shenzhen Research Institute of Big Data;Chinese University of Hong Kong;Beijing University of Posts and Telecommunications;King Abdullah University of Science and Technology;National University of Singapore;Pennsylvania State University", "aff_unique_dep": ";;;;;", "aff_unique_url": "http://www.sribd.cn;https://www.cuhk.edu.cn;http://www.bupt.edu.cn/;https://www.kast.kau.edu.sa;https://www.nus.edu.sg;https://www.psu.edu", "aff_unique_abbr": ";CUHK;BUPT;KAUST;NUS;PSU", "aff_campus_unique_index": "1;1;1;1;1;1;1;2", "aff_campus_unique": ";Shenzhen;Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;1;0;0;0;0;0;0;2;3;1", "aff_country_unique": "China;Saudi Arabia;Singapore;United States" }, { "id": "6JcbNMEFPw", 
"title": "Large Language Models Can Be Good Privacy Protection Learners", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "The proliferation of Large Language Models (LLMs) has driven considerable interest in fine-tuning them with domain-specific data to create specialized language models. \nNevertheless, such domain-specific fine-tuning data often contains sensitive personally identifiable information (PII). Direct fine-tuning LLMs on this data without privacy protection poses a risk of leakage. \nTo address this challenge, we introduce Privacy Protection Language Models (PPLM), a novel paradigm for fine-tuning LLMs that effectively injects domain-specific knowledge while safeguarding data privacy. \nOur work offers a theoretical analysis for model design and delves into various techniques such as corpus curation, penalty-based unlikelihood in training loss, and instruction-based tuning, etc. Extensive experiments across diverse datasets and scenarios demonstrate the effectiveness of our approaches. In particular, instruction tuning with both positive and negative examples, stands out as a promising method, effectively protecting private data while enhancing the model's knowledge. Our work underscores the potential for Large Language Models as robust privacy protection learners.", "keywords": "Large language model;Privacy protection", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "", "author": "Yijia Xiao;Yiqiao Jin;Yushi Bai;Yue Wu;Xianjun Yang;Xiao Luo;Wenchao Yu;Xujiang Zhao;Yanchi Liu;Haifeng Chen;Wei Wang;Wei Cheng", "authorids": "~Yijia_Xiao1;~Yiqiao_Jin1;~Yushi_Bai1;~Yue_Wu12;~Xianjun_Yang1;~Xiao_Luo3;~Wenchao_Yu1;~Xujiang_Zhao1;~Yanchi_Liu1;~Haifeng_Chen1;~Wei_Wang13;~Wei_Cheng1", "gender": "M;M;M;M;M;M;;M;M;;F;M", "homepage": "https://yijia-xiao.com;https://ahren09.github.io/;https://bys0318.github.io/;https://yuewu.us/;;http://luoxiao12.github.io;;https://zxj32.github.io/;;https://haifengchen.gitlab.io/intro/;http://www.cs.ucla.edu/~weiwang;https://chengw07.github.io/", "dblp": "238/7281.html;207/6631.html;302/4421;41/5979-11;37/10237;50/1585-1;07/8491;221/5767;62/8146;08/57-1.html;w/WeiWang.html;89/2506-2.html", "google_scholar": "xLwcZvYAAAAJ;eY85qm4AAAAJ;https://scholar.google.com/citations?hl=zh-CN;kSQ1mLYAAAAJ;Tunh15sAAAAJ;https://scholar.google.com.hk/citations?;;k2-JcFAAAAAJ;faLmr-YAAAAJ;QzakB68AAAAJ;UedS9LQAAAAJ;PRrGVmoAAAAJ", "orcid": ";0000-0002-6974-5970;;;0000-0003-3318-8444;;;;;;0000-0002-8180-2886;", "linkedin": "yijia-xiao/;ahren-jin/;;;xianjun-yang-0062aa1a6/;%E9%9C%84-%E7%BD%97-303548214/;;zxj32/;;;wei-wang-8800845/;wei-cheng-ml/", "or_profile": "~Yijia_Xiao1;~Yiqiao_Jin1;~Yushi_Bai1;~Yue_Wu12;~Xianjun_Yang1;~Xiao_Luo3;~Wenchao_Yu1;~Xujiang_Zhao1;~Yanchi_Liu1;~Haifeng_Chen1;~Wei_Wang13;~Wei_Cheng1", "aff": "University of California, Los Angeles;Georgia Institute of Technology;Tsinghua University;University of California, Los Angeles;University of California, Santa Barbara;University of California, Los Angeles;University of California, Los Angeles;NEC Labs America;NEC-Labs;NEC-Labs;University of California, Los Angeles;NEC-Labs", "aff_domain": "ucla.edu;gatech.edu;tsinghua.edu.cn;ucla.edu;ucsb.edu;cs.ucla.edu;ucla.edu;nec-labs.com;nec-labs.com;nec-labs.com;ucla.edu;nec-labs.com", "position": "PhD student;PhD student;PhD student;PhD student;PhD student;Postdoc;PhD student;Researcher;Researcher;Researcher;Full Professor;Principal Researcher", "bibtex": "@misc{\nxiao2024large,\ntitle={Large 
Language Models Can Be Good Privacy Protection Learners},\nauthor={Yijia Xiao and Yiqiao Jin and Yushi Bai and Yue Wu and Xianjun Yang and Xiao Luo and Wenchao Yu and Xujiang Zhao and Yanchi Liu and Haifeng Chen and Wei Wang and Wei Cheng},\nyear={2024},\nurl={https://openreview.net/forum?id=6JcbNMEFPw}\n}", "github": "", "project": "", "reviewers": "wbo6;UJs9;qBeM;8QZB", "site": "https://openreview.net/forum?id=6JcbNMEFPw", "pdf_size": 1268612, "rating": "5;5;5;6", "confidence": "4;3;4;4", "soundness": "3;2;2;3", "contribution": "2;2;2;2", "presentation": "3;1;3;3", "wc_summary": "144;28;85;72", "wc_strengths": "70;40;41;48", "wc_weaknesses": "281;32;76;73", "wc_questions": "369;297;96;1", "wc_review": "864;397;298;194", "wc_reply_reviewers": "0;142;57;0", "wc_reply_authors": "1744;492;727;1005", "reply_reviewers": "0;2;1;0", "reply_authors": "7;6;6;3", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 82.25, 41.43896113562694 ], "wc_strengths_avg": [ 49.75, 12.090802289343747 ], "wc_weaknesses_avg": [ 115.5, 97.11977141653496 ], "wc_questions_avg": [ 190.75, 148.36167800345208 ], "wc_review_avg": [ 438.25, 256.0726215353762 ], "wc_reply_reviewers_avg": [ 49.75, 58.12217735081851 ], "wc_reply_authors_avg": [ 992.0, 470.6107733573468 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 5.5, 1.5 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "aff_unique_index": "0;1;2;0;3;0;0;4;5;5;0;5", "aff_unique_norm": "University of California, Los Angeles;Georgia Institute of Technology;Tsinghua University;University of California, Santa Barbara;NEC Labs America;NEC Laboratories", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.ucla.edu;https://www.gatech.edu;https://www.tsinghua.edu.cn;https://www.ucsb.edu;https://www.nec-labs.com;https://www.nec-labs.com", "aff_unique_abbr": "UCLA;Georgia Tech;THU;UCSB;NEC LA;NEC-Labs", "aff_campus_unique_index": "0;0;2;0;0;0", "aff_campus_unique": "Los Angeles;;Santa Barbara", "aff_country_unique_index": "0;0;1;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States;China" }, { "id": "6K81ILDnuv", "title": "Learning from Integral Losses in Physics Informed Neural Networks", "track": "main", "status": "Reject", "tldr": "", "abstract": "This work proposes a solution for the problem of training physics-informed networks under partial integro-differential equations. These equations require an infinite or a large number of neural evaluations to construct a single residual for training. As a result, accurate evaluation may be impractical, and we show that naive approximations at replacing these integrals with unbiased estimates lead to biased loss functions and solutions. To overcome this bias, we investigate three types of potential solutions: the deterministic sampling approach, the double-sampling trick, and the delayed target method. We consider three classes of PDEs for benchmarking; one defining Poisson problems with singular charges and weak solutions of up to 10 dimensions, another involving weak solutions on electro-magnetic fields and a Maxwell equation, and a third one defining a Smoluchowski coagulation problem. 
Our numerical results confirm the existence of the aforementioned bias in practice, and also show that our proposed delayed target approach can lead to accurate solutions with comparable quality to ones estimated with a large number of samples. Our implementation is open-source and available at https://anonymous.4open.science/r/btspinn.", "keywords": "Integral Losses;Partial Integro-Differential Equations;Physics Informed Neural Networks; Delayed Target Method", "primary_area": "neurosymbolic & hybrid AI systems (physics-informed, logic & formal reasoning, etc.)", "supplementary_material": "", "author": "Ehsan Saleh;Saba Ghaffari;Tim Bretl;Luke Olson;Matthew West", "authorids": "~Ehsan_Saleh1;~Saba_Ghaffari1;~Tim_Bretl1;~Luke_Olson1;~Matthew_West1", "gender": ";F;M;M;", "homepage": ";;http://bretl.csl.illinois.edu/;http://lukeo.cs.illinois.edu/;http://lagrange.mechse.illinois.edu", "dblp": ";;29/2834;06/8561.html;", "google_scholar": ";https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=ab_0lGcAAAAJ;o43oc6AAAAAJ;", "orcid": ";;;0000-0002-5283-6104;0000-0002-7605-0050", "linkedin": ";saba-ghaffari-171a3356/;;;", "or_profile": "~Ehsan_Saleh1;~Saba_Ghaffari1;~Tim_Bretl1;~Luke_Olson1;~Matthew_West1", "aff": ";University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign", "aff_domain": ";illinois.edu;illinois.edu;illinois.edu;illinois.edu", "position": ";PhD student;Full Professor;Full Professor;Full Professor", "bibtex": "@misc{\nsaleh2024learning,\ntitle={Learning from Integral Losses in Physics Informed Neural Networks},\nauthor={Ehsan Saleh and Saba Ghaffari and Tim Bretl and Luke Olson and Matthew West},\nyear={2024},\nurl={https://openreview.net/forum?id=6K81ILDnuv}\n}", "github": "", "project": "", "reviewers": "LK81;izgt;jJr8;WnMU", "site": "https://openreview.net/forum?id=6K81ILDnuv", "pdf_size": 1776918, "rating": "5;5;5;6", "confidence": "3;4;4;1", "soundness": "2;2;4;3", "contribution": "2;3;3;3", "presentation": "1;2;3;3", "wc_summary": "55;117;81;103", "wc_strengths": "29;23;73;41", "wc_weaknesses": "54;75;484;61", "wc_questions": "305;41;104;5", "wc_review": "443;256;742;210", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 1.224744871391589 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 89.0, 23.45207879911715 ], "wc_strengths_avg": [ 41.5, 19.30673457630782 ], "wc_weaknesses_avg": [ 168.5, 182.31086089424295 ], "wc_questions_avg": [ 113.75, 115.9641647234179 ], "wc_review_avg": [ 412.75, 209.1642596143041 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.9428090415820632, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8952720426583083926&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign", "aff_unique_dep": "", "aff_unique_url": "https://illinois.edu", "aff_unique_abbr": "UIUC", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0;0;0", 
"aff_country_unique": "United States" }, { "title": "UniTabE: A Universal Pretraining Protocol for Tabular Foundation Model in Data Science", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19395", "id": "6LLho5X6xV", "author_site": "Yazheng Yang, Yuqi Wang, Guang Liu, Ledell Wu, Qi Liu", "tldr": "", "abstract": "Recent advancements in Natural Language Processing (NLP) have witnessed the groundbreaking impact of pretrained models, yielding impressive outcomes across various tasks. This study seeks to extend the power of pretraining methodologies to facilitating the prediction over tables in data science, a domain traditionally overlooked, yet inherently challenging due to the plethora of table schemas intrinsic to different tasks. The primary research questions underpinning this work revolve around the establishment of a universal pretraining protocol for tables with varied structures, the generalizability and transferability of learned knowledge across tasks, the adaptation to diverse downstream applications, and the incorporation of incremental columns over time. In response to these challenges, we introduce UniTabE, a straightforward yet effective method designed to process tables in a uniform manner, devoid of constraints imposed by specific table structures. UniTabE's core concept relies on representing each basic table element with a module, termed TabUnit. This is subsequently followed by a Transformer encoder to refine the representation. Moreover, our model is designed to facilitate pretraining and finetuning through the utilization of free-form prompts. In order to implement the pretraining phase, we curated an expansive tabular dataset comprising approximately 13 billion samples, meticulously gathered from the Kaggle platform. This research primarily centers on classification and regression tasks involving tabular data, and conducts rigorous experimental testing and analyses to validate the effectiveness of our methodology. The experimental results demonstrate UniTabE's superior performance against several baseline models across a multitude of benchmark datasets. 
This, therefore, underscores UniTabE's potential to significantly enhance the semantic representation of tabular data, thereby marking a significant stride for tabular data analysis.", "keywords": "Pre-training Tabular Encoder;Pre-training;Heterogeneous Tabular Data;Classification and Regression;Deep Learning", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Yazheng Yang;Yuqi Wang;Guang Liu;Ledell Wu;Qi Liu", "authorids": "~Yazheng_Yang2;~Yuqi_Wang5;~Guang_Liu2;~Ledell_Wu1;~Qi_Liu5", "gender": ";M;F;M;M", "homepage": "https://www.linkedin.com/in/yazheng-yang-zju-cs/;https://bugggggggg.github.io/;;http://leuchine.github.io/;", "dblp": "222/9478;20/1168-3.html;206/6528;;193/2510-6", "google_scholar": "SAeMbW4AAAAJ;1fazBgUAAAAJ;-eJHVt8AAAAJ;Y-OeKMwAAAAJ;PqsOPSwAAAAJ", "orcid": "0000-0003-1627-8341;0009-0000-2930-0654;;0000-0003-4608-5778;", "linkedin": "yazheng-yang-zju-cs/;yuqi-wang-2746a8258/;;;", "or_profile": "~Yazheng_Yang2;~Yuqi_Wang5;~Ledell_Wu1;~Qi_Liu5;~Peter_Guang1", "aff": "The University of Hong Kong;University of Hong Kong;Creatify AI;University of Hong Kong;Beijing Academy of Artificial Intelligence", "aff_domain": "hku.hk;hku.hk;creatify.ai;hku.hk;baai.ac.cn", "position": "PhD student;MS student;Researcher;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nyang2024unitabe,\ntitle={UniTabE: A Universal Pretraining Protocol for Tabular Foundation Model in Data Science},\nauthor={Yazheng Yang and Yuqi Wang and Guang Liu and Ledell Wu and Qi Liu},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=6LLho5X6xV}\n}", "github": "", "project": "", "reviewers": "3wag;KBNh;es5d", "pdf_size": 2155377, "rating": "5;6;8", "confidence": "3;5;3", "soundness": "2;4;3", "contribution": "2;3;3", "presentation": "3;3;3", "wc_summary": "202;105;109", "wc_strengths": "157;147;114", "wc_weaknesses": "252;164;163", "wc_questions": "56;112;99", "wc_review": "667;528;485", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "649;739;739", "reply_reviewers": "0;0;0", "reply_authors": "2;2;2", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 138.66666666666666, 44.81319250201019 ], "wc_strengths_avg": [ 139.33333333333334, 18.372685039360892 ], "wc_weaknesses_avg": [ 193.0, 41.72129751897305 ], "wc_questions_avg": [ 89.0, 23.930454794396756 ], "wc_review_avg": [ 560.0, 77.67024312223225 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 709.0, 42.42640687119285 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.18898223650461365, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10684779965231187666&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=6LLho5X6xV", "pdf": "https://openreview.net/pdf?id=6LLho5X6xV", "email": "hku.hk;hku.hk;creatify.ai;hku.hk;baai.ac.cn", "author_num": 5, "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "University of Hong Kong;Creatify AI;Beijing Academy of Artificial Intelligence", "aff_unique_dep": ";;", "aff_unique_url": "https://www.hku.hk;;https://www.baaic.cn", 
"aff_unique_abbr": "HKU;;BAAI", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China;" }, { "id": "6LNTSrJjBe", "title": "Language Agent Tree Search Unifies Reasoning Acting and Planning in Language Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "While large language models (LLMs) have demonstrated impressive performance on a range of decision-making tasks, they rely on simple acting processes and fall short of broad deployment as autonomous agents. We introduce LATS (Language Agent Tree Search), a general framework that synergizes the capabilities of LLMs in planning, acting, and reasoning. Drawing inspiration from Monte Carlo tree search in model-based reinforcement learning, LATS employs LLMs as agents, value functions, and optimizers, repurposing their latent strengths for enhanced decision-making. What is crucial in this method is the use of an environment for external feedback, which offers a more deliberate and adaptive problem-solving mechanism that moves beyond the limitations of existing methods. Our experimental evaluation across diverse domains, such as programming, HotPotQA, and WebShop, demonstrates the superiority and versatility of LATS for both reasoning and acting. In particular, LATS achieves 94.4\\% for programming on HumanEval with GPT-4 and an average score of 75.9 for web browsing on WebShop, demonstrating the effectiveness and generality of our method.", "keywords": "large language models;agent;reasoning;decision-making", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Andy Zhou;Kai Yan;Michal Shlapentokh-Rothman;Haohan Wang;Yu-Xiong Wang", "authorids": "~Andy_Zhou2;~Kai_Yan1;~Michal_Shlapentokh-Rothman1;~Haohan_Wang1;~Yu-Xiong_Wang1", "gender": "M;M;M;;F", "homepage": "https://www.andyzhou.ai;https://kaiyan289.github.io/;http://cs.cmu.edu/~haohanw;https://yxw.cs.illinois.edu/;https://michalmsr.web.illinois.edu/", "dblp": ";;132/4066;35/10700;269/4751", "google_scholar": "https://scholar.google.com/citations?hl=en;KElKfgQAAAAJ;nZxJGeUAAAAJ;T_Q-xDkAAAAJ;x9szIWsAAAAJ", "orcid": ";;;;", "linkedin": "andy-zhou-679376206/;%E5%BC%80-%E9%A2%9C-18b7931b1/;haohanwang/;;michal-shlapentokh-rothman/", "or_profile": "~Andy_Zhou2;~Kai_Yan1;~Haohan_Wang1;~Yu-Xiong_Wang1;~Michal_M_Shlapentokh-Rothman1", "aff": "Department of Computer Science;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;Department of Computer Science, University of Illinois Urbana-Champaign;University of Illinois, Urbana Champaign", "aff_domain": "cs.illinois.edu;cs.illinois.edu;illinois.edu;cs.illinois.edu;illinois.edu", "position": "Undergrad student;PhD student;Assistant Professor;Assistant Professor;PhD student", "bibtex": "@misc{\nzhou2024language,\ntitle={Language Agent Tree Search Unifies Reasoning Acting and Planning in Language Models},\nauthor={Andy Zhou and Kai Yan and Michal Shlapentokh-Rothman and Haohan Wang and Yu-Xiong Wang},\nyear={2024},\nurl={https://openreview.net/forum?id=6LNTSrJjBe}\n}", "github": "", "project": "", "reviewers": "7QYU;j4Tk;3Lmb;15HT", "site": "https://openreview.net/forum?id=6LNTSrJjBe", "pdf_size": 782168, "rating": "3;5;5;6", "confidence": "4;3;4;4", "soundness": "2;2;3;3", "contribution": "2;2;2;3", "presentation": "3;1;2;3", "wc_summary": "66;27;57;43", "wc_strengths": "29;25;44;55", "wc_weaknesses": "179;241;146;93", "wc_questions": "151;14;208;8", "wc_review": 
"425;307;455;199", "wc_reply_reviewers": "185;412;86;11", "wc_reply_authors": "2649;1526;2304;1340", "reply_reviewers": "1;2;1;1", "reply_authors": "5;3;4;2", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 48.25, 14.7542366796795 ], "wc_strengths_avg": [ 38.25, 11.986972094736853 ], "wc_weaknesses_avg": [ 164.75, 53.65806090421084 ], "wc_questions_avg": [ 95.25, 86.6526831667664 ], "wc_review_avg": [ 346.5, 101.5517109653993 ], "wc_reply_reviewers_avg": [ 173.5, 150.89483092538325 ], "wc_reply_authors_avg": [ 1954.75, 539.8385754093533 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.5, 1.118033988749895 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.13245323570650439, "gs_citation": 191, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5570756468692384789&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "Unknown Institution;University of Illinois Urbana-Champaign", "aff_unique_dep": "Department of Computer Science;", "aff_unique_url": ";https://illinois.edu", "aff_unique_abbr": ";UIUC", "aff_campus_unique_index": "1;1;1;1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "1;1;1;1", "aff_country_unique": ";United States" }, { "id": "6LyO8WTVTU", "title": "A Teacher-Guided Framework for Graph Representation Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "We consider the problem of unsupervised representation learning for Graph Neural Networks (GNNs). \nSeveral state-of-the-art approaches to this problem are based on Contrastive Learning (CL) principles that generate transferable representations. \nTheir objective function can be posed as a supervised discriminative task using 'hard labels', as they consider each pair of graphs as either 'equally positive' or 'equally negative'.\nHowever, it has been observed that using 'soft labels' in a Bayesian way can reduce the variance of the risk for discriminative tasks in supervised settings. \nMotivated by this, we propose a CL framework for GNNs, called *Teacher-guided Graph Contrastive Learning (TGCL)*, that incorporates `soft labels' to facilitate a more regularized discrimination. \nIn particular, we propose a teacher-student framework where the student network learns the representation by distilling the representations produced by the teacher network trained using unlabelled graphs. 
\nOur proposed approach can be adapted to any existing CL methods and empirically improves the performance across diverse downstream tasks.", "keywords": "graph neural network;representation learning", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Arnab Kumar Mondal;Jay Nandy;Manohar Kaul;Prathosh AP", "authorids": "~Arnab_Kumar_Mondal2;~Jay_Nandy1;~Manohar_Kaul1;~Prathosh_AP1", "gender": "M;M;M;M", "homepage": ";;https://manukaul.github.io/;https://sites.google.com/view/prathosh", "dblp": ";193/4096;29/10735;218/5887", "google_scholar": "MZ8N49AAAAAJ;https://scholar.google.co.in/citations?user=8N_wxz8AAAAJ;https://scholar.google.com.tw/citations?user=jNroyK4AAAAJ;https://scholar.google.co.in/citations?user=OEwV4bsAAAAJ", "orcid": "0000-0001-7297-374X;;;", "linkedin": "arnab-mondal-a4448a18/;jay-nandy-36654b34/;manu-k-72b936287/;prathosh-ap-phd-50ab9511/", "or_profile": "~Arnab_Kumar_Mondal2;~Jay_Nandy1;~Manohar_Kaul1;~Prathosh_AP1", "aff": "Fujitsu Research and Development Center Co. Ltm.;Fujitsu Research and Development Center Co. Ltm.;Fujitsu Research and Development Center Co. Ltm.;Indian Institute of Science, Indian institute of science, Bangalore", "aff_domain": "fujitsu.com;fujitsu.com;fujitsu.com;iisc.ac.in", "position": "Researcher;Researcher;Principal Researcher;Assistant Professor", "bibtex": "@misc{\nmondal2024a,\ntitle={A Teacher-Guided Framework for Graph Representation Learning},\nauthor={Arnab Kumar Mondal and Jay Nandy and Manohar Kaul and Prathosh AP},\nyear={2024},\nurl={https://openreview.net/forum?id=6LyO8WTVTU}\n}", "github": "", "project": "", "reviewers": "RhAx;xgHF;2WDw", "site": "https://openreview.net/forum?id=6LyO8WTVTU", "pdf_size": 1392351, "rating": "3;3;5", "confidence": "4;4;3", "soundness": "2;3;3", "contribution": "2;2;3", "presentation": "3;3;3", "wc_summary": "52;53;101", "wc_strengths": "54;24;30", "wc_weaknesses": "185;67;322", "wc_questions": "3;45;8", "wc_review": "294;189;461", "wc_reply_reviewers": "0;0;1081", "wc_reply_authors": "581;360;1657", "reply_reviewers": "0;0;3", "reply_authors": "1;1;4", "rating_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 68.66666666666667, 22.866763848189994 ], "wc_strengths_avg": [ 36.0, 12.96148139681572 ], "wc_weaknesses_avg": [ 191.33333333333334, 104.19959479554397 ], "wc_questions_avg": [ 18.666666666666668, 18.732028424302822 ], "wc_review_avg": [ 314.6666666666667, 112.0009920590984 ], "wc_reply_reviewers_avg": [ 360.3333333333333, 509.5882869751053 ], "wc_reply_authors_avg": [ 866.0, 566.5515569360539 ], "reply_reviewers_avg": [ 1.0, 1.4142135623730951 ], "reply_authors_avg": [ 2.0, 1.4142135623730951 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:V5BOlabZ7fsJ:scholar.google.com/&scioq=A+Teacher-Guided+Framework+for+Graph+Representation+Learning&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Fujitsu Research and Development Center;Indian Institute of Science", "aff_unique_dep": "Research and Development;", "aff_unique_url": "https://www.fujitsu.com/global/;https://www.iisc.ac.in", "aff_unique_abbr": 
"Fujitsu R&D;IISc", "aff_campus_unique_index": "1", "aff_campus_unique": ";Bangalore", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "Japan;India" }, { "id": "6M5G5hNiAU", "title": "How Abilities in Large Language Models are Affected by Supervised Fine-tuning Data Composition", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Large language models (LLMs) with enormous pre-training tokens and parameter amounts emerge abilities including math reasoning, code generation, and instruction following. These abilities are further enhanced by supervised fine-tuning (SFT). The open-source community has studied on ad-hoc SFT for each ability, while proprietary LLMs are versatile for all abilities. It is important to investigate how to unlock them with multiple abilities via SFT. In this study, we specifically focus on the data composition between mathematical reasoning, code generation, and general human-aligning abilities during SFT. From a scaling perspective, we investigate the relationship between model abilities and various factors including data amounts, data composition ratio, model parameters, and SFT strategies. Our experiments reveal that different abilities exhibit different scaling patterns, and larger models generally show superior performance with the same amount of data. Mathematical reasoning and code generation improve as data amounts increase consistently, while the general ability is enhanced with about a thousand samples and improves slowly. We find data composition results in various abilities improvements with low data amounts, while conflicts of abilities with high data amounts. Our experiments further show that composition data amount impacts performance, while the influence of composition ratio is insignificant. Regarding the SFT strategies, we evaluate sequential learning multiple abilities are prone to catastrophic forgetting. 
Our proposed Dual-stage Mixed Fine-tuning (DMT) strategy learns specialized abilities first and then learns general abilities with a small amount of specialized data to prevent forgetting, offering a promising solution to learn multiple abilities with different scaling patterns.", "keywords": "Data Composition;Large Language Model;Scaling Analysis;Supervised Fine-tuning", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Guanting Dong;Hongyi Yuan;Keming Lu;Chengpeng Li;Mingfeng Xue;Dayiheng Liu;Wei Wang;Zheng Yuan;Chang Zhou;Jingren Zhou", "authorids": "~Guanting_Dong1;~Hongyi_Yuan1;~Keming_Lu1;~Chengpeng_Li1;~Mingfeng_Xue1;~Dayiheng_Liu1;~Wei_Wang41;~Zheng_Yuan2;~Chang_Zhou2;~Jingren_Zhou1", "gender": "M;M;M;M;M;M;M;M;M;M", "homepage": "https://dongguanting.github.io/;;;;;https://dayihengliu.github.io/;;;;", "dblp": ";308/0909;65/6898.html;;;https://dblp.uni-trier.de/pers/hd/l/Liu:Dayiheng;35/7092-225;56/2877-2;;84/2644", "google_scholar": "amozZDkAAAAJ;FG3O4i8AAAAJ;WuD2op4AAAAJ;5pidFF8AAAAJ;;pPLQrX4AAAAJ;0zSeT3oAAAAJ;https://scholar.google.com/citations?hl=zh-CN;QeSoG3sAAAAJ;", "orcid": ";;;;;0000-0002-8755-8941;0000-0002-7028-9845;;;", "linkedin": ";;;;;;;;;", "or_profile": "~Guanting_Dong1;~Hongyi_Yuan1;~Keming_Lu1;~Chengpeng_Li1;~Mingfeng_Xue1;~Dayiheng_Liu1;~Wei_Wang41;~Zheng_Yuan2;~Chang_Zhou2;~Jingren_Zhou1", "aff": "Beijing University of Posts and Telecommunications;Tsinghua University;Alibaba Group;University of Science and Technology of China;Sichuan University;Alibaba Group;Meituan;Alibaba Group;Alibaba Group;Alibaba Group", "aff_domain": "bupt.edu.cn;tsinghua.edu.cn;alibaba-inc.com;ustc.edu.cn;scu.edu.cn;alibaba-inc.com;meituan.com;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com", "position": "MS student;PhD student;Researcher;PhD student;PhD student;Researcher;Principal Researcher;Researcher;Researcher;Researcher", "bibtex": "@misc{\ndong2024how,\ntitle={How Abilities in Large Language Models are Affected by Supervised Fine-tuning Data Composition},\nauthor={Guanting Dong and Hongyi Yuan and Keming Lu and Chengpeng Li and Mingfeng Xue and Dayiheng Liu and Wei Wang and Zheng Yuan and Chang Zhou and Jingren Zhou},\nyear={2024},\nurl={https://openreview.net/forum?id=6M5G5hNiAU}\n}", "github": "", "project": "", "reviewers": "4phB;crg6;C1QY", "site": "https://openreview.net/forum?id=6M5G5hNiAU", "pdf_size": 713131, "rating": "3;5;6", "confidence": "4;3;4", "soundness": "2;2;3", "contribution": "1;2;3", "presentation": "2;3;3", "wc_summary": "46;56;80", "wc_strengths": "28;81;99", "wc_weaknesses": "133;260;113", "wc_questions": "103;94;353", "wc_review": "310;491;645", "wc_reply_reviewers": "0;62;17", "wc_reply_authors": "262;1526;2012", "reply_reviewers": "0;1;1", "reply_authors": "1;4;6", "rating_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.0, 0.816496580927726 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 60.666666666666664, 14.2672897060218 ], "wc_strengths_avg": [ 69.33333333333333, 30.13672547278855 ], "wc_weaknesses_avg": [ 168.66666666666666, 65.09650954971055 ], "wc_questions_avg": [ 183.33333333333334, 120.02870027159707 ], "wc_review_avg": [ 482.0, 136.91116341141313 ], "wc_reply_reviewers_avg": [ 26.333333333333332, 26.157641755751268 ], "wc_reply_authors_avg": [ 1266.6666666666667, 
737.5930826380868 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 3.6666666666666665, 2.0548046676563256 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.18898223650461363, "gs_citation": 119, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12013057803072540290&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;2;3;4;2;5;2;2;2", "aff_unique_norm": "Beijing University of Posts and Telecommunications;Tsinghua University;Alibaba Group;University of Science and Technology of China;Sichuan University;Meituan", "aff_unique_dep": ";;;;;", "aff_unique_url": "http://www.bupt.edu.cn/;https://www.tsinghua.edu.cn;https://www.alibaba.com;http://www.ustc.edu.cn;https://www.scu.edu.cn;https://www.meituan.com", "aff_unique_abbr": "BUPT;THU;Alibaba;USTC;SCU;Meituan", "aff_campus_unique_index": "0", "aff_campus_unique": "Beijing;", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "SaProt: Protein Language Modeling with Structure-aware Vocabulary", "status": "Spotlight", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19394", "id": "6MRm3G4NiU", "author_site": "Jin Su, Chenchen Han, Yuyang Zhou, Junjie Shan, Xibin Zhou, Fajie Yuan", "tldr": "", "abstract": "Large-scale protein language models (PLMs), such as the ESM family, have achieved remarkable performance in various downstream tasks related to protein structure and function by undergoing unsupervised training on residue sequences. They have become essential tools for researchers and practitioners in biology. However, a limitation of vanilla PLMs is their lack of explicit consideration for protein structure information, which suggests the potential for further improvement. Motivated by this, we introduce the concept of a ``structure-aware vocabulary\" that integrates residue tokens with structure tokens. The structure tokens are derived by encoding the 3D structure of proteins using Foldseek. We then propose SaProt, a large-scale general-purpose PLM trained on an extensive dataset comprising approximately 40 million protein sequences and structures. Through extensive evaluation, our SaProt model surpasses well-established and renowned baselines across 10 significant downstream tasks, demonstrating its exceptional capacity and broad applicability. 
We have made the code, pre-trained model, and all relevant materials available at https://github.com/westlake-repl/SaProt.", "keywords": "Protein Language Models;Universal Representations;Downstream Tasks;Protein Structure Modeling", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Jin Su;Chenchen Han;Yuyang Zhou;Junjie Shan;Xibin Zhou;Fajie Yuan", "authorids": "~Jin_Su1;~Chenchen_Han1;~Yuyang_Zhou1;~Junjie_Shan1;~Xibin_Zhou1;~Fajie_Yuan2", "gender": "M;;Not Specified;M;M;M", "homepage": "https://ltenjoy.github.io/;;;https://scholar.google.com/citations?user=LhaFXOEAAAAJ&hl=en;https://fajieyuan.github.io/;https://github.com/zhoubay", "dblp": ";;;;175/4819;", "google_scholar": "47BkgB8AAAAJ;;;LhaFXOEAAAAJ;yOiBwasAAAAJ;", "orcid": ";0000-0002-3330-5308;;;;", "linkedin": ";;yuyang-zhou-150301222/;;;", "or_profile": "~Jin_Su1;~Chenchen_Han1;~Yuyang_Zhou1;~Junjie_Shan1;~Fajie_Yuan2;~Xibin_Bayes_Zhou1", "aff": "Westlake University;Westlake University;Westlake University;Westlake University;Westlake University;Westlake University", "aff_domain": "westlake.edu.cn;westlake.edu.cn;westlake.edu.cn;westlake.edu;westlake.edu.cn;westlake.edu", "position": "PhD student;PhD student;PhD student;Researcher;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nsu2024saprot,\ntitle={SaProt: Protein Language Modeling with Structure-aware Vocabulary},\nauthor={Jin Su and Chenchen Han and Yuyang Zhou and Junjie Shan and Xibin Zhou and Fajie Yuan},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=6MRm3G4NiU}\n}", "github": "", "project": "", "reviewers": "1519;LEd3;WitX", "pdf_size": 4933295, "rating": "6;8;8", "confidence": "3;5;5", "soundness": "3;4;3", "contribution": "3;3;3", "presentation": "2;4;3", "wc_summary": "93;81;80", "wc_strengths": "81;72;80", "wc_weaknesses": "236;74;45", "wc_questions": "92;70;68", "wc_review": "502;297;273", "wc_reply_reviewers": "118;12;32", "wc_reply_authors": "2053;513;672", "reply_reviewers": "1;1;1", "reply_authors": "3;1;1", "rating_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.9428090415820634 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 84.66666666666667, 5.90668171555645 ], "wc_strengths_avg": [ 77.66666666666667, 4.0276819911981905 ], "wc_weaknesses_avg": [ 118.33333333333333, 84.040995287353 ], "wc_questions_avg": [ 76.66666666666667, 10.873004286866728 ], "wc_review_avg": [ 357.3333333333333, 102.76294187216625 ], "wc_reply_reviewers_avg": [ 54.0, 45.985504962614755 ], "wc_reply_authors_avg": [ 1079.3333333333333, 691.5395063833221 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.9999999999999998, "gs_citation": 117, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18080281709349236734&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=6MRm3G4NiU", "pdf": "https://openreview.net/pdf?id=6MRm3G4NiU", "email": "westlake.edu.cn;westlake.edu.cn;westlake.edu.cn;westlake.edu;westlake.edu.cn;westlake.edu", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Westlake University", "aff_unique_dep": "", "aff_unique_url": 
"https://www.westlake.edu.cn", "aff_unique_abbr": "WU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Graphical Multioutput Gaussian Process with Attention", "status": "Spotlight", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19393", "id": "6N8TW504aa", "author_site": "Yijue Dai, Wenzhong Yan, Feng Yin", "tldr": "", "abstract": "Integrating information while recognizing dependence from multiple data sources and enhancing the predictive performance of the multi-output regression are challenging tasks. Multioutput Gaussian Process (MOGP) methods offer outstanding solutions with tractable predictions and uncertainty quantification. However, their practical applications are hindered by high computational complexity and storage demand. Additionally, there exist model mismatches in existing MOGP models when dealing with non-Gaussian data. To improve the model representation ability in terms of flexibility, optimality, and scalability, this paper introduces a novel multi-output regression framework, termed Graphical MOGP (GMOGP), which is empowered by: (i) Generating flexible Gaussian process priors consolidated from dentified parents, (ii) providing dependent processes with attention-based graphical representations, and (iii) achieving Pareto optimal solutions of kernel hyperparameters via a distributed learning framework. Numerical results confirm that the proposed GMOGP significantly outperforms state-of-the-art MOGP alternatives in predictive performance, as well as in time and memory efficiency, across various synthetic and real datasets.", "keywords": "Gaussian process regression;Multioutput Gaussian process;Attention mechanism", "primary_area": "probabilistic methods (Bayesian methods, variational inference, sampling, UQ, etc.)", "supplementary_material": "", "author": "Yijue Dai;Wenzhong Yan;Feng Yin", "authorids": "~Yijue_Dai1;~Wenzhong_Yan1;~Feng_Yin1", "gender": "M;M;", "homepage": "https://blsp-group.github.io/people.html;https://sse.cuhk.edu.cn/en/faculty/yinfeng;", "dblp": ";59/6917;", "google_scholar": ";4mW1N5oAAAAJ;upD7ilcAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Wenzhong_Yan1;~Feng_Yin1;~Dianna_Dai1", "aff": "School of Science and Engineering, The Chinese University of Hong Kong, Shenzhen;;CUHK(SZ)", "aff_domain": "cuhk.edu;;cuhk.edu.cn", "position": "PhD student;;PhD student", "bibtex": "@inproceedings{\ndai2024graphical,\ntitle={Graphical Multioutput Gaussian Process with Attention},\nauthor={Yijue Dai and Wenzhong Yan and Feng Yin},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=6N8TW504aa}\n}", "github": "", "project": "", "reviewers": "X7C1;pYyL;sgU9;9P5Q", "pdf_size": 1267166, "rating": "6;8;8;8", "confidence": "4;4;4;4", "soundness": "2;3;3;3", "contribution": "2;3;3;3", "presentation": "2;4;4;4", "wc_summary": "77;87;94;62", "wc_strengths": "26;98;203;132", "wc_weaknesses": "510;475;89;62", "wc_questions": "9;8;606;423", "wc_review": "622;668;992;679", "wc_reply_reviewers": "62;44;108;33", "wc_reply_authors": "1690;1039;1296;692", "reply_reviewers": "1;1;2;1", "reply_authors": "4;2;3;1", "rating_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.8660254037844386 ], "wc_summary_avg": [ 80.0, 12.020815280171307 ], 
"wc_strengths_avg": [ 114.75, 63.72352391385775 ], "wc_weaknesses_avg": [ 284.0, 209.08491098116096 ], "wc_questions_avg": [ 261.5, 261.1422026406303 ], "wc_review_avg": [ 740.25, 146.91217614615883 ], "wc_reply_reviewers_avg": [ 61.75, 28.63891583143468 ], "wc_reply_authors_avg": [ 1179.25, 364.5472363082732 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:_GNysYaL62kJ:scholar.google.com/&scioq=Graphical+Multioutput+Gaussian+Process+with+Attention&hl=en&as_sdt=0,44", "gs_version_total": 2, "openreview": "https://openreview.net/forum?id=6N8TW504aa", "pdf": "https://openreview.net/pdf?id=6N8TW504aa", "email": "cuhk.edu;;cuhk.edu.cn", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Chinese University of Hong Kong;Chinese University of Hong Kong, Shenzhen", "aff_unique_dep": "School of Science and Engineering;", "aff_unique_url": "https://www.cuhk.edu.cn;https://www.cuhk.edu.cn/sz", "aff_unique_abbr": "CUHK;CUHK(SZ)", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Shenzhen", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "6NEJ0ReNzr", "title": "Learning to Plan and Generate Text with Citations", "track": "main", "status": "Reject", "tldr": "", "abstract": "The increasing demand for the deployment of LLMs in\ninformation-seeking scenarios has spurred efforts in creating\nverifiable systems, which generate responses to queries along with\nsupporting evidence. In this paper, we explore the attribution\ncapabilities of plan-based models which have been recently shown to\nimprove the faithfulness, grounding, and controllability of generated\ntext. We conceptualize plans as a sequence of questions which serve as\nblueprints of the generated content and its organisation. We\nexperiment with two models that utilize different variants of\nblueprints, an abstractive model where questions are\ngenerated from scratch, and an extractive\nmodel where the decoder is forced to copy questions from the\ninput. Experiments on long-form question-answering show\nthat output quality improves for blueprint models when these learn\nto generate responses with attribution. 
Moreover, the citations generated by blueprint models are more accurate compared to those obtained from \nLLM-based pipelines lacking a planning component.", "keywords": "summarization;text generation;content planning;attribution", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Constanza Fierro;Reinald Kim Amplayo;Fantine Huot;Nicola De Cao;Joshua Maynez;Shashi Narayan;Mirella Lapata", "authorids": "~Constanza_Fierro1;~Reinald_Kim_Amplayo2;~Fantine_Huot1;~Nicola_De_Cao1;~Joshua_Maynez1;~Shashi_Narayan1;~Mirella_Lapata1", "gender": "M;F;M;M;M;F;F", "homepage": "https://rktamplayo.github.io;;https://nicola-decao.github.io;;https://sites.google.com/corp/view/shashinarayan/;https://homepages.inf.ed.ac.uk/mlap/;", "dblp": ";;218/6626;220/3863;74/8458;59/6701;205/9159", "google_scholar": ";79VvQLMAAAAJ;CqTR3sIAAAAJ;ZOYd-0oAAAAJ;prEcE9IAAAAJ;j67B9Q4AAAAJ;uYCIJSEAAAAJ", "orcid": ";;;;;;", "linkedin": ";fantine/;nicoladecao;;;;", "or_profile": "~Reinald_Kim_Amplayo2;~Fantine_Huot1;~Nicola_De_Cao1;~Joshua_Maynez1;~Shashi_Narayan1;~Mirella_Lapata1;~FIERRO_Constanza1", "aff": "Google;Google;Google;Google;Google;Edinburgh University, University of Edinburgh;Copenhagen University", "aff_domain": "google.com;google.com;google.com;google.com;google.com;inf.ed.ac.uk;ku.dk", "position": "Researcher;Researcher;Researcher;Researcher;Research Scientist;Full Professor;PhD student", "bibtex": "@misc{\nfierro2024learning,\ntitle={Learning to Plan and Generate Text with Citations},\nauthor={Constanza Fierro and Reinald Kim Amplayo and Fantine Huot and Nicola De Cao and Joshua Maynez and Shashi Narayan and Mirella Lapata},\nyear={2024},\nurl={https://openreview.net/forum?id=6NEJ0ReNzr}\n}", "github": "", "project": "", "reviewers": "8rJ6;eZwk;vp7e;LX3E", "site": "https://openreview.net/forum?id=6NEJ0ReNzr", "pdf_size": 985007, "rating": "3;6;6;8", "confidence": "4;4;4;4", "soundness": "3;3;3;4", "contribution": "2;3;3;4", "presentation": "2;3;2;4", "wc_summary": "63;74;133;133", "wc_strengths": "50;25;19;55", "wc_weaknesses": "369;145;63;16", "wc_questions": "220;2;10;75", "wc_review": "702;246;225;279", "wc_reply_reviewers": "279;0;0;8", "wc_reply_authors": "2152;476;401;123", "reply_reviewers": "1;0;0;1", "reply_authors": "3;2;2;1", "rating_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 100.75, 32.48364973336586 ], "wc_strengths_avg": [ 37.25, 15.497983739828868 ], "wc_weaknesses_avg": [ 148.25, 135.55326443874378 ], "wc_questions_avg": [ 76.75, 87.41674610736779 ], "wc_review_avg": [ 363.0, 196.6659604507094 ], "wc_reply_reviewers_avg": [ 71.75, 119.70040726747759 ], "wc_reply_authors_avg": [ 788.0, 798.4099824025249 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14554465176635499021&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0;0;0;1;2", "aff_unique_norm": "Google;University of Edinburgh;University of Copenhagen", "aff_unique_dep": "Google;;", "aff_unique_url": "https://www.google.com;https://www.ed.ac.uk;https://www.ku.dk", "aff_unique_abbr": "Google;Edinburgh;UCPH", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": 
"Mountain View;", "aff_country_unique_index": "0;0;0;0;0;1;2", "aff_country_unique": "United States;United Kingdom;Denmark" }, { "id": "6NO5UVWvo6", "title": "Annotation by Clicks: A Point-Supervised Contrastive Variance Method for Medical Semantic Segmentation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Medical image segmentation methods typically rely on numerous dense annotated images for model training, which are notoriously expensive and time-consuming to collect. To alleviate this burden, weakly supervised techniques have been exploited to train segmentation models with less expensive annotations. In this paper, we propose a novel point-supervised contrastive variance method (PSCV) for medical image semantic segmentation, which only requires one pixel-point from each organ category to be annotated. The proposed method trains the base segmentation network by using a novel contrastive variance (CV) loss to exploit the unlabeled pixels and a partial cross-entropy loss on the labeled pixels. The CV loss function is designed to exploit the statistical spatial distribution properties of organs in medical images and their variance distribution map representations to enforce discriminative predictions over the unlabeled pixels. Experimental results on two standard medical image datasets demonstrate that the proposed method outperforms the state-of-the-art weakly supervised methods on point-supervised medical image semantic segmentation tasks.", "keywords": "medical image segmentation", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/2cec34e201fefce2f43fe52a658cc51eb6dfb67b.pdf", "author": "Qing En;Yuhong Guo", "authorids": "~Qing_En2;~Yuhong_Guo1", "gender": "M;", "homepage": ";", "dblp": "189/4347;", "google_scholar": "4tNrFv8AAAAJ;", "orcid": "0000-0003-0173-7437;", "linkedin": "qingen;", "or_profile": "~Qing_En2;~Yuhong_Guo1", "aff": "Carleton University;", "aff_domain": "cunet.carleton.ca;", "position": "Postdoc;", "bibtex": "@misc{\nen2024annotation,\ntitle={Annotation by Clicks: A Point-Supervised Contrastive Variance Method for Medical Semantic Segmentation},\nauthor={Qing En and Yuhong Guo},\nyear={2024},\nurl={https://openreview.net/forum?id=6NO5UVWvo6}\n}", "github": "", "project": "", "reviewers": "WEjx;jaNJ;2T67;JEHB", "site": "https://openreview.net/forum?id=6NO5UVWvo6", "pdf_size": 2172041, "rating": "3;5;5;5", "confidence": "5;4;4;4", "soundness": "2;3;3;2", "contribution": "2;2;3;2", "presentation": "3;3;3;2", "wc_summary": "58;54;72;142", "wc_strengths": "31;73;61;58", "wc_weaknesses": "150;358;111;171", "wc_questions": "79;14;156;22", "wc_review": "318;499;400;393", "wc_reply_reviewers": "0;0;0;23", "wc_reply_authors": "537;523;431;504", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 81.5, 35.563323804166565 ], "wc_strengths_avg": [ 55.75, 15.35211711784404 ], "wc_weaknesses_avg": [ 197.5, 95.13280191395604 ], "wc_questions_avg": [ 67.75, 56.78192934376217 ], "wc_review_avg": [ 402.5, 64.32145831680124 ], "wc_reply_reviewers_avg": [ 5.75, 9.959292143521045 ], "wc_reply_authors_avg": [ 498.75, 40.831207427652686 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 2, 
0 ], "corr_rating_confidence": -1.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3282580224938276696&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 5, "aff_unique_index": "0", "aff_unique_norm": "Carleton University", "aff_unique_dep": "", "aff_unique_url": "https://carleton.ca", "aff_unique_abbr": "Carleton", "aff_country_unique_index": "0", "aff_country_unique": "Canada" }, { "id": "6NaiZHL3l1", "title": "A Novel Evaluation Framework for Image Inpainting via Multi-Pass Self-Consistency", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Image inpainting aims to restore missing regions of corrupted images by utilizing the available unmasked content while ensuring consistency and fidelity. In scenarios where limited information is available, determining a unique optimal solution for a given inpainting case becomes challenging. However, existing assessment approaches predominantly rely on the availability of corresponding unmasked images, which introduces potential biases toward specific inpainting solutions. To address this disparity, we propose a novel evaluation framework that leverages the power of aggregated multi-pass image inpainting. Our self-supervised metric offers exceptional performance in scenarios with or without unmasked images. Rather than solely relying on similarity to the original images in terms of pixel space or feature space, our method prioritizes intrinsic self-consistency. This allows us to explore diverse and viable inpainting solutions while mitigating biases. Through extensive experimentation on multiple baselines, we demonstrate the strong alignment of our method with human perception, which is further supported by a comprehensive user study.", "keywords": "Image Inpainting", "primary_area": "datasets and benchmarks", "supplementary_material": "/attachment/3b7090fa99ac74876d1ae24b46dcb3c750d7b316.zip", "author": "Tianyi Chen;Jianfu Zhang;Yan Hong;Liqing Zhang", "authorids": "~Tianyi_Chen7;~Jianfu_Zhang2;~Yan_Hong1;~Liqing_Zhang2", "gender": "M;M;F;M", "homepage": "https://github.com/control-cyber;https://matt-sjtu.github.io/;https://github.com/hy-zpg;http://bcmi.sjtu.edu.cn/~zhangliqing/", "dblp": "93/4437-1;78/3993-3;68/974-2.html;20/4627-1.html", "google_scholar": ";https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?user=ztq5-xcAAAAJ;1smFmxAAAAAJ", "orcid": "0009-0000-1203-6746;0000-0002-2673-5860;0000-0001-6401-0812;", "linkedin": ";;;", "or_profile": "~Tianyi_Chen7;~Jianfu_Zhang2;~Yan_Hong1;~Liqing_Zhang2", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Alibaba Group;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;antgroup.com;sjtu.edu.cn", "position": "Undergrad student;Assistant Professor;Researcher;Full Professor", "bibtex": "@misc{\nchen2024a,\ntitle={A Novel Evaluation Framework for Image Inpainting via Multi-Pass Self-Consistency},\nauthor={Tianyi Chen and Jianfu Zhang and Yan Hong and Liqing Zhang},\nyear={2024},\nurl={https://openreview.net/forum?id=6NaiZHL3l1}\n}", "github": "", "project": "", "reviewers": "R8hK;ZP4Y;rSjt;yYxo", "site": "https://openreview.net/forum?id=6NaiZHL3l1", "pdf_size": 23052601, "rating": "1;3;3;5", "confidence": "4;5;4;3", "soundness": "1;1;2;2", "contribution": "1;1;2;3", "presentation": "2;2;3;2", "wc_summary": "48;56;87;51", "wc_strengths": "26;36;25;15", "wc_weaknesses": "162;168;224;35", "wc_questions": "31;6;23;404", "wc_review": "267;266;359;505", "wc_reply_reviewers": "0;0;0;0", 
"wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 3.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 1.5, 0.5 ], "contribution_avg": [ 1.75, 0.82915619758885 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 60.5, 15.56438241627338 ], "wc_strengths_avg": [ 25.5, 7.433034373659253 ], "wc_weaknesses_avg": [ 147.25, 69.17143557856812 ], "wc_questions_avg": [ 116.0, 166.52177034850428 ], "wc_review_avg": [ 349.25, 97.5304439649487 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:unpA5IYKZE4J:scholar.google.com/&scioq=A+Novel+Evaluation+Framework+for+Image+Inpainting+via+Multi-Pass+Self-Consistency&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Shanghai Jiao Tong University;Alibaba Group", "aff_unique_dep": ";", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "SJTU;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "NoiseDiffusion: Correcting Noise for Image Interpolation with Diffusion Models beyond Spherical Linear Interpolation", "status": "Spotlight", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19392", "id": "6O3Q6AFUTu", "author_site": "Pengfei Zheng, Yonggang Zhang, Zhen Fang, Tongliang Liu, Defu Lian, Bo Han", "tldr": "", "abstract": "Image interpolation based on diffusion models is promising in creating fresh and interesting images. \nAdvanced interpolation methods mainly focus on spherical linear interpolation, where images are encoded into the noise space and then interpolated for denoising to images. \nHowever, existing methods face challenges in effectively interpolating natural images (not generated by diffusion models), thereby restricting their practical applicability. \nOur experimental investigations reveal that these challenges stem from the invalidity of the encoding noise, which may no longer obey the expected noise distribution, e.g., a normal distribution. \nTo address these challenges, we propose a novel approach to correct noise for image interpolation, NoiseDiffusion. Specifically, NoiseDiffusion approaches the invalid noise to the expected distribution by introducing subtle Gaussian noise and introduces a constraint to suppress noise with extreme values. In this context, promoting noise validity contributes to mitigating image artifacts, but the constraint and introduced exogenous noise typically lead to a reduction in signal-to-noise ratio, i.e., loss of original image information. Hence, NoiseDiffusion performs interpolation within the noisy image space and injects raw images into these noisy counterparts to address the challenge of information loss. 
Consequently, NoiseDiffusion enables us to interpolate natural images without causing artifacts or information loss, thus achieving the best interpolation results.", "keywords": "Image Interpolation; Diffusion Models", "primary_area": "generative models", "supplementary_material": "", "author": "PengFei Zheng;Yonggang Zhang;Zhen Fang;Tongliang Liu;Defu Lian;Bo Han", "authorids": "~PengFei_Zheng2;~Yonggang_Zhang1;~Zhen_Fang2;~Tongliang_Liu1;~Defu_Lian1;~Bo_Han1", "gender": "M;M;M;M;M;M", "homepage": "https://github.com/zheng-peng-fei;https://yonggangzhangben.github.io/index.html;https://fang-zhen.github.io/index.html;https://tongliang-liu.github.io/;https://faculty.ustc.edu.cn/liandefu/en/index.htm;https://bhanml.github.io/", "dblp": ";27/6859-3;;150/6667;87/10734;241/0472-3", "google_scholar": ";XSbEr98AAAAJ;OzD6WJcAAAAJ;https://scholar.google.com.au/citations?user=EiLdZ_YAAAAJ;QW0ad4sAAAAJ;nTNjqHwAAAAJ", "orcid": ";0000-0002-4080-7592;0000-0003-0602-6255;;0000-0002-3507-9607;", "linkedin": ";;;;;", "or_profile": "~PengFei_Zheng2;~Yonggang_Zhang1;~Zhen_Fang2;~Tongliang_Liu1;~Defu_Lian1;~bo_han2", "aff": "University of Science and Technology of China;Hong Kong Baptist University;University of Technology Sydney;Mohamed bin Zayed University of Artificial Intelligence;University of Science and Technology of China;MBZUAI", "aff_domain": "ustc.edu.cn;hkbu.edu.hk;uts.edu.au;mbzuai.ac.ae;ustc.edu.cn;mbzuai.ac.ae", "position": "MS student;Postdoc;Assistant Professor;Affiliated Associate Professor;Full Professor;Researcher", "bibtex": "@inproceedings{\nzheng2024noisediffusion,\ntitle={NoiseDiffusion: Correcting Noise for Image Interpolation with Diffusion Models beyond Spherical Linear Interpolation},\nauthor={PengFei Zheng and Yonggang Zhang and Zhen Fang and Tongliang Liu and Defu Lian and Bo Han},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=6O3Q6AFUTu}\n}", "github": "", "project": "", "reviewers": "A19z;YU98;h8PS", "pdf_size": 51159343, "rating": "8;8;8", "confidence": "4;3;3", "soundness": "4;3;3", "contribution": "4;3;3", "presentation": "4;3;2", "wc_summary": "133;148;86", "wc_strengths": "243;63;82", "wc_weaknesses": "224;170;75", "wc_questions": "60;114;13", "wc_review": "660;495;256", "wc_reply_reviewers": "14;64;35", "wc_reply_authors": "809;1456;1110", "reply_reviewers": "1;1;1", "reply_authors": "2;4;3", "rating_avg": [ 8.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 122.33333333333333, 26.411277052720408 ], "wc_strengths_avg": [ 129.33333333333334, 80.74789298936675 ], "wc_weaknesses_avg": [ 156.33333333333334, 61.5918465022405 ], "wc_questions_avg": [ 62.333333333333336, 41.26607430915726 ], "wc_review_avg": [ 470.3333333333333, 165.85200899865183 ], "wc_reply_reviewers_avg": [ 37.666666666666664, 20.499322482029065 ], "wc_reply_authors_avg": [ 1125.0, 264.3495161082514 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.0, 0.816496580927726 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11457188536986191953&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=6O3Q6AFUTu", "pdf": 
"https://openreview.net/pdf?id=6O3Q6AFUTu", "email": "ustc.edu.cn;hkbu.edu.hk;uts.edu.au;mbzuai.ac.ae;ustc.edu.cn;mbzuai.ac.ae", "author_num": 6, "aff_unique_index": "0;1;2;3;0;3", "aff_unique_norm": "University of Science and Technology of China;Hong Kong Baptist University;University of Technology Sydney;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.ustc.edu.cn;https://www.hkbu.edu.hk;https://www.uts.edu.au;https://mbzuai.ac.ae", "aff_unique_abbr": "USTC;HKBU;UTS;MBZUAI", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;1;2;0;2", "aff_country_unique": "China;Australia;United Arab Emirates" }, { "id": "6PVgHZUepm", "title": "Rep-Adapter: Parameter-free Automatic Adaptation of Pre-trained ConvNets via Re-parameterization", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recent advances in visual pre-training have demonstrated the advantage of transferring pre-trained models to target tasks. However, different transfer learning protocols have distinctive advantages regarding target tasks, and are nontrivial to choose without repeated trial and error. This paper presents a parameter-free automatic model adaptation protocol for ConvNets, aiming at automatically balancing between fine-tuning and linear probing, by using adaptive learning rate for each convolution filters on target tasks. First, we propose Rep-Adapter, an adapter module with re-parameterization scheme, which can achieve soft balancing between the pre-trained and fine-tuned filters, and can be equivalently converted to a single weight layer, without introducing additional parameters to the inference phase. We show by theoretical analysis that Rep-Adapter can simulate a ConvNet layer with each filter fine-tuning at different learning rate. We present a simple adapter tuning protocol with Rep-Adapter to achieve automatic adaptation of pretrained models without additional search cost. 
Extensive experiments on various datasets with ResNet and CLIP demonstrate the superiority of our Rep-Adapter on semi-supervised, few-shot and full dataset transfer learning scenarios.", "keywords": "Parameter-free Automatic Adaptation", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "", "author": "Changlin Li;Jiqi Zhang;Hongwei Xie;Kaicheng Yu;Bing Wang;Xiaodan Liang;Xiaojun Chang", "authorids": "~Changlin_Li2;~Jiqi_Zhang1;~Hongwei_Xie1;~Kaicheng_Yu1;~Bing_Wang14;~Xiaodan_Liang2;~Xiaojun_Chang4", "gender": "M;;M;M;F;M;M", "homepage": ";;;https://www.yukaicheng.cn;https://www.sysu-hcp.net/;https://scholar.google.com.sg/citations?hl=en&user=uwTzb6IAAAAJ&view_op=list_works;https://www.xiaojun.ai", "dblp": ";;37/1678;;;;116/8412", "google_scholar": "https://scholar.google.com/citations?hl=en;;kRvS9KAAAAAJ;j9OguiIAAAAJ;voxznZAAAAAJ;https://scholar.google.com.sg/citations?hl=en;https://scholar.google.co.uk/citations?user=8suupocAAAAJ", "orcid": ";;;;;;", "linkedin": ";https://cn.linkedin.com/in/%E5%90%89%E7%A5%BA-%E5%BC%A0-00389112a;;;;;", "or_profile": "~Changlin_Li2;~Jiqi_Zhang1;~Hongwei_Xie1;~Kaicheng_Yu1;~Xiaodan_Liang2;~bing_wang13;~Xiaojun_Chang1", "aff": "University of Technology Sydney;;Xiaomi Corporation;Westlake University;SUN YAT-SEN UNIVERSITY;Xiaomi Corporation;University of Technology Sydney", "aff_domain": "uts.edu.au;;xiaomi.com;westlake.edu;sysu.edu.cn;xiaomi.com;uts.edu.au", "position": "Postdoc;;Researcher;Assistant Professor;Associate Professor;Researcher;Full Professor", "bibtex": "@misc{\nli2024repadapter,\ntitle={Rep-Adapter: Parameter-free Automatic Adaptation of Pre-trained ConvNets via Re-parameterization},\nauthor={Changlin Li and Jiqi Zhang and Hongwei Xie and Kaicheng Yu and Bing Wang and Xiaodan Liang and Xiaojun Chang},\nyear={2024},\nurl={https://openreview.net/forum?id=6PVgHZUepm}\n}", "github": "", "project": "", "reviewers": "xJSa;ZcYB;XwS8", "site": "https://openreview.net/forum?id=6PVgHZUepm", "pdf_size": 664882, "rating": "3;6;8", "confidence": "5;4;5", "soundness": "2;3;3", "contribution": "2;3;3", "presentation": "2;3;3", "wc_summary": "39;108;64", "wc_strengths": "18;40;43", "wc_weaknesses": "132;66;268", "wc_questions": "70;10;72", "wc_review": "259;224;447", "wc_reply_reviewers": "83;0;0", "wc_reply_authors": "1441;481;384", "reply_reviewers": "1;0;0", "reply_authors": "3;1;1", "rating_avg": [ 5.666666666666667, 2.0548046676563256 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 70.33333333333333, 28.522895287041873 ], "wc_strengths_avg": [ 33.666666666666664, 11.14550233153366 ], "wc_weaknesses_avg": [ 155.33333333333334, 84.10046901705654 ], "wc_questions_avg": [ 50.666666666666664, 28.767265347188555 ], "wc_review_avg": [ 310.0, 97.92173745735248 ], "wc_reply_reviewers_avg": [ 27.666666666666668, 39.12657522565563 ], "wc_reply_authors_avg": [ 768.6666666666666, 477.05788141715277 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.11470786693528084, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:9NPzcIq4CWAJ:scholar.google.com/&scioq=Rep-Adapter:+Parameter-free+Automatic+Adaptation+of+Pre-trained+ConvNets+via+Re-parameterization&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;1;0", "aff_unique_norm": "University of Technology Sydney;Xiaomi Corporation;Westlake University;Sun Yat-sen University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.uts.edu.au;https://www.xiaomi.com;https://www.westlake.edu.cn;http://www.sysu.edu.cn", "aff_unique_abbr": "UTS;Xiaomi;WU;SYSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;0", "aff_country_unique": "Australia;China" }, { "title": "Dynamic Discounted Counterfactual Regret Minimization", "status": "Spotlight", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19391", "id": "6PbvbLyqT6", "author_site": "Hang Xu, Kai Li, Haobo Fu, QIANG FU, Junliang Xing, Jian Cheng", "tldr": "", "abstract": "Counterfactual regret minimization (CFR) is a family of iterative algorithms showing promising results in solving imperfect-information games. Recent novel CFR variants (e.g., CFR+, DCFR) have significantly improved the convergence rate of the vanilla CFR. The key to these CFR variants\u2019 performance is weighting each iteration non-uniformly, i.e., discounting earlier iterations. However, these algorithms use a fixed, manually-specified scheme to weight each iteration, which enormously limits their potential. In this work, we propose Dynamic Discounted CFR (DDCFR), the first equilibrium-finding framework that discounts prior iterations using a dynamic, automatically-learned scheme. We formalize CFR\u2019s iteration process as a carefully designed Markov decision process and transform the discounting scheme learning problem into a policy optimization problem within it. The learned discounting scheme dynamically weights each iteration on the fly using information available at runtime. Experimental results across multiple games demonstrate that DDCFR\u2019s dynamic discounting scheme has a strong generalization ability and leads to faster convergence with improved performance. 
The code is available at https://github.com/rpSebastian/DDCFR.", "keywords": "imperfect-information games;regret minimization;Nash equilibrium", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/20881043000a5b9be13557160cb756ac7746d388.zip", "author": "Hang Xu;Kai Li;Haobo Fu;QIANG FU;Junliang Xing;Jian Cheng", "authorids": "~Hang_Xu5;~Kai_Li2;~Haobo_Fu2;~QIANG_FU8;~Junliang_Xing1;~Jian_Cheng7", "gender": "M;M;M;M;M;M", "homepage": ";;;http://people.ucas.ac.cn/~jlxing?language=en;https://people.ucas.ac.cn/~chengjian?language=en;https://github.com/rpSebastian", "dblp": "181/2853;85/8571;;43/7659.html;14/6145-1;", "google_scholar": "_cY_PXgAAAAJ;LFdJXNcAAAAJ;gANaxT0AAAAJ;jSwNd3MAAAAJ;ZGCIUJ8AAAAJ;", "orcid": ";;;0000-0001-6801-0510;0000-0003-1289-2758;", "linkedin": ";haobo-fu-382b0784/;;https://www.linkedin.cn/incareer/in/ACoAAAvlU14B40ZWH1pxg5JJDtQ6LlgMYkp0e5s;;", "or_profile": "~Kai_Li2;~Haobo_Fu2;~QIANG_FU8;~Junliang_Xing1;~Jian_Cheng7;~Xu_Hang1", "aff": "Institute of Automation, Chinese Academy of Sciences;Tencent AI Lab;Tencent AI Lab;Tsinghua University;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "ia.ac.cn;tencent.com;tencent.com;tsinghua.edu.cn;ia.ac.cn;ia.ac.cn", "position": "Associate Professor;Principal Researcher;Principal Researcher;Full Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nxu2024dynamic,\ntitle={Dynamic Discounted Counterfactual Regret Minimization},\nauthor={Hang Xu and Kai Li and Haobo Fu and QIANG FU and Junliang Xing and Jian Cheng},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=6PbvbLyqT6}\n}", "github": "", "project": "", "reviewers": "ESsF;ffQX;u3Qf;f8hB", "pdf_size": 1141279, "rating": "8;8;8;8", "confidence": "3;3;4;5", "soundness": "3;4;3;4", "contribution": "3;3;4;3", "presentation": "3;3;4;4", "wc_summary": "125;66;48;104", "wc_strengths": "125;67;55;69", "wc_weaknesses": "123;352;78;219", "wc_questions": "62;84;97;75", "wc_review": "435;569;278;467", "wc_reply_reviewers": "20;77;19;8", "wc_reply_authors": "593;1109;214;851", "reply_reviewers": "1;1;1;1", "reply_authors": "2;3;2;3", "rating_avg": [ 8.0, 0.0 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "contribution_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 85.75, 30.367540236245674 ], "wc_strengths_avg": [ 79.0, 27.09243436828813 ], "wc_weaknesses_avg": [ 193.0, 104.97856924153615 ], "wc_questions_avg": [ 79.5, 12.776932339180638 ], "wc_review_avg": [ 437.25, 104.41354078853949 ], "wc_reply_reviewers_avg": [ 31.0, 26.972207918522354 ], "wc_reply_authors_avg": [ 691.75, 330.7018105484154 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3622742088901319178&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=6PbvbLyqT6", "pdf": "https://openreview.net/pdf?id=6PbvbLyqT6", "email": "ia.ac.cn;tencent.com;tencent.com;tsinghua.edu.cn;ia.ac.cn;ia.ac.cn", "author_num": 6, "aff_unique_index": "0;1;1;2;0;0", "aff_unique_norm": "Chinese Academy of Sciences;Tencent;Tsinghua University", "aff_unique_dep": "Institute of Automation;Tencent AI Lab;", "aff_unique_url": 
"http://www.ia.cas.cn;https://ai.tencent.com;https://www.tsinghua.edu.cn", "aff_unique_abbr": "CAS;Tencent AI Lab;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "6PjS5RnxeK", "title": "On progressive sharpening, flat minima and generalisation", "track": "main", "status": "Reject", "tldr": "", "abstract": "We present a new approach to understanding the relationship between loss curvature and input-output model behaviour in deep learning. Specifically, we use existing empirical analyses of the spectrum of deep network loss Hessians to ground an ansatz tying together the loss Hessian and the input-output Jacobian over training samples during the training of deep neural networks. We then prove a series of theoretical results which quantify the degree to which the input-output Jacobian of a model approximates its Lipschitz norm over a data distribution, and deduce a novel generalisation bound in terms of the empirical Jacobian. We use our ansatz, together with our theoretical results, to give a new account of the recently observed progressive sharpening phenomenon, as well as the generalisation properties of flat minima. Experimental evidence is provided to validate our claims.", "keywords": "progressive sharpening;flat minima;generalisation;generalization", "primary_area": "learning theory", "supplementary_material": "/attachment/c2208f44613f193ef0afa123ec35aa2edfc21b1b.zip", "author": "Lachlan Ewen MacDonald;Jack Valmadre;Simon Lucey", "authorids": "~Lachlan_Ewen_MacDonald1;~Jack_Valmadre1;~Simon_Lucey2", "gender": ";M;M", "homepage": "https://researchers.adelaide.edu.au/profile/lachlan.macdonald;https://jack.valmadre.net/;https://www.adelaide.edu.au/directory/simon.lucey", "dblp": "306/7691;50/8535;01/3542", "google_scholar": "r953DlQAAAAJ;_VSBqL0AAAAJ;vmAe35UAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Lachlan_Ewen_MacDonald1;~Jack_Valmadre1;~Simon_Lucey2", "aff": "Johns Hopkins University;University of Adelaide;University of Adelaide", "aff_domain": "jhu.edu;adelaide.edu.au;adelaide.edu.au", "position": "Postdoc;Lecturer;Full Professor", "bibtex": "@misc{\nmacdonald2024on,\ntitle={On progressive sharpening, flat minima and generalisation},\nauthor={Lachlan Ewen MacDonald and Jack Valmadre and Simon Lucey},\nyear={2024},\nurl={https://openreview.net/forum?id=6PjS5RnxeK}\n}", "github": "", "project": "", "reviewers": "L7Vu;LTAo;7LFk;dFnj", "site": "https://openreview.net/forum?id=6PjS5RnxeK", "pdf_size": 650955, "rating": "3;3;6;8", "confidence": "4;4;2;2", "soundness": "3;2;3;3", "contribution": "1;2;2;3", "presentation": "3;2;3;3", "wc_summary": "18;100;72;215", "wc_strengths": "28;100;110;63", "wc_weaknesses": "127;601;245;98", "wc_questions": "40;1;83;73", "wc_review": "213;802;510;449", "wc_reply_reviewers": "0;0;72;0", "wc_reply_authors": "512;794;458;208", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.0, 2.1213203435596424 ], "confidence_avg": [ 3.0, 1.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 101.25, 71.9839391809034 ], "wc_strengths_avg": [ 75.25, 32.41431011143072 ], "wc_weaknesses_avg": [ 267.75, 200.12418019819594 ], "wc_questions_avg": [ 49.25, 32.080952292598795 ], "wc_review_avg": [ 493.5, 209.82433128691247 ], "wc_reply_reviewers_avg": [ 18.0, 31.176914536239792 ], "wc_reply_authors_avg": [ 493.0, 
208.21383239352758 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.9428090415820635, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14425807823744012155&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Johns Hopkins University;University of Adelaide", "aff_unique_dep": ";", "aff_unique_url": "https://www.jhu.edu;https://www.adelaide.edu.au", "aff_unique_abbr": "JHU;Adelaide", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;Australia" }, { "title": "LongLoRA: Efficient Fine-tuning of Long-Context Large Language Models", "status": "Oral", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19390", "id": "6PmJoRfdaK", "author_site": "Yukang Chen, Shengju Qian, Haotian Tang, Xin Lai, Zhijian Liu, Song Han, Jiaya Jia", "tldr": "", "abstract": "We present LongLoRA, an efficient fine-tuning approach that extends the context sizes of pre-trained large language models (LLMs), with limited computation cost.\nTypically, training LLMs with long context sizes is computationally expensive, requiring extensive training hours and GPU resources. For example, training on the context length of 8192 needs 16x computational costs in self-attention layers as that of 2048. In this paper, we speed up the context extension of LLMs in two aspects. On the one hand, although dense global attention is needed during inference, fine-tuning the model can be effectively and efficiently done by sparse local attention. The proposed shifted sparse attention effectively enables context extension, leading to non-trivial computation saving with similar performance to fine-tuning with vanilla attention. Particularly, it can be implemented with only two lines of code in training, while being optional in inference. On the other hand, we revisit the parameter-efficient fine-tuning regime for context expansion. Notably, we find that LoRA for context extension works well under the premise of trainable embedding and normalization. LongLoRA combines this improved LoRA with S^2-Attn. LongLoRA demonstrates strong empirical results on various tasks on Llama2 models from 7B/13B to 70B. LongLoRA extends Llama2 7B from 4k context to 100k, or Llama2 70B to 32k on a single 8x A100 machine. LongLoRA extends models' context while retaining their original architectures, and is compatible with most existing techniques, like Flash-Attention2. In addition, we further conduct supervised fine-tuning with LongLoRA and our long instruction-following LongAlpaca dataset. 
All our code, models, dataset, and demo are available at https://github.com/dvlab-research/LongLoRA.", "keywords": "Efficient fine-tuning;Long context;Large language model", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Yukang Chen;Shengju Qian;Haotian Tang;Xin Lai;Zhijian Liu;Song Han;Jiaya Jia", "authorids": "~Yukang_Chen1;~Shengju_Qian1;~Haotian_Tang1;~Xin_Lai1;~Zhijian_Liu1;~Song_Han5;~Jiaya_Jia1", "gender": "M;M;M;M;M;;M", "homepage": "https://yukangchen.com/;http://thesouthfrog.com/about.me/;http://kentang.net;https://x-lai.github.io;https://zhijianliu.com;;https://jiaya.me", "dblp": "225/4601;247/6076;245/0058;;;;31/5649", "google_scholar": "6p0ygKUAAAAJ;QNnWmasAAAAJ;WxL13BAAAAAJ;tqNDPA4AAAAJ;mwzYYPgAAAAJ;;https://scholar.google.com.tw/citations?user=XPAkzTEAAAAJ", "orcid": ";;;;;;", "linkedin": ";;;;zhijianliu/;;", "or_profile": "~Yukang_Chen1;~Shengju_Qian1;~Haotian_Tang1;~Xin_Lai1;~Zhijian_Liu1;~Song_Han5;~Jiaya_Jia1", "aff": "NVIDIA;Tencent;NVIDIA;The Chinese University of Hong Kong;Massachusetts Institute of Technology;;Department of Computer Science and Engineering, Hong Kong University of Science and Technology", "aff_domain": "nvidia.com;tencent.com;nvidia.com;cuhk.edu.hk;mit.edu;;cse.ust.hk", "position": "Researcher;Researcher;Intern;PhD student;PhD student;;Full Professor", "bibtex": "@inproceedings{\nchen2024longlora,\ntitle={LongLo{RA}: Efficient Fine-tuning of Long-Context Large Language Models},\nauthor={Yukang Chen and Shengju Qian and Haotian Tang and Xin Lai and Zhijian Liu and Song Han and Jiaya Jia},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=6PmJoRfdaK}\n}", "github": "", "project": "", "reviewers": "WV76;7K1u;Sw6W;FsJg", "pdf_size": 1168720, "rating": "6;6;8;8", "confidence": "3;4;4;3", "soundness": "3;3;4;3", "contribution": "2;3;4;2", "presentation": "3;2;4;2", "wc_summary": "44;137;66;130", "wc_strengths": "47;18;48;71", "wc_weaknesses": "35;9;3;32", "wc_questions": "9;76;56;208", "wc_review": "135;240;173;441", "wc_reply_reviewers": "0;0;8;40", "wc_reply_authors": "472;569;536;477", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;2", "rating_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 94.25, 40.08974307725107 ], "wc_strengths_avg": [ 46.0, 18.801595676963167 ], "wc_weaknesses_avg": [ 19.75, 13.953046262375826 ], "wc_questions_avg": [ 87.25, 73.83554360875256 ], "wc_review_avg": [ 247.25, 118.00926870377597 ], "wc_reply_reviewers_avg": [ 12.0, 16.492422502470642 ], "wc_reply_authors_avg": [ 513.5, 40.74616546375867 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 360, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15175040643590476650&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=6PmJoRfdaK", "pdf": "https://openreview.net/pdf?id=6PmJoRfdaK", "email": "nvidia.com;tencent.com;nvidia.com;cuhk.edu.hk;mit.edu;;cse.ust.hk", "author_num": 7, "aff_unique_index": "0;1;0;2;3;4", "aff_unique_norm": "NVIDIA;Tencent;Chinese University of Hong Kong;Massachusetts Institute of Technology;Hong Kong University of 
Science and Technology", "aff_unique_dep": "NVIDIA Corporation;Tencent Holdings Limited;;;Department of Computer Science and Engineering", "aff_unique_url": "https://www.nvidia.com;https://www.tencent.com;https://www.cuhk.edu.hk;https://web.mit.edu;https://www.ust.hk", "aff_unique_abbr": "NVIDIA;Tencent;CUHK;MIT;HKUST", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;0;1;0;1", "aff_country_unique": "United States;China" }, { "title": "IceFormer: Accelerated Inference with Long-Sequence Transformers on CPUs", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19389", "id": "6RR3wU4mSZ", "author_site": "Yuzhen Mao, Martin Ester, Ke Li", "tldr": "", "abstract": "One limitation of existing Transformer-based models is that they cannot handle very long sequences as input since their self-attention operations exhibit quadratic time and space complexity. This problem becomes especially acute when Transformers are deployed on hardware platforms equipped only with CPUs. To address this issue, we propose a novel method for accelerating self-attention at inference time that works with pretrained Transformer models out-of-the-box without requiring retraining. We experiment using our method to accelerate various long-sequence Transformers, including a leading LLaMA 2-based LLM, on various benchmarks and demonstrate a greater speedup of $2.73\\times$ - $7.63\\times$ while retaining $98.6$% - $99.6$% of the accuracy of the original pretrained models. The code is available on our project website at https://yuzhenmao.github.io/IceFormer/.", "keywords": "Efficient Transformers;Inference-time Efficiency;CPU", "primary_area": "generative models", "supplementary_material": "/attachment/4ffc52aa275005fa3921f695f72131d5e57fa122.zip", "author": "Yuzhen Mao;Martin Ester;Ke Li", "authorids": "~Yuzhen_Mao2;~Martin_Ester1;~Ke_Li1", "gender": "M;M;M", "homepage": "https://github.com/yuzhenmao;https://sites.google.com/view/esterlab;http://www.sfu.ca/~keli/", "dblp": "336/2249;e/MartinEster;75/6627-11", "google_scholar": "9wKn1A0AAAAJ;https://scholar.google.com.tw/citations?user=ZYwC_CQAAAAJ;vQc8tI4AAAAJ", "orcid": ";0000-0001-7732-2815;", "linkedin": ";;", "or_profile": "~Yuzhen_Mao2;~Martin_Ester1;~Ke_Li1", "aff": "Simon Fraser University;Simon Fraser University;Simon Fraser University", "aff_domain": "sfu.ca;sfu.ca;sfu.ca", "position": "Researcher;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nmao2024iceformer,\ntitle={IceFormer: Accelerated Inference with Long-Sequence Transformers on {CPU}s},\nauthor={Yuzhen Mao and Martin Ester and Ke Li},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=6RR3wU4mSZ}\n}", "github": "", "project": "", "reviewers": "dxGt;nr9P;d3mB;ad3y;5Bmg", "pdf_size": 2372458, "rating": "3;6;6;6;6", "confidence": "3;3;5;4;3", "soundness": "2;3;2;3;3", "contribution": "1;3;3;2;3", "presentation": "1;3;3;2;3", "wc_summary": "109;62;36;41;87", "wc_strengths": "89;40;59;33;59", "wc_weaknesses": "184;39;134;72;93", "wc_questions": "3;46;40;135;92", "wc_review": "385;187;269;281;331", "wc_reply_reviewers": "0;0;0;26;19", "wc_reply_authors": "1008;553;823;1473;723", "reply_reviewers": "0;0;0;1;1", "reply_authors": "2;1;2;3;1", "rating_avg": [ 5.4, 1.2 ], "confidence_avg": [ 3.6, 0.8 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "contribution_avg": [ 2.4, 0.8 ], "presentation_avg": [ 2.4, 0.8 ], "wc_summary_avg": [ 
67.0, 27.66224864323217 ], "wc_strengths_avg": [ 56.0, 19.452506265260524 ], "wc_weaknesses_avg": [ 104.4, 50.337262539792526 ], "wc_questions_avg": [ 63.2, 45.70076585791534 ], "wc_review_avg": [ 290.6, 66.08358343794622 ], "wc_reply_reviewers_avg": [ 9.0, 11.242775458044157 ], "wc_reply_authors_avg": [ 916.0, 315.08094198158034 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.37499999999999994, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8420608667553434018&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=6RR3wU4mSZ", "pdf": "https://openreview.net/pdf?id=6RR3wU4mSZ", "email": "sfu.ca;sfu.ca;sfu.ca", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Simon Fraser University", "aff_unique_dep": "", "aff_unique_url": "https://www.sfu.ca", "aff_unique_abbr": "SFU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "id": "6SNyuiph3F", "title": "Chat Vector: A Simple Approach to Equip LLMs With New Language Chat Capabilities", "track": "main", "status": "Reject", "tldr": "", "abstract": "With the advancements in conversational AI, such as ChatGPT, this paper focuses on exploring developing Large Language Models (LLMs) for non-English languages, especially emphasizing alignment with human preferences. We introduce a computationally efficient method, leveraging \u201cchat vector,\u201d to synergize pre-existing knowledge and behaviors in LLMs, restructuring the conventional training paradigm from continual pretrain $\\rightarrow$ SFT $\\rightarrow$ RLHF to continual pretrain + chat. Our empirical studies, primarily focused on Traditional Chinese, employ LLaMA2 as the base model and acquire the chat vector by subtracting the pre-trained weights, LLaMA2, from the weights of LLaMA2-chat. Evaluating from three distinct facets, which are toxicity, ability of instruction following and multi-turn dialogue demonstrates the chat vector's superior efficacy in \u201cchatting\u201d. To confirm the adaptability of our approach, we extend our experiments to include models pre-trained in both Korean and Simplified Chinese, illustrating the versatility of our methodology. 
Overall, we present a significant solution in aligning LLMs with human preferences efficiently across various languages, accomplished by the chat vector.", "keywords": "RLHF;LLM", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Shih-Cheng Huang;Pin-Zu Li;YU-CHI HSU;Kuang-Ming Chen;Yu Tung Lin;Shih-Kai Hsiao;Richard Tzong-Han Tsai;Hung-yi Lee", "authorids": "~Shih-Cheng_Huang2;~Pin-Zu_Li1;~YU-CHI_HSU1;~Kuang-Ming_Chen1;~Yu_Tung_Lin1;anna.shiker1822@gmail.com;~Richard_Tzong-Han_Tsai1;~Hung-yi_Lee2", "gender": "M;M;M;M;F;;M;Non-Binary", "homepage": ";https://github.com/aqweteddy;https://github.com/ba144220;;;;;https://speech.ee.ntu.edu.tw/~hylee/index.html", "dblp": ";;;37/10484.html;;;t/TzongHanTsai;81/8056", "google_scholar": ";;;;;;;DxLO11IAAAAJ", "orcid": ";;;;;;;", "linkedin": "%E4%B8%96%E4%B8%9E-%E9%BB%83-863b68192/;pin-zu-li-237b57181/;;kuang-ming-chen;judy-lin-3b9771247/;;;", "or_profile": "~Shih-Cheng_Huang2;~Pin-Zu_Li1;~YU-CHI_HSU1;~Kuang-Ming_Chen1;~Yu_Tung_Lin1;anna.shiker1822@gmail.com;~Richard_Tzong-Han_Tsai1;~Hung-yi_Lee2", "aff": "Appier Inc.;National Applied Research Laboratories;National Taiwan University;National Taiwan University;;;National Central University;National Taiwan University", "aff_domain": "appier.com;narlabs.org.tw;ntu.edu.tw;ntu.edu.tw;;;ncu.edu.tw;ntu.edu.tw", "position": "Researcher;Researcher;Undergrad student;Undergrad student;;;Full Professor;Full Professor", "bibtex": "@misc{\nhuang2024chat,\ntitle={Chat Vector: A Simple Approach to Equip {LLM}s With New Language Chat Capabilities},\nauthor={Shih-Cheng Huang and Pin-Zu Li and YU-CHI HSU and Kuang-Ming Chen and Yu Tung Lin and Shih-Kai Hsiao and Richard Tzong-Han Tsai and Hung-yi Lee},\nyear={2024},\nurl={https://openreview.net/forum?id=6SNyuiph3F}\n}", "github": "", "project": "", "reviewers": "uV1v;PhbB;X3rn;ERju", "site": "https://openreview.net/forum?id=6SNyuiph3F", "pdf_size": 707865, "rating": "5;5;5;6", "confidence": "4;4;3;3", "soundness": "2;3;3;3", "contribution": "2;2;3;2", "presentation": "3;2;3;3", "wc_summary": "99;67;71;113", "wc_strengths": "73;41;82;21", "wc_weaknesses": "263;314;288;88", "wc_questions": "28;54;4;13", "wc_review": "463;476;445;235", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "498;274;245;358", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 87.5, 19.20286436967152 ], "wc_strengths_avg": [ 54.25, 24.508926945094924 ], "wc_weaknesses_avg": [ 238.25, 88.60128385074339 ], "wc_questions_avg": [ 24.75, 18.93904696651867 ], "wc_review_avg": [ 404.75, 98.6214352967954 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 343.75, 98.25063613025617 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8869576311636760884&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2;2;3;2", "aff_unique_norm": "Appier Inc.;National Applied Research Laboratories;National Taiwan University;National Central University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.appier.com;https://www.narlabs.org.tw;https://www.ntu.edu.tw;https://www.ncu.edu.tw", 
"aff_unique_abbr": "Appier;NARLabs;NTU;NCU", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Taiwan", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "6SmcAt0JmF", "title": "CAT: Collaborative Adversarial Training", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Adversarial training has proven to be effective in enhancing the robustness of neural networks. However, previous methods typically focus on a single adversarial training strategy and do not consider the characteristics of models trained using different strategies. Upon revisiting these methods, we have observed that different adversarial training methods exhibit distinct levels of robustness for different sample instances. For instance, a model trained using AT may correctly classify a sample instance that is misclassified by a model trained using TRADES, and vice versa. Motivated by this observation, we propose a Collaborative Adversarial Training (CAT) framework to enhance the robustness of neural networks. CAT utilizes different adversarial training methods to train robust models and facilitate the interaction of these models to leverage their combined knowledge during the training process.Extensive experiments conducted on various networks and datasets validate the effectiveness of our method.", "keywords": "adversarial training;adversarial robustness", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "", "author": "xingbin liu", "authorids": "~xingbin_liu1", "gender": "", "homepage": "https://github.com/liuxingbin", "dblp": "", "google_scholar": "CqL88JwAAAAJ", "orcid": "", "linkedin": "", "or_profile": "~xingbin_liu1", "aff": "Megvii Technology Inc.", "aff_domain": "megvii.com", "position": "Researcher", "bibtex": "@misc{\nliu2024cat,\ntitle={{CAT}: Collaborative Adversarial Training},\nauthor={xingbin liu},\nyear={2024},\nurl={https://openreview.net/forum?id=6SmcAt0JmF}\n}", "github": "", "project": "", "reviewers": "aZir;9NK1;VPkg;V2Su", "site": "https://openreview.net/forum?id=6SmcAt0JmF", "pdf_size": 297957, "rating": "3;5;5;5", "confidence": "4;2;4;4", "soundness": "3;2;3;2", "contribution": "2;2;3;3", "presentation": "2;3;2;3", "wc_summary": "42;128;42;81", "wc_strengths": "9;26;45;48", "wc_weaknesses": "189;276;230;127", "wc_questions": "68;69;69;2", "wc_review": "308;499;386;258", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 73.25, 35.39332564199075 ], "wc_strengths_avg": [ 32.0, 15.732132722552274 ], "wc_weaknesses_avg": [ 205.5, 54.78366544874485 ], "wc_questions_avg": [ 52.0, 28.8704000665041 ], "wc_review_avg": [ 362.75, 90.93232373584215 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2458217275536159128&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "aff_unique_index": "0", "aff_unique_norm": "Megvii Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.megvii.com", "aff_unique_abbr": "Megvii", "aff_country_unique_index": "0", 
"aff_country_unique": "China" }, { "id": "6UQaXJm53B", "title": "DfPO: Degeneration-free Policy Optimization via Action Masking in Natural Language Action Spaces", "track": "main", "status": "Reject", "tldr": "", "abstract": "As the pre-training objectives (e.g., next token prediction) of language models (LMs) are inherently not aligned with task scores, optimizing LMs to achieve higher downstream task scores is essential. One of the promising approaches is to fine-tune LMs by using reinforcement learning (RL). However, conventional RL methods based on PPO and a penalty of KL divergence are vulnerable to the text degeneration problem which LMs do not generate natural texts anymore after RL fine-tuning. To address this problem, we provide Degeneration-free Policy Optimization (DfPO) that can fine-tune LMs to generate texts that achieve improved downstream task scores, while preserving the naturalness of the generated texts. To achieve this, we introduce action-masked policy with which a behavior policy can avoid to select tokens that potentially make policy optimization unexpected. Then, we devise clipped advantage functions to separately perform likelihood maximization and minimization, conditioned on texts sampled from the action-masked policy. Our experiments on the GRUE benchmark demonstrate that DfPO successfully improves the downstream task scores, while preserving the naturalness of the generated texts. Moreover, even DfPO does not perform hyperparameter search, it outperforms PPO and NLPO which require additional hyperparameter search for the penalty ratio of KL divergence.", "keywords": "Reinforcement learning;Natural language processing", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Youngsoo Jang;Geon-Hyeong Kim;Byoungjip Kim;Honglak Lee;Moontae Lee", "authorids": "~Youngsoo_Jang2;~Geon-Hyeong_Kim2;~Byoungjip_Kim1;~Honglak_Lee2;~Moontae_Lee1", "gender": ";M;;;", "homepage": "http://www.ysjang.me;https://sites.google.com/view/ghkim;;;https://moontae.people.uic.edu", "dblp": "195/0471;231/7707;;;132/1761", "google_scholar": "6EoBBggAAAAJ;https://scholar.google.co.kr/citations?user=IJL0uXoAAAAJ;;;BMvYy9cAAAAJ", "orcid": ";;;;0000-0001-5542-3463", "linkedin": ";;;;moontae-lee-975248123/", "or_profile": "~Youngsoo_Jang2;~Geon-Hyeong_Kim2;~Byoungjip_Kim1;~Honglak_Lee2;~Moontae_Lee1", "aff": "LG AI Research;LG AI Research;;;University of Illinois, Chicago", "aff_domain": "lgresearch.ai;lgresearch.ai;;;uic.edu", "position": "Researcher;Researcher;;;Assistant Professor", "bibtex": "@misc{\njang2024dfpo,\ntitle={Df{PO}: Degeneration-free Policy Optimization via Action Masking in Natural Language Action Spaces},\nauthor={Youngsoo Jang and Geon-Hyeong Kim and Byoungjip Kim and Honglak Lee and Moontae Lee},\nyear={2024},\nurl={https://openreview.net/forum?id=6UQaXJm53B}\n}", "github": "", "project": "", "reviewers": "wZxt;6zCc;99mP;hR6e", "site": "https://openreview.net/forum?id=6UQaXJm53B", "pdf_size": 1050988, "rating": "3;5;5;8", "confidence": "3;4;4;3", "soundness": "1;2;2;3", "contribution": "2;3;2;3", "presentation": "1;2;2;3", "wc_summary": "134;70;61;138", "wc_strengths": "94;47;28;219", "wc_weaknesses": "312;126;121;110", "wc_questions": "1134;120;88;1", "wc_review": "1674;363;298;468", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1357;825;538;230", "reply_reviewers": "0;0;0;0", "reply_authors": "3;2;2;2", "rating_avg": [ 5.25, 1.7853571071357126 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.0, 0.7071067811865476 ], 
"contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 100.75, 35.42156828826189 ], "wc_strengths_avg": [ 97.0, 74.42109915877352 ], "wc_weaknesses_avg": [ 167.25, 83.7716389955455 ], "wc_questions_avg": [ 335.75, 462.9224422081954 ], "wc_review_avg": [ 700.75, 565.1704941873735 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 737.5, 414.96776983279074 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.14002800840280097, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:EJnsjzBa9W8J:scholar.google.com/&scioq=DfPO:+Degeneration-free+Policy+Optimization+via+Action+Masking+in+Natural+Language+Action+Spaces&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;0;1", "aff_unique_norm": "LG;University of Illinois at Chicago", "aff_unique_dep": "LG AI Research;", "aff_unique_url": "https://www.lgaires.com;https://www.uic.edu", "aff_unique_abbr": "LG AI;UIC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chicago", "aff_country_unique_index": "0;0;1", "aff_country_unique": "South Korea;United States" }, { "id": "6Uc7Fgwrsm", "title": "OmniMixup: Generalize Mixup with Mixing-Pair Sampling Distribution", "track": "main", "status": "Reject", "tldr": "", "abstract": "Mixup is a widely-adopted data augmentation techniques to mitigates the overfitting issue in empirical risk minimization. Current works of modifying Mixup are modality-specific, thereby limiting the applicability across diverse modalities. Although alternative approaches try circumventing such barrier via mixing-up data from latent features based on sampling distribution, they still require domain knowledge for designing sampling distribution. Moreover, a unified theoretical framework for analyzing the generalization bound for this line of research remains absent. In this paper, we introduce OmniMixup, a generalization of prior works by introducing Mixing-Pair Sampling Distribution (MPSD), accompanied by a holistic theoretical analysis framwork. We find both theoretically and empirically that the Mahalanobis distance (M-Score), derived from the sampling distribution, offers significant insights into OmniMixup's generalization capabilities. Accordingly, we propose OmniEval, an evaluation framework designed to autonomously identify the optimal sampling distribution. The empirical study on both images and molecules demonstrates that 1) OmniEval is adept at determining the appropriate sampling distribution for OmniMixup, and 2) OmniMixup exhibits promising capability for application across various modalities and domains.", "keywords": "Mixup;Machine Learning;molecule property prediction;image classification;data augmentation", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Xingran Chen;Zhangyang Gao;Cheng Tan;Siyuan Li;Stan Z. 
Li", "authorids": "~Xingran_Chen1;~Zhangyang_Gao1;~Cheng_Tan1;~Siyuan_Li6;~Stan_Z._Li2", "gender": "M;M;M;M;M", "homepage": "https://www.chenxingran.com/;;https://chengtan9907.github.io/;https://lupin1998.github.io/;https://en.westlake.edu.cn/academics/School_of_Engineering/About/Our_People/Faculty/201912/t20191206_2497.shtml", "dblp": "203/8349;275/3266;70/1533-12.html;63/9705-2;l/StanZLi", "google_scholar": "X01oTv8AAAAJ;4SclT-QAAAAJ;6kTV6aMAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";0000-0003-1026-6083;;0000-0001-6806-2468;", "linkedin": ";;;https://www.linkedin.cn/incareer/in/siyuan-li-lupin1998/;stan-z-li-%E6%9D%8E%E5%AD%90%E9%9D%92-55753224/", "or_profile": "~Xingran_Chen1;~Zhangyang_Gao1;~Cheng_Tan1;~Siyuan_Li6;~Stan_Z._Li1", "aff": "University of Michigan - Ann Arbor;Westlake University, China;Zhejiang University & Westlake University;Alibaba Group;Westlake University", "aff_domain": "umich.edu;westlake.edu.cn;westlake.edu.cn;alibaba-inc.com;westlake.edu.cn", "position": "MS student;PhD student;PhD student;Intern;Chair Professor", "bibtex": "@misc{\nchen2024omnimixup,\ntitle={OmniMixup: Generalize Mixup with Mixing-Pair Sampling Distribution},\nauthor={Xingran Chen and Zhangyang Gao and Cheng Tan and Siyuan Li and Stan Z. Li},\nyear={2024},\nurl={https://openreview.net/forum?id=6Uc7Fgwrsm}\n}", "github": "", "project": "", "reviewers": "AcyS;sgeX;39U8;CqYZ;wmTG", "site": "https://openreview.net/forum?id=6Uc7Fgwrsm", "pdf_size": 461374, "rating": "1;3;3;5;5", "confidence": "4;5;3;3;4", "soundness": "2;2;1;2;3", "contribution": "1;2;2;2;2", "presentation": "1;2;1;3;3", "wc_summary": "75;58;121;58;78", "wc_strengths": "19;48;93;85;43", "wc_weaknesses": "296;251;139;54;164", "wc_questions": "57;3;271;194;5", "wc_review": "447;360;624;391;290", "wc_reply_reviewers": "189;0;0;103;0", "wc_reply_authors": "1128;722;897;560;849", "reply_reviewers": "1;0;0;1;0", "reply_authors": "2;1;2;1;2", "rating_avg": [ 3.4, 1.4966629547095767 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 2.0, 0.6324555320336759 ], "contribution_avg": [ 1.8, 0.4000000000000001 ], "presentation_avg": [ 2.0, 0.8944271909999159 ], "wc_summary_avg": [ 78.0, 23.05645245912736 ], "wc_strengths_avg": [ 57.6, 27.56519544643208 ], "wc_weaknesses_avg": [ 180.8, 85.21361393580253 ], "wc_questions_avg": [ 106.0, 107.8702924812944 ], "wc_review_avg": [ 422.4, 112.83368291427875 ], "wc_reply_reviewers_avg": [ 58.4, 76.52084683274225 ], "wc_reply_authors_avg": [ 831.2, 188.79131335948696 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.2857142857142857, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:XDHue5wf3VoJ:scholar.google.com/&scioq=OmniMixup:+Generalize+Mixup+with+Mixing-Pair+Sampling+Distribution&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;1", "aff_unique_norm": "University of Michigan;Westlake University;Zhejiang University;Alibaba Group", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.umich.edu;https://www.westlake.edu.cn;http://www.zju.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "UM;WU;ZJU;Alibaba", "aff_campus_unique_index": "0", "aff_campus_unique": "Ann Arbor;", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "United States;China" }, { "id": "6W35Wcs077", "title": "Decomposition Ascribed Synergistic 
Learning for Unified Image Restoration", "track": "main", "status": "Reject", "tldr": "", "abstract": "Learning to restore multiple image degradations within a single model is quite beneficial for real-world applications. Nevertheless, existing works typically concentrate on regarding each degradation independently, while their relationship has been less exploited to ensure the synergistic learning. To this end, we revisit the diverse degradations through the lens of singular value decomposition, with the observation that the decomposed singular vectors and singular values naturally undertake the different types of degradation information, dividing various restoration tasks into two groups, \\ie, singular vector dominated and singular value dominated. The above analysis renders a more unified perspective to ascribe the diverse degradations, compared to previous task-level independent learning. The dedicated optimization of degraded singular vectors and singular values inherently utilizes the potential relationship among diverse restoration tasks, attributing to the Decomposition Ascribed Synergistic Learning (DASL). Specifically, DASL comprises two effective operators, namely, Singular VEctor Operator (SVEO) and Singular VAlue Operator (SVAO), to favor the decomposed optimization, which can be lightly integrated into existing convolutional image restoration backbone. Moreover, the congruous decomposition loss has been devised for auxiliary. Extensive experiments on blended five image restoration tasks demonstrate the effectiveness of our method, including image deraining, image dehazing, image denoising, image deblurring, and low-light image enhancement.", "keywords": "Image Restoration;Decomposition;Orthogonality;Signal formation", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "JingHao Zhang;Jie Huang;Man Zhou;Chongyi Li;Feng Zhao", "authorids": "~JingHao_Zhang2;~Jie_Huang4;~Man_Zhou5;~Chongyi_Li1;~Feng_Zhao6", "gender": "M;M;M;;M", "homepage": "https://jinghao99.github.io/;;https://zz.github.io;;https://bivlab123.github.io/", "dblp": ";;;;181/2734-4", "google_scholar": "Lis9e2MAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;;https://scholar.google.co.uk/citations?hl=en", "orcid": "0000-0002-5407-4641;0000-0002-3518-3404;;;0000-0001-6767-8105", "linkedin": ";;;;", "or_profile": "~JingHao_Zhang2;~Jie_Huang4;~Man_Zhou5;~Chongyi_Li1;~Feng_Zhao6", "aff": "University of Science and Technology of China;University of Science and Technology of China;iim;;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;ustc.edu.cn;iim.cn;;ustc.edu.cn", "position": "PhD student;PhD student;PhD student;;Full Professor", "bibtex": "@misc{\nzhang2024decomposition,\ntitle={Decomposition Ascribed Synergistic Learning for Unified Image Restoration},\nauthor={JingHao Zhang and Jie Huang and Man Zhou and Chongyi Li and Feng Zhao},\nyear={2024},\nurl={https://openreview.net/forum?id=6W35Wcs077}\n}", "github": "", "project": "", "reviewers": "5Pyv;4FV9;vj9U;Ym8K", "site": "https://openreview.net/forum?id=6W35Wcs077", "pdf_size": 43862690, "rating": "3;6;8;8", "confidence": "4;5;4;5", "soundness": "1;3;4;3", "contribution": "2;2;4;3", "presentation": "1;3;4;4", "wc_summary": "96;54;77;103", "wc_strengths": "32;18;151;55", "wc_weaknesses": "62;71;55;155", "wc_questions": "361;247;131;7", "wc_review": "551;390;414;320", "wc_reply_reviewers": "0;0;130;0", "wc_reply_authors": "1592;1300;563;672", 
"reply_reviewers": "0;0;1;0", "reply_authors": "3;2;1;1", "rating_avg": [ 6.25, 2.0463381929681126 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 1.0897247358851685 ], "contribution_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 1.224744871391589 ], "wc_summary_avg": [ 82.5, 19.00657780874821 ], "wc_strengths_avg": [ 64.0, 51.93746239469156 ], "wc_weaknesses_avg": [ 85.75, 40.38177187791541 ], "wc_questions_avg": [ 186.5, 131.72983716683171 ], "wc_review_avg": [ 418.75, 83.80147671729897 ], "wc_reply_reviewers_avg": [ 32.5, 56.29165124598851 ], "wc_reply_authors_avg": [ 1031.75, 428.6562579736822 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.3665083330689157, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17089949665565746036&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Science and Technology of China;Indian Institute of Management", "aff_unique_dep": ";", "aff_unique_url": "http://www.ustc.edu.cn;https://www.iim.edu", "aff_unique_abbr": "USTC;IIM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "China;India" }, { "id": "6YZmkpivVH", "title": "TpopT: Efficient Trainable Template Optimization on Low-Dimensional Manifolds", "track": "main", "status": "Reject", "tldr": "", "abstract": "In scientific and engineering scenarios, a recurring task is the detection of low-dimensional families of signals or patterns. A classic family of approaches, exemplified by template matching, aims to cover the search space with a dense template bank. While simple and highly interpretable, it suffers from poor computational efficiency due to unfavorable scaling in the signal space dimensionality. In this work, we study TpopT (TemPlate OPTimization) as an alternative scalable framework for detecting low-dimensional families of signals which maintains high interpretability. We provide a theoretical analysis of the convergence of Riemannian gradient descent for TpopT, and prove that it has a superior dimension scaling to covering. We also propose a practical TpopT framework for nonparametric signal sets, which incorporates techniques of embedding and kernel interpolation, and is further configurable into a trainable network architecture by unrolled optimization. The proposed trainable TpopT exhibits significantly improved efficiency-accuracy tradeoffs for gravitational wave detection, where matched filtering is currently a method of choice. 
We further illustrate the general applicability of this approach with experiments on handwritten digit data.", "keywords": "Signal Detection;Scientific Machine Learning;Unrolled Optimization;Template Optimization", "primary_area": "optimization", "supplementary_material": "/attachment/e84caf366508589af520c0b379baed754ab3aa75.zip", "author": "Jingkai Yan;Shiyu Wang;Xinyu Rain Wei;Jimmy Wang;Zsuzsanna Marka;Szabolcs Marka;John Wright", "authorids": "~Jingkai_Yan1;~Shiyu_Wang4;~Xinyu_Rain_Wei1;~Jimmy_Wang1;~Zsuzsanna_Marka1;~Szabolcs_Marka1;~John_Wright1", "gender": ";;F;M;;M;", "homepage": ";http://www.linkedin.com/in/shiyuwang3601;https://www.linkedin.com/in/xinyuwei/;;;https://datascience.columbia.edu/people/szabolcs-marka/;http://www.columbia.edu/~jw2966", "dblp": "209/9672;;;;254/2791;289/7409;", "google_scholar": "https://scholar.google.com/citations?hl=en;;;;;EtCHb8YAAAAJ;nujTx04AAAAJ", "orcid": ";;;;0000-0003-1306-5260;0000-0002-3957-1324;", "linkedin": ";;;jameswang771/;;szabolcs-marka-94130624;", "or_profile": "~Jingkai_Yan1;~Shiyu_Wang4;~Xinyu_Rain_Wei1;~Jimmy_Wang1;~Zsuzsanna_Marka1;~Szabolcs_Marka1;~John_Wright1", "aff": "Apple;Columbia University;Columbia University;Columbia University;Columbia University;Columbia University;Columbia University", "aff_domain": "apple.com;columbia.edu;columbia.edu;columbia.edu;columbia.edu;columbia.edu;columbia.edu", "position": "Researcher;PhD student;Undergrad student;Undergrad student;Researcher;Full Professor;Associate Professor", "bibtex": "@misc{\nyan2024tpopt,\ntitle={TpopT: Efficient Trainable Template Optimization on Low-Dimensional Manifolds},\nauthor={Jingkai Yan and Shiyu Wang and Xinyu Rain Wei and Jimmy Wang and Zsuzsanna Marka and Szabolcs Marka and John Wright},\nyear={2024},\nurl={https://openreview.net/forum?id=6YZmkpivVH}\n}", "github": "", "project": "", "reviewers": "X8Hz;Y4xN;wGVm;uRtk", "site": "https://openreview.net/forum?id=6YZmkpivVH", "pdf_size": 1025707, "rating": "3;5;6;8", "confidence": "4;4;3;4", "soundness": "2;4;2;4", "contribution": "2;2;3;4", "presentation": "2;3;3;3", "wc_summary": "213;119;73;51", "wc_strengths": "23;72;33;42", "wc_weaknesses": "197;315;303;5", "wc_questions": "283;128;49;185", "wc_review": "716;634;458;283", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "2294;1433;1782;890", "reply_reviewers": "0;0;0;0", "reply_authors": "4;2;3;2", "rating_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 1.0 ], "contribution_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 114.0, 62.20128616033595 ], "wc_strengths_avg": [ 42.5, 18.309833423600555 ], "wc_weaknesses_avg": [ 205.0, 124.26584406022437 ], "wc_questions_avg": [ 161.25, 85.2829848211236 ], "wc_review_avg": [ 522.75, 166.87926024524438 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1599.75, 511.55369952723436 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.16012815380508713, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:oLJeAMIrlhMJ:scholar.google.com/&scioq=TpopT:+Efficient+Trainable+Template+Optimization+on+Low-Dimensional+Manifolds&hl=en&as_sdt=0,5", "gs_version_total": 3, "aff_unique_index": "0;1;1;1;1;1;1", "aff_unique_norm": "Apple;Columbia University", "aff_unique_dep": "Apple Inc.;", "aff_unique_url": "https://www.apple.com;https://www.columbia.edu", 
"aff_unique_abbr": "Apple;Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "6Z8rZlKpNT", "title": "Normalizing Flows For Out of Distribution Detection via Latent Density Estimation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Out-of-distribution (OOD) detection is a critical task for safe deployment of learning systems in the open world setting. In this work, we propose the use of latent density estimation via normalizing flows for the OOD task and present a fully unsupervised approach with no requirement for exposure to OOD data, avoiding researcher bias in OOD sample selection. This is a fully post-hoc method which can be applied to any pretrained model, and involves training a lightweight auxiliary normalizing flow model to perform the out-of-distribution detection via density thresholding. Experiments on OOD detection in image classification show strong results, including 98.2\\% AUROC for ImageNet-1k vs. Textures, which exceeds the state of the art by 8.4\\%. Further, we provide insights into training pitfalls that have plagued normalizing flows for use in OOD detection.", "keywords": "Out-of-distribution detection;normalizing flow;image classification", "primary_area": "probabilistic methods (Bayesian methods, variational inference, sampling, UQ, etc.)", "supplementary_material": "", "author": "Evan Cook;Marc-Antoine Lavoie;Steven L. Waslander", "authorids": "~Evan_Cook1;~Marc-Antoine_Lavoie1;~Steven_L._Waslander1", "gender": ";M;M", "homepage": ";;https://trailab.utias.utoronto.ca", "dblp": ";;18/7142", "google_scholar": ";https://scholar.google.com/citations?hl=en;jY_Bcd8AAAAJ", "orcid": ";;0000-0003-4217-4415", "linkedin": "evan-d-cook/;;", "or_profile": "~Evan_Cook1;~Marc-Antoine_Lavoie1;~Steven_Lake_Waslander1", "aff": "University of Toronto;University of Toronto;University of Toronto", "aff_domain": "utoronto.ca;utoronto.ca;utoronto.ca", "position": "MS student;PhD student;Full Professor", "bibtex": "@misc{\ncook2024normalizing,\ntitle={Normalizing Flows For Out of Distribution Detection via Latent Density Estimation},\nauthor={Evan Cook and Marc-Antoine Lavoie and Steven L. 
Waslander},\nyear={2024},\nurl={https://openreview.net/forum?id=6Z8rZlKpNT}\n}", "github": "", "project": "", "reviewers": "avuP;gGnx;wdco;E6xa;qftB", "site": "https://openreview.net/forum?id=6Z8rZlKpNT", "pdf_size": 10875232, "rating": "3;3;3;3;5", "confidence": "5;3;4;4;4", "soundness": "2;2;2;2;2", "contribution": "2;2;1;1;1", "presentation": "1;2;1;3;2", "wc_summary": "45;52;24;51;104", "wc_strengths": "33;52;15;66;69", "wc_weaknesses": "387;105;145;232;442", "wc_questions": "24;5;3;58;89", "wc_review": "489;214;187;407;704", "wc_reply_reviewers": "0;11;0;0;0", "wc_reply_authors": "192;242;224;489;539", "reply_reviewers": "0;1;0;0;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 3.4, 0.8 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.0, 0.0 ], "contribution_avg": [ 1.4, 0.4898979485566356 ], "presentation_avg": [ 1.8, 0.7483314773547883 ], "wc_summary_avg": [ 55.2, 26.40757467091592 ], "wc_strengths_avg": [ 47.0, 20.445048300260872 ], "wc_weaknesses_avg": [ 262.2, 132.10813752377254 ], "wc_questions_avg": [ 35.8, 33.12642449767255 ], "wc_review_avg": [ 400.2, 189.9214574501786 ], "wc_reply_reviewers_avg": [ 2.2, 4.4 ], "wc_reply_authors_avg": [ 337.2, 146.10051334612072 ], "reply_reviewers_avg": [ 0.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:YVVxFzWeQzIJ:scholar.google.com/&scioq=Normalizing+Flows+For+Out+of+Distribution+Detection+via+Latent+Density+Estimation&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Toronto", "aff_unique_dep": "", "aff_unique_url": "https://www.utoronto.ca", "aff_unique_abbr": "U of T", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "id": "6ZE9Zktbh6", "title": "Screening Unlearnable Examples via Iterative Self Regression", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Deep neural networks are proven to be vulnerable to data poisoning attacks. Recently, a specific type of data poisoning attack known as availability attacks, has led to the failure of data utilization for model learning by adding imperceptible perturbations to images. Consequently, it is quite beneficial and challenging to detect poisoned samples, also known as Unlearnable Examples (UEs), from a mixed dataset. To tackle this problem, in this paper, we introduce a novel Iterative Self-Regression approach for identifying UEs within a mixed dataset. This method leverages the distinction between the inherent semantic mapping rules and shortcuts, without the need for any additional information. Our investigation reveals a critical observation: when training a classifier on a mixed dataset containing both UEs and clean data, the model tends to quickly adapt to the UEs compared to the clean data. Due to the accuracy gaps between training with clean/poisoned samples, we employ a model to misclassify clean samples while correctly identifying the poisoned ones for identifying tainted samples. Furthermore, we find that it is more effective to differentiate between clean and poisoned samples and build the Iterative Self Regression algorithm. \nWith incorporated additional classes and iterative refinement, the model becomes more capable of differentiating between clean and poisoned samples. 
\nExtensive experiments demonstrate that our method outperforms state-of-the-art detection approaches across various types of attacks, datasets, and poisoning ratios, and it significantly reduces the Half Total Error Rate (HTER) in comparison to existing methods.", "keywords": "data poisoning attack;iterative self regression;availability attacks detection;unlearnable examples", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "", "author": "Yi Yu;Qichen Zheng;SIYUAN YANG;Wenhan Yang;Jun Liu;Shijian Lu;Yap-peng Tan;Kwok-Yan Lam;Alex Kot", "authorids": "~Yi_Yu5;~Qichen_Zheng1;~SIYUAN_YANG1;~Wenhan_Yang6;~Jun_Liu8;~Shijian_Lu1;~Yap-peng_Tan1;~Kwok-Yan_Lam1;~Alex_Kot1", "gender": ";M;M;M;M;M;M;M;", "homepage": "https://github.com/yuyi-sd;https://github.com/QichenZheng;;https://flyywh.github.io/;;https://personal.ntu.edu.sg/shijian.lu/;https://personal.ntu.edu.sg/eyptan/;https://personal.ntu.edu.sg/kwokyan.lam/;https://www.ntu.edu.sg/home/eackot/", "dblp": "99/111-11.html;285/4546;201/7699-1.html;156/2359.html;95/3736-36;42/2718;93/4472.html;10/1993;", "google_scholar": "https://scholar.google.com/citations?hl=en;d6AbpzgAAAAJ;lzLsF2MAAAAJ;S8nAnakAAAAJ;Q5Ild8UAAAAJ;https://scholar.google.com.sg/scholar?hl=en;https://scholar.google.com.sg/citations?user=t9EqYQIAAAAJ;https://scholar.google.com.sg/citations?user=NDMIYKsAAAAJ;", "orcid": "0000-0003-2730-9553;;0000-0003-4681-0431;;;;0000-0002-0645-9109;;", "linkedin": "%E7%9B%8A-%E4%BD%99-6b453a229;;;;;;;;", "or_profile": "~Yi_Yu5;~Qichen_Zheng1;~SIYUAN_YANG1;~Wenhan_Yang6;~Jun_Liu8;~Shijian_Lu1;~Yap-peng_Tan1;~Kwok-Yan_Lam1;~Alex_Kot1", "aff": "Nanyang Technological University;Nanyang Technological University;Nanyang Technological University;Peng Cheng Laboratory;Singapore University of Technology and Design;Nanyang Technological University;Nanyang Technological University;Nanyang Technological University;Nanyang Technological University", "aff_domain": "ntu.edu.sg;ntu.edu;ntu.edu.sg;pcl.ac.cn;sutd.edu.sg;ntu.edu.sg;ntu.edu.sg;ntu.edu.sg;ntu.edu.sg", "position": "PhD student;PhD student;PhD student;Researcher;Assistant Professor;Associate Professor;Full Professor;Full Professor;Full Professor", "bibtex": "@misc{\nyu2024screening,\ntitle={Screening Unlearnable Examples via Iterative Self Regression},\nauthor={Yi Yu and Qichen Zheng and SIYUAN YANG and Wenhan Yang and Jun Liu and Shijian Lu and Yap-peng Tan and Kwok-Yan Lam and Alex Kot},\nyear={2024},\nurl={https://openreview.net/forum?id=6ZE9Zktbh6}\n}", "github": "", "project": "", "reviewers": "6SZH;Ymu2;cckS;cGpA", "site": "https://openreview.net/forum?id=6ZE9Zktbh6", "pdf_size": 793902, "rating": "1;3;5;5", "confidence": "5;4;4;4", "soundness": "2;2;2;3", "contribution": "1;2;3;3", "presentation": "1;2;3;2", "wc_summary": "36;90;92;63", "wc_strengths": "7;43;45;53", "wc_weaknesses": "654;206;325;115", "wc_questions": "16;166;3;17", "wc_review": "713;505;465;248", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 3.5, 1.6583123951777 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 70.25, 22.851422275210794 ], "wc_strengths_avg": [ 37.0, 17.72004514666935 ], "wc_weaknesses_avg": [ 325.0, 204.0232829850554 ], "wc_questions_avg": [ 50.5, 66.91225597751132 ], "wc_review_avg": [ 482.75, 
165.024808740989 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:5V7wE0J09X8J:scholar.google.com/&scioq=Screening+Unlearnable+Examples+via+Iterative+Self+Regression&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;1;2;0;0;0;0", "aff_unique_norm": "Nanyang Technological University;Pengcheng Laboratory;Singapore University of Technology and Design", "aff_unique_dep": ";Peng Cheng Laboratory;", "aff_unique_url": "https://www.ntu.edu.sg;http://www.pcl.ac.cn;https://www.sutd.edu.sg", "aff_unique_abbr": "NTU;PCL;SUTD", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0;0;0;0", "aff_country_unique": "Singapore;China" }, { "id": "6ZbMLZb4gL", "title": "Big Picture Thinking: Enhance Multi-Agent Imitation Learning through Global Dependencies", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Multi-agent reinforcement learning (MARL) has emerged as a promising approach for solving complex problems involving multi-agent collaboration or competition. Recently, researchers have turned to imitation learning to avoid the explicit design of intricate reward functions in MARL. By formulating the problem as a distribution-matching task based on expert trajectories, imitation learning enables agents to continually approximate expert policies without requiring manual reward engineering. However, classical multi-agent imitation learning frameworks, such as MAGAIL, often treat individual agent's distribution matching independently, disregarding the intricate dependencies that arise from agent cooperation. This neglect results in inaccurate estimations of action-value functions, weak feedback from the discriminator, and a significant vanishing gradient problem. This paper proposed a novel multi-agent joint distribution matching framework based on the Transformer architecture. It explicitly models global dependencies among agents within the generator and discriminator components sequentially and autoregressively. We also theoretically prove the effectiveness of this framework in enhancing reward variance and advantage gradient. 
Extensive experiments demonstrated the remarkable performance improvements achieved by our proposed method on various benchmarks.", "keywords": "Multi-agent reinforcement learning;Generative adversarial imitation learning;Complex dependency;Joint distribution matching;Transformer", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/851cac550a088a88f457f28cd3bc58c8ff7325a6.zip", "author": "Tianchen Zhu", "authorids": "~Tianchen_Zhu1", "gender": "M", "homepage": "https://zhutc.tk", "dblp": "https://dblp.uni-trier.de/pid/163/4107", "google_scholar": "P60wcZwAAAAJ", "orcid": "", "linkedin": "", "or_profile": "~Tianchen_Zhu1", "aff": "Beihang University", "aff_domain": "buaa.edu.cn", "position": "PhD student", "bibtex": "@misc{\nzhu2024big,\ntitle={Big Picture Thinking: Enhance Multi-Agent Imitation Learning through Global Dependencies},\nauthor={Tianchen Zhu},\nyear={2024},\nurl={https://openreview.net/forum?id=6ZbMLZb4gL}\n}", "github": "", "project": "", "reviewers": "", "site": "https://openreview.net/forum?id=6ZbMLZb4gL", "pdf_size": 798943, "rating": "", "confidence": "", "soundness": "", "contribution": "", "presentation": "", "wc_summary": "", "wc_strengths": "", "wc_weaknesses": "", "wc_questions": "", "wc_review": "", "wc_reply_reviewers": "", "wc_reply_authors": "", "reply_reviewers": "", "reply_authors": "", "rating_avg": [ 0, 0 ], "confidence_avg": [ 0, 0 ], "soundness_avg": [ 0, 0 ], "contribution_avg": [ 0, 0 ], "presentation_avg": [ 0, 0 ], "wc_summary_avg": [ 0, 0 ], "wc_strengths_avg": [ 0, 0 ], "wc_weaknesses_avg": [ 0, 0 ], "wc_questions_avg": [ 0, 0 ], "wc_review_avg": [ 0, 0 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 0, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:nTV4pdGrPvYJ:scholar.google.com/&scioq=Big+Picture+Thinking:+Enhance+Multi-Agent+Imitation+Learning+through+Global+Dependencies&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Beihang University", "aff_unique_dep": "", "aff_unique_url": "http://www.buaa.edu.cn/", "aff_unique_abbr": "BUAA", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "6ZuDeSHzjj", "title": "Outliers Memorized Last: Trends in Memorization of Diffusion Models Based on Training Distribution and Epoch", "track": "main", "status": "Reject", "tldr": "", "abstract": "Memorization and replication of training data in diffusion models like Stable Diffusion is a poorly understood phenomenon with a number of privacy and legal issues tied to it. This paper analyzes how the location of a data point in the training dataset's distribution affects its likelihood of memorization over training epochs. Importantly, it finds that memorization of 'outliers' is less likely early in the training process until eventually matching with the rest of the dataset. It then suggests applications utilizing this difference in memorization rate, including hyperparameter tuning and anomaly detection. 
It then suggests research that could be done from this conclusion to further improve memorization understanding.", "keywords": "Diffusion Models;Generative AI;Memorization", "primary_area": "generative models", "supplementary_material": "/attachment/81b134131e59b8d7087486d7e50684b54846fea0.zip", "author": "Aryan Janolkar", "authorids": "~Aryan_Janolkar1", "gender": "M", "homepage": "", "dblp": "", "google_scholar": "", "orcid": "", "linkedin": "aryan-janolkar-99b152197/", "or_profile": "~Aryan_Janolkar1", "aff": "University of California, Los Angeles", "aff_domain": "ucla.edu", "position": "Undergrad student", "bibtex": "@misc{\njanolkar2024outliers,\ntitle={Outliers Memorized Last: Trends in Memorization of Diffusion Models Based on Training Distribution and Epoch},\nauthor={Aryan Janolkar},\nyear={2024},\nurl={https://openreview.net/forum?id=6ZuDeSHzjj}\n}", "github": "", "project": "", "reviewers": "uuB7;FcWa;ehcz;c1Ba", "site": "https://openreview.net/forum?id=6ZuDeSHzjj", "pdf_size": 234421, "rating": "1;1;1;3", "confidence": "4;5;5;4", "soundness": "1;2;1;2", "contribution": "1;2;1;2", "presentation": "1;1;1;2", "wc_summary": "45;132;55;75", "wc_strengths": "14;45;48;12", "wc_weaknesses": "18;63;453;49", "wc_questions": "1;1;77;5", "wc_review": "78;241;633;141", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 1.5, 0.8660254037844386 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 1.5, 0.5 ], "contribution_avg": [ 1.5, 0.5 ], "presentation_avg": [ 1.25, 0.4330127018922193 ], "wc_summary_avg": [ 76.75, 33.677700337166726 ], "wc_strengths_avg": [ 29.75, 16.798437427332342 ], "wc_weaknesses_avg": [ 145.75, 178.13671014139675 ], "wc_questions_avg": [ 21.0, 32.37282811247729 ], "wc_review_avg": [ 273.25, 215.68075366151706 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6203931028012849713&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "University of California, Los Angeles", "aff_unique_dep": "", "aff_unique_url": "https://www.ucla.edu", "aff_unique_abbr": "UCLA", "aff_campus_unique_index": "0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "6aRMQVlPVE", "title": "Rank-adaptive spectral pruning of convolutional layers during training", "track": "main", "status": "Reject", "tldr": "", "abstract": "The computing cost and memory demand of deep learning pipelines have grown fast in recent years and thus a variety of techniques have been developed to reduce model parameters. The majority of these techniques focus on reducing inference costs by pruning the network after a pass of full training. A smaller number of methods addresses the reduction of training costs, mostly based on compressing the network via low-rank layer factorizations. Despite their efficiency for linear layers, these methods fail to effectively handle convolutional filters. In this work, we propose a low-parametric training method that factorizes the convolutions into tensor Tucker format and adaptively prunes the Tucker ranks of the convolutional kernel during training. 
Leveraging fundamental results from geometric integration theory of differential equations on tensor manifolds, we obtain a robust training algorithm that provably approximates the full baseline performance and guarantees loss descent. \nA variety of experiments against the full model and alternative low-rank baselines are implemented, \nshowing that the proposed method drastically reduces the training costs, while achieving high performance, comparable to or better than the full baseline, outperforming competing low-rank approaches.", "keywords": "Convolutional neural networks;Neural Network Compression;Low-Rank Tensors;Dynamical Low-Rank Approximation;Neural Network Training;Pruning", "primary_area": "optimization", "supplementary_material": "/attachment/9af1937b3d262e44f4cbaa472d4994cb713de705.zip", "author": "Emanuele Zangrando;Steffen Schotth\u00f6fer;Gianluca Ceruti;Jonas Kusch;Francesco Tudisco", "authorids": "~Emanuele_Zangrando1;~Steffen_Schotth\u00f6fer1;~Gianluca_Ceruti1;~Jonas_Kusch1;~Francesco_Tudisco1", "gender": "M;M;M;M;M", "homepage": ";https://scsteffen.github.io/;;;https://ftudisco.gitlab.io/", "dblp": "321/1701;;;236/0493;136/5777", "google_scholar": "https://scholar.google.it/citations?hl=it;dZqiHeMAAAAJ;eyptuo8AAAAJ;https://scholar.google.de/citations?user=8JGYQTYAAAAJ;uND_5REAAAAJ", "orcid": ";;;0000-0002-2061-2114;0000-0002-8150-4475", "linkedin": ";steffen-schotthoefer/;;;", "or_profile": "~Emanuele_Zangrando1;~Steffen_Schotth\u00f6fer1;~Gianluca_Ceruti1;~Jonas_Kusch1;~Francesco_Tudisco1", "aff": "Gran Sasso Science Institute;Oak Ridge National Laboratory;Universit\u00e4t Innsbruck;Norwegian University of Life Sciences;Gran Sasso Science Institute", "aff_domain": "gssi.it;ornl.gov;uibk.ac.at;nmbu.no;gssi.it", "position": "PhD student;Researcher;Postdoc;Associate Professor;Associate Professor", "bibtex": "@misc{\nzangrando2024rankadaptive,\ntitle={Rank-adaptive spectral pruning of convolutional layers during training},\nauthor={Emanuele Zangrando and Steffen Schotth{\\\"o}fer and Gianluca Ceruti and Jonas Kusch and Francesco Tudisco},\nyear={2024},\nurl={https://openreview.net/forum?id=6aRMQVlPVE}\n}", "github": "", "project": "", "reviewers": "SS8V;T5Ao;AtDm", "site": "https://openreview.net/forum?id=6aRMQVlPVE", "pdf_size": 395678, "rating": "3;5;5", "confidence": "4;3;2", "soundness": "2;3;2", "contribution": "2;2;2", "presentation": "3;3;2", "wc_summary": "84;62;80", "wc_strengths": "66;11;59", "wc_weaknesses": "259;26;62", "wc_questions": "97;104;46", "wc_review": "506;203;247", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "804;401;414", "reply_reviewers": "0;0;0", "reply_authors": "2;1;1", "rating_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 75.33333333333333, 9.568466729604882 ], "wc_strengths_avg": [ 45.333333333333336, 24.44494948973214 ], "wc_weaknesses_avg": [ 115.66666666666667, 102.41202186375494 ], "wc_questions_avg": [ 82.33333333333333, 25.84999462712173 ], "wc_review_avg": [ 318.6666666666667, 133.6770569527754 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 539.6666666666666, 186.98722475672562 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8660254037844385, 
"gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6015670969082691589&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "Gran Sasso Science Institute;Oak Ridge National Laboratory;University of Innsbruck;Norwegian University of Life Sciences", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.gssi.it;https://www.ornl.gov;https://www.uibk.ac.at;https://www.nmbu.no", "aff_unique_abbr": ";ORNL;UIBK;NMBU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Innsbruck", "aff_country_unique_index": "0;1;2;3;0", "aff_country_unique": "Italy;United States;Austria;Norway" }, { "id": "6bAfAcuuZD", "title": "Emergence of Surprise and Predictive Signals from Local Contrastive Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Hierarchical predictive models are often used to model cortical representations. These models exploit the local or global computation of predictive signals in the neural network, but their biological plausibility is limited as it is currently unknown whether cortical circuits perform such computations at all. This paper seeks to further investigate the inverted Forward-Forward Algorithm, a biologically plausible innovative approach to learning with only forward passes, in order to demonstrate that hierarchical predictive computations can emerge from a simpler contrastive constraint on the network's representation. Through the identification of compelling similarities between our model and hierarchical predictive coding, as well as the examination of the emergent properties of resulting representations, we advance the hypothesis that the computational properties that emerge in neocortical circuits, widely acknowledged as the basis of human intelligence, may be attributed to local learning principles.", "keywords": "Forward Forward Algorithm;Contrastive Learning;Predictive Coding;Cortical Representations;Biological Plausibility", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/85112c502d5cd9b6eba345889c23a1fe0a5b3054.pdf", "author": "Andrew Lawrence Smith;Linxing Preston Jiang;Stefano Recanatesi;Matthew Storm Bull", "authorids": "~Andrew_Lawrence_Smith1;~Linxing_Preston_Jiang1;~Stefano_Recanatesi1;~Matthew_Storm_Bull1", "gender": "M;M;M;", "homepage": "https://github.com/and-rewsmith;https://lpjiang97.github.io/;;", "dblp": ";;;", "google_scholar": ";B706p2YAAAAJ;;79lMvCMRCK8C", "orcid": ";;0000-0002-3576-9261;", "linkedin": "and-rewsmith/;;;", "or_profile": "~Andrew_Lawrence_Smith1;~Linxing_Preston_Jiang1;~Stefano_Recanatesi1;~Matthew_Storm_Bull1", "aff": ";Department of Computer Science, University of Washington;University of Washington;Allen Institute + University of Washington", "aff_domain": ";cs.washington.edu;uw.edu;alleninstitute.org", "position": ";PhD student;Postdoc;Postdoc", "bibtex": "@misc{\nsmith2024emergence,\ntitle={Emergence of Surprise and Predictive Signals from Local Contrastive Learning},\nauthor={Andrew Lawrence Smith and Linxing Preston Jiang and Stefano Recanatesi and Matthew Storm Bull},\nyear={2024},\nurl={https://openreview.net/forum?id=6bAfAcuuZD}\n}", "github": "", "project": "", "reviewers": "3133;mvwV;D7tC;fE2s", "site": "https://openreview.net/forum?id=6bAfAcuuZD", "pdf_size": 707504, "rating": "3;3;8;8", "confidence": "4;4;4;4", "soundness": "2;1;3;3", "contribution": "2;2;3;3", "presentation": "2;1;4;3", "wc_summary": "153;163;63;219", 
"wc_strengths": "34;60;178;105", "wc_weaknesses": "736;286;102;36", "wc_questions": "331;85;180;36", "wc_review": "1254;594;523;396", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "692;305;452;148", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 2.5 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 149.5, 55.91734972260399 ], "wc_strengths_avg": [ 94.25, 54.61856369404087 ], "wc_weaknesses_avg": [ 290.0, 273.3093485411723 ], "wc_questions_avg": [ 158.0, 112.50111110562419 ], "wc_review_avg": [ 691.75, 332.2742654795884 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 399.25, 200.30898007827807 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:bpC6jt93z8QJ:scholar.google.com/&scioq=Emergence+of+Surprise+and+Predictive+Signals+from+Local+Contrastive+Learning&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;1+0", "aff_unique_norm": "University of Washington;Allen Institute for Artificial Intelligence", "aff_unique_dep": "Department of Computer Science;", "aff_unique_url": "https://www.washington.edu;https://allenai.org", "aff_unique_abbr": "UW;AI2", "aff_campus_unique_index": "0;", "aff_campus_unique": "Seattle;", "aff_country_unique_index": "0;0;0+0", "aff_country_unique": "United States" }, { "title": "Unmasking and Improving Data Credibility: A Study with Datasets for Training Harmless Language Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19388", "id": "6bcAD6g688", "author_site": "Zhaowei Zhu, Jialu Wang, Hao Cheng, Yang Liu", "tldr": "", "abstract": "Language models have shown promise in various tasks but can be affected by undesired data during training, fine-tuning, or alignment. For example, if some unsafe conversations are wrongly annotated as safe ones, the model fine-tuned on these samples may be harmful. Therefore, the correctness of annotations, i.e., the credibility of the dataset, is important. This study focuses on the credibility of real-world datasets, including the popular benchmarks Jigsaw Civil Comments, Anthropic Harmless & Red Team, PKU BeaverTails & SafeRLHF, that can be used for training a harmless language model. Given the cost and difficulty of cleaning these datasets by humans, we introduce a systematic framework for evaluating the credibility of datasets, identifying label errors, and evaluating the influence of noisy labels in the curated language data, specifically focusing on unsafe comments and conversation classification. With the framework, we find and fix an average of **6.16\\%** label errors in **11** datasets constructed from the above benchmarks. The data credibility and downstream learning performance can be remarkably improved by directly fixing label errors, indicating the significance of cleaning existing real-world datasets. 
Code is available at [https://github.com/Docta-ai/docta](https://github.com/Docta-ai/docta).", "keywords": "Label errors;dataset cleaning;AI safety;toxicity;harmless;language models", "primary_area": "datasets and benchmarks", "supplementary_material": "/attachment/e944516261150a416fc2926d2276766a0a3dc06c.zip", "author": "Zhaowei Zhu;Jialu Wang;Hao Cheng;Yang Liu", "authorids": "~Zhaowei_Zhu1;~Jialu_Wang1;~Hao_Cheng5;~Yang_Liu3", "gender": "M;;M;M", "homepage": "https://www.zzw.ai;https://people.ucsc.edu/~jwang470/;https://haochenglouis.github.io;http://www.yliuu.com", "dblp": "202/1712;195/2701;;51/3710-18", "google_scholar": "YS8pSQoAAAAJ;HOtDeN0AAAAJ;ftlVqVIAAAAJ;jKrIVCIAAAAJ", "orcid": "0000-0003-3894-5862;;0000-0001-8864-7818;0000-0001-8420-6011", "linkedin": ";;;", "or_profile": "~Zhaowei_Zhu1;~Jialu_Wang1;~Hao_Cheng5;~Yang_Liu3", "aff": "Docta.ai;University of California, Santa Cruz;University of California, Santa Cruz;University of California, Santa Cruz", "aff_domain": "docta.ai;ucsc.edu;ucsc.edu;ucsc.edu", "position": "Researcher;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nzhu2024unmasking,\ntitle={Unmasking and Improving Data Credibility: A Study with Datasets for Training Harmless Language Models},\nauthor={Zhaowei Zhu and Jialu Wang and Hao Cheng and Yang Liu},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=6bcAD6g688}\n}", "github": "", "project": "", "reviewers": "rcNR;LDAu;RBmm;c2zo", "pdf_size": 417690, "rating": "5;6;6;6", "confidence": "3;3;3;3", "soundness": "3;3;3;3", "contribution": "2;3;3;3", "presentation": "1;3;3;3", "wc_summary": "69;133;99;54", "wc_strengths": "30;64;8;74", "wc_weaknesses": "166;144;71;298", "wc_questions": "35;24;90;28", "wc_review": "300;365;268;454", "wc_reply_reviewers": "0;27;0;47", "wc_reply_authors": "1149;633;446;1342", "reply_reviewers": "0;1;0;1", "reply_authors": "3;3;2;4", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 88.75, 30.252066045148055 ], "wc_strengths_avg": [ 44.0, 26.419689627245813 ], "wc_weaknesses_avg": [ 169.75, 81.97065023531289 ], "wc_questions_avg": [ 44.25, 26.705570579937064 ], "wc_review_avg": [ 346.75, 71.1033578672625 ], "wc_reply_reviewers_avg": [ 18.5, 19.80530232033836 ], "wc_reply_authors_avg": [ 892.5, 365.56292208045386 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14596580015436199352&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=6bcAD6g688", "pdf": "https://openreview.net/pdf?id=6bcAD6g688", "email": "docta.ai;ucsc.edu;ucsc.edu;ucsc.edu", "author_num": 4, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Docta.ai;University of California, Santa Cruz", "aff_unique_dep": ";", "aff_unique_url": "https://docta.ai;https://www.ucsc.edu", "aff_unique_abbr": "Docta.ai;UCSC", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Santa Cruz", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "6c4gv0E9sF", "title": "SpikeBERT: A Language Spikformer Learned from BERT with Knowledge Distillation", "track": "main", "status": 
"Withdraw", "tldr": "", "abstract": "Spiking neural networks (SNNs) offer a promising avenue to implement deep neural networks in a more energy-efficient way.\nHowever, the network architectures of existing SNNs for language tasks are still simplistic and relatively shallow, and deep architectures have not been fully explored, resulting in a significant performance gap compared to mainstream transformer-based networks such as BERT.\nTo this end, we improve a recently-proposed spiking Transformer (i.e., Spikformer) to make it possible to process language tasks and propose a two-stage knowledge distillation method for training it, which combines pre-training by distilling knowledge from BERT with a large collection of unlabelled texts and fine-tuning with task-specific instances via knowledge distillation again from the BERT fine-tuned on the same training examples.\nThrough extensive experimentation, we show that the models trained with our method, named SpikeBERT, outperform state-of-the-art SNNs and even achieve comparable results to BERTs on text classification tasks for both English and Chinese with much less energy consumption.", "keywords": "Spiking neural networks;Natural language processing;Knowledge distillation", "primary_area": "applications to neuroscience & cognitive science", "supplementary_material": "/attachment/c90775d34aead3652b58ce36b83217f4b378a525.zip", "author": "Changze Lv;Tianlong Li;Jianhan Xu;Chenxi Gu;Zixuan Ling;Cenyuan Zhang;Xiaoqing Zheng;Xuanjing Huang", "authorids": "~Changze_Lv1;~Tianlong_Li4;~Jianhan_Xu1;~Chenxi_Gu2;~Zixuan_Ling1;~Cenyuan_Zhang1;~Xiaoqing_Zheng2;~Xuanjing_Huang1", "gender": "M;M;M;;M;;;F", "homepage": "https://lvchangze.github.io;https://github.com/Tengyuantuohai-113;;;https://github.com/narcissusLZX;;;https://xuanjing-huang.github.io/", "dblp": "350/4445;;278/1558.html;;;293/9880;;05/6735-1", "google_scholar": "t3-viUwAAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN;G_p-oocAAAAJ;;;ghu4BZcAAAAJ;;RGsMgZA4H78C", "orcid": ";;;;;;;0000-0001-9197-9426", "linkedin": ";;;;;;;", "or_profile": "~Changze_Lv1;~Tianlong_Li4;~Jianhan_Xu1;~Chenxi_Gu2;~Zixuan_Ling1;~Cenyuan_Zhang1;~Xiaoqing_Zheng2;~Xuanjing_Huang1", "aff": "Fudan University;Fudan University;Xiaohongshu;;Fudan University;Fudan University;;Fudan University", "aff_domain": "fudan.edu.cn;fudan.edu.cn;xiaohongshu.com;;fudan.edu.cn;fudan.edu.cn;;fudan.edu.cn", "position": "PhD student;MS student;Researcher;;MS student;MS student;;Full Professor", "bibtex": "@misc{\nanonymous2024spikebert,\ntitle={Spike{BERT}: A Language Spikformer Learned from {BERT} with Knowledge Distillation},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=6c4gv0E9sF}\n}", "github": "", "project": "", "reviewers": "sLn7;17Yy;is3e", "site": "https://openreview.net/forum?id=6c4gv0E9sF", "pdf_size": 916951, "rating": "3;8;8", "confidence": "4;5;4", "soundness": "2;3;3", "contribution": "1;3;4", "presentation": "2;3;3", "wc_summary": "43;66;117", "wc_strengths": "45;58;102", "wc_weaknesses": "475;50;67", "wc_questions": "438;32;84", "wc_review": "1001;206;370", "wc_reply_reviewers": "115;23;0", "wc_reply_authors": "3499;487;958", "reply_reviewers": "2;1;0", "reply_authors": "7;2;2", "rating_avg": [ 6.333333333333333, 2.357022603955158 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 1.247219128924647 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], 
"wc_summary_avg": [ 75.33333333333333, 30.922843048824316 ], "wc_strengths_avg": [ 68.33333333333333, 24.390344173235622 ], "wc_weaknesses_avg": [ 197.33333333333334, 196.46260599807675 ], "wc_questions_avg": [ 184.66666666666666, 180.38723778459374 ], "wc_review_avg": [ 525.6666666666666, 342.7149771392484 ], "wc_reply_reviewers_avg": [ 46.0, 49.68567868779359 ], "wc_reply_authors_avg": [ 1648.0, 1322.903624607628 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 3.6666666666666665, 2.3570226039551585 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9584516174752539551&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "Fudan University;Xiaohongshu", "aff_unique_dep": ";", "aff_unique_url": "https://www.fudan.edu.cn;https://www.xiaohongshu.com", "aff_unique_abbr": "Fudan;XHS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "6cDEcJsE1Y", "title": "Certainty In, Certainty Out: REVQCs for Quantum Machine Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "The field of Quantum Machine Learning (QML) has emerged recently in the hopes of finding new machine learning protocols or exponential speedups for classical ones. Apart from problems with vanishing gradients and efficient encoding methods, these speedups are hard to find because the sampling nature of quantum computers promotes either simulating computations classically or running them many times on quantum computers in order to use approximate expectation values in gradient calculations. In this paper, we make a case for setting high single-sample accuracy as a primary goal. We discuss the statistical theory which enables highly accurate and precise sample inference, and propose a method of reversed training towards this end. 
We show the effectiveness of this training method by assessing several effective variational quantum circuits (VQCs), trained in both the standard and reversed directions, on random binary subsets of the MNIST and MNIST Fashion datasets, on which our method provides an increase of $10-15\\\\%$ in single-sample inference accuracy.", "keywords": "Quantum machine learning;variational quantum circuits;receptive field;alleatoric uncertainty;epistemic uncertainty", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Hannah D Helgesen;Michael Felsberg;Jan-\u00c5ke Larsson", "authorids": "~Hannah_D_Helgesen1;~Michael_Felsberg2;~Jan-\u00c5ke_Larsson1", "gender": "M;;M", "homepage": "https://liu.se/medarbetare/hanhe49;https://liu.se/en/employee/micfe03;https://liu.se/en/employee/janla64", "dblp": ";00/78;95/10166", "google_scholar": ";https://scholar.google.se/citations?hl=en;https://scholar.google.se/citations?user=lVhwET4AAAAJ", "orcid": ";0000-0002-6096-3648;0000-0002-1082-8325", "linkedin": "hannah-helgesen;https://linkedin.com/in/michael-felsberg-668a202;", "or_profile": "~Hannah_D_Helgesen1;~Michael_Felsberg2;~Jan-\u00c5ke_Larsson1", "aff": "Link\u00f6ping University;Link\u00f6ping University;Link\u00f6ping University", "aff_domain": "liu.se;liu.se;liu.se", "position": "PhD student;Full Professor;Full Professor", "bibtex": "@misc{\nhelgesen2024certainty,\ntitle={Certainty In, Certainty Out: {REVQC}s for Quantum Machine Learning},\nauthor={Hannah D Helgesen and Michael Felsberg and Jan-{\\r{A}}ke Larsson},\nyear={2024},\nurl={https://openreview.net/forum?id=6cDEcJsE1Y}\n}", "github": "", "project": "", "reviewers": "eDnW;Udj4;2wYu", "site": "https://openreview.net/forum?id=6cDEcJsE1Y", "pdf_size": 1110376, "rating": "3;3;3", "confidence": "5;4;5", "soundness": "1;1;2", "contribution": "2;1;2", "presentation": "1;2;3", "wc_summary": "50;77;35", "wc_strengths": "32;23;29", "wc_weaknesses": "208;260;353", "wc_questions": "63;18;34", "wc_review": "353;378;451", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 3.0, 0.0 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "soundness_avg": [ 1.3333333333333333, 0.4714045207910317 ], "contribution_avg": [ 1.6666666666666667, 0.4714045207910317 ], "presentation_avg": [ 2.0, 0.816496580927726 ], "wc_summary_avg": [ 54.0, 17.378147196982766 ], "wc_strengths_avg": [ 28.0, 3.7416573867739413 ], "wc_weaknesses_avg": [ 273.6666666666667, 59.979626170521904 ], "wc_questions_avg": [ 38.333333333333336, 18.624953392931992 ], "wc_review_avg": [ 394.0, 41.57723736212721 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:PbhUiwKDKskJ:scholar.google.com/&scioq=Certainty+In,+Certainty+Out:+REVQCs+for+Quantum+Machine+Learning&hl=en&as_sdt=0,5", "gs_version_total": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Link\u00f6ping University", "aff_unique_dep": "", "aff_unique_url": "https://www.liu.se", "aff_unique_abbr": "LiU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Sweden" }, { "title": "Local Search GFlowNets", "status": "Spotlight", "track": "main", "site": 
"https://iclr.cc/virtual/2024/poster/19387", "id": "6cFcw1Rxww", "author_site": "Minsu Kim, Yun Taeyoung, Emmanuel Bengio, Dinghuai Zhang, Yoshua Bengio, Sungsoo Ahn, Jinkyoo Park", "tldr": "", "abstract": "Generative Flow Networks (GFlowNets) are amortized sampling methods that learn a distribution over discrete objects proportional to their rewards. GFlowNets exhibit a remarkable ability to generate diverse samples, yet occasionally struggle to consistently produce samples with high rewards due to over-exploration on wide sample space. \nThis paper proposes to train GFlowNets with local search, which focuses on exploiting high-rewarded sample space to resolve this issue. Our main idea is to explore the local neighborhood via backtracking and reconstruction guided by backward and forward policies, respectively. This allows biasing the samples toward high-reward solutions, which is not possible for a typical GFlowNet solution generation scheme, which uses the forward policy to generate the solution from scratch. Extensive experiments demonstrate a remarkable performance improvement in several biochemical tasks. Source code is available: \\url{https://github.com/dbsxodud-11/ls_gfn}.", "keywords": "GFlowNet;molecule optimization;biological sequence design;local search;reinforcement learning", "primary_area": "generative models", "supplementary_material": "", "author": "Minsu Kim;Taeyoung Yun;Emmanuel Bengio;Dinghuai Zhang;Yoshua Bengio;Sungsoo Ahn;Jinkyoo Park", "authorids": "~Minsu_Kim2;~Taeyoung_Yun1;~Emmanuel_Bengio1;~Dinghuai_Zhang1;~Yoshua_Bengio1;~Sungsoo_Ahn1;~Jinkyoo_Park1", "gender": "M;M;M;;M;M;M", "homepage": "https://minsuukim.github.io/;https://dbsxodud-11.github.io;http://folinoid.com;;http://yoshuabengio.org;https://sungsooahn.super.site/;http://silab.kaist.ac.kr/", "dblp": ";358/5797.html;137/8040;;56/953;90/5164;156/7535", "google_scholar": "https://scholar.google.ca/citations?user=VvyLuhAAAAAJ;_51PhLQAAAAJ;https://scholar.google.ca/citations?user=yVtSOt8AAAAJ;;kukA0LcAAAAJ;XTenHs0AAAAJ;sH2a0nkAAAAJ", "orcid": ";0009-0001-4602-6367;;;;;0000-0003-2620-1479", "linkedin": ";;;;yoshuabengio/?originalSubdomain=ca;;", "or_profile": "~Minsu_Kim2;~Taeyoung_Yun1;~Emmanuel_Bengio1;~Dinghuai_Zhang1;~Yoshua_Bengio1;~Sungsoo_Ahn1;~Jinkyoo_Park1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Valence Labs powered by recursion;;University of Montreal;Pohang University of Science and Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;valencelabs.com;;umontreal.ca;postech.ac.kr;kaist.ac.kr", "position": "PhD student;MS student;Researcher;;Full Professor;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nkim2024local,\ntitle={Local Search {GF}lowNets},\nauthor={Minsu Kim and Taeyoung Yun and Emmanuel Bengio and Dinghuai Zhang and Yoshua Bengio and Sungsoo Ahn and Jinkyoo Park},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=6cFcw1Rxww}\n}", "github": "", "project": "", "reviewers": "xEGb;wUkH;4fxw", "pdf_size": 3084550, "rating": "6;6;8", "confidence": "4;3;2", "soundness": "3;3;3", "contribution": "2;3;3", "presentation": "3;4;3", "wc_summary": "46;99;75", "wc_strengths": "36;90;26", "wc_weaknesses": "42;201;8", "wc_questions": "61;151;22", "wc_review": "185;541;131", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "570;1708;360", "reply_reviewers": "0;0;0", "reply_authors": 
"3;5;3", "rating_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 73.33333333333333, 21.66923061752668 ], "wc_strengths_avg": [ 50.666666666666664, 28.110891523077356 ], "wc_weaknesses_avg": [ 83.66666666666667, 84.12028424953296 ], "wc_questions_avg": [ 78.0, 54.01851534427802 ], "wc_review_avg": [ 285.6666666666667, 181.88885495146633 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 879.3333333333334, 592.194412521954 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 3.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=307485571580394019&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 6, "openreview": "https://openreview.net/forum?id=6cFcw1Rxww", "pdf": "https://openreview.net/pdf?id=6cFcw1Rxww", "email": "kaist.ac.kr;kaist.ac.kr;valencelabs.com;;umontreal.ca;postech.ac.kr;kaist.ac.kr", "author_num": 7, "aff_unique_index": "0;0;1;2;3;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Valence Labs;University of Montreal;Pohang University of Science and Technology", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.kaist.ac.kr;;https://wwwumontreal.ca;https://www.postech.ac.kr", "aff_unique_abbr": "KAIST;;UM;POSTECH", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pohang", "aff_country_unique_index": "0;0;2;0;0", "aff_country_unique": "South Korea;;Canada" }, { "id": "6cGiRiExUd", "title": "Efficient Point Cloud Matching for 3D Geometric Shape Assembly", "track": "main", "status": "Reject", "tldr": "", "abstract": "Learning to assemble geometric shapes into a larger target structure is a fundamental task with various high-level visual applications. In this work, we frame this problem as geometric registration with extremely low overlap. Our goal is to establish accurate correspondences on the mating surface of the shape fragments to predict their relative rigid transformations for assembly. To this end, we introduce Proxy Match Transform (PMT), an approximate high-order feature transform layer that enables reliable correspondences between dense point clouds of shape fragments, while incurring low costs in memory and compute. 
In our experiments, we demonstrate that Proxy Match Transform surpasses existing state-of-the-art baselines on a popular geometric shape assembly dataset, while exhibiting higher efficiency than other high-order feature transform methods.", "keywords": "Geometric shape assembly;High-dimensional feature transform;Correlation aggregation;Proxy Match Transform", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Nahyuk Lee;Juhong Min;Junha Lee;Seungwook Kim;Kanghee Lee;Jaesik Park;Minsu Cho", "authorids": "~Nahyuk_Lee1;~Juhong_Min1;~Junha_Lee2;~Seungwook_Kim2;~Kanghee_Lee1;~Jaesik_Park3;~Minsu_Cho1", "gender": "M;;M;;;M;M", "homepage": "https://nahyuklee.github.io/;;https://junha-l.github.io;;;http://jaesik.info;http://cvlab.postech.ac.kr/~mcho/", "dblp": "383/1188;;53/11266;;;00/10336;", "google_scholar": "QQamvI0AAAAJ;;RB7qMm4AAAAJ;;;_3q6KBIAAAAJ;5TyoF5QAAAAJ", "orcid": ";;;;;;", "linkedin": "nahyuk-lee/;;https://linkedin.com/in/junha-l/;;;;minsu-cho-062b3750/", "or_profile": "~Nahyuk_Lee1;~Juhong_Min1;~Junha_Lee2;~Seungwook_Kim2;~Kanghee_Lee1;~Jaesik_Park3;~Minsu_Cho1", "aff": "Pohang University of Science and Technology;;Pohang University of Science and Technology;;;Seoul National University;POSTECH", "aff_domain": "postech.ac.kr;;postech.ac.kr;;;snu.ac.kr;postech.ac.kr", "position": "MS student;;PhD student;;;Assistant Professor;Associate Professor", "bibtex": "@misc{\nlee2024efficient,\ntitle={Efficient Point Cloud Matching for 3D Geometric Shape Assembly},\nauthor={Nahyuk Lee and Juhong Min and Junha Lee and Seungwook Kim and Kanghee Lee and Jaesik Park and Minsu Cho},\nyear={2024},\nurl={https://openreview.net/forum?id=6cGiRiExUd}\n}", "github": "", "project": "", "reviewers": "jjsW;jVVT;NyfY;oLtM", "site": "https://openreview.net/forum?id=6cGiRiExUd", "pdf_size": 13768238, "rating": "5;5;5;8", "confidence": "4;4;2;2", "soundness": "3;3;3;3", "contribution": "2;3;3;4", "presentation": "1;2;3;3", "wc_summary": "49;97;53;80", "wc_strengths": "108;50;28;63", "wc_weaknesses": "271;145;55;3", "wc_questions": "48;107;3;42", "wc_review": "476;399;139;188", "wc_reply_reviewers": "151;94;0;242", "wc_reply_authors": "2751;1908;423;927", "reply_reviewers": "1;1;0;1", "reply_authors": "7;5;2;4", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.0, 1.0 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 69.75, 19.74050404624968 ], "wc_strengths_avg": [ 62.25, 29.22648627529488 ], "wc_weaknesses_avg": [ 118.5, 101.65013526798673 ], "wc_questions_avg": [ 50.0, 37.1685350800916 ], "wc_review_avg": [ 300.5, 140.74888987128816 ], "wc_reply_reviewers_avg": [ 121.75, 87.90442252810719 ], "wc_reply_authors_avg": [ 1502.25, 897.1765085533615 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 4.5, 1.8027756377319946 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:IubrjESbtLQJ:scholar.google.com/&scioq=Efficient+Point+Cloud+Matching+for+3D+Geometric+Shape+Assembly&hl=en&as_sdt=0,23", "gs_version_total": 0, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Pohang University of Science and Technology;Seoul National University", "aff_unique_dep": ";", "aff_unique_url": "https://www.postech.ac.kr;https://www.snu.ac.kr", "aff_unique_abbr": 
"POSTECH;SNU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Pohang;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "6cMmSnOpCs", "title": "ScaLearn: Simple and Highly Parameter-Efficient Task Transfer by Learning to Scale", "track": "main", "status": "Reject", "tldr": "", "abstract": "Multi-task learning (MTL) has shown considerable practical benefits, particularly when using pre-trained language models (PLMs). While this is commonly achieved by simultaneously learning n tasks under a joint optimization procedure, recent methods such as AdapterFusion structure the problem into two distinct stages: (i) task learning, where knowledge specific to a task is encapsulated within sets of parameters (e.g., adapters), and (ii) transfer, where this already learned knowledge is leveraged for a target task. This separation of concerns provides numerous benefits, such as promoting reusability, and addressing cases involving data privacy and societal concerns; on the flip side, current two-stage MTL methods come with the cost of introducing a substantial number of additional parameters. In this work, we address this issue by leveraging the usefulness of linearly scaling the output representations of source adapters for transfer learning. We introduce ScaLearn, a simple and highly parameter-efficient two-stage MTL method that capitalizes on the knowledge of the source tasks by learning a minimal set of scaling parameters that enable effective knowledge transfer to a target task. Our experiments on three benchmarks (GLUE, SuperGLUE, and HumSet) show that our ScaLearn, in addition to facilitating the benefits of two-stage MTL, consistently outperforms strong baselines with only a small number of transfer parameters \u2013 roughly 0.35% of those of AdapterFusion. Remarkably, we observe that ScaLearn maintains its strong abilities even when further reducing parameters through uniform scaling and layer-sharing, achieving similarly competitive results with only 8 transfer parameters for each target task. 
Our proposed approach thus demonstrates the power of simple scaling as a promise for more efficient task transfer.", "keywords": "natural language processing;multi-task learning;transfer learning;adapters;efficient learning;peft methods", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "", "author": "Markus Frohmann;Carolin Holtermann;Shahed Masoudian;Anne Lauscher;Navid Rekabsaz", "authorids": "~Markus_Frohmann1;~Carolin_Holtermann1;~Shahed_Masoudian1;~Anne_Lauscher1;~Navid_Rekabsaz2", "gender": "M;F;M;;M", "homepage": ";https://www.carolin-holtermann.de/;https://github.com/ShawMask;;https://navid-rekabsaz.github.io", "dblp": "358/4606;318/1237;;209/6857;150/5089", "google_scholar": "8PxZyK8AAAAJ;x1vg4BgAAAAJ;hyWDk2wAAAAJ;https://scholar.google.it/citations?user=IbJS3UEAAAAJ;lZjyLyEAAAAJ", "orcid": ";0000-0003-0449-1348;0009-0007-2747-0386;;0000-0001-5764-8738", "linkedin": "markus-frohmann/;carolin-holtermann;shahed-masoudian;;", "or_profile": "~Markus_Frohmann1;~Carolin_Holtermann1;~Shahed_Masoudian1;~Anne_Lauscher1;~Navid_Rekabsaz2", "aff": "Deezer;Universit\u00e4t Hamburg;Johannes Kepler Universit\u00e4t Linz;Universit\u00e4t Hamburg;Thomson Reuters", "aff_domain": "research.deezer.com;uni-hamburg.de;jku.at;uni-hamburg.de;thomsonreuters.com", "position": "Intern;PhD student;PhD student;Associate Professor;Lead AI Scientist", "bibtex": "@misc{\nfrohmann2024scalearn,\ntitle={ScaLearn: Simple and Highly Parameter-Efficient Task Transfer by Learning to Scale},\nauthor={Markus Frohmann and Carolin Holtermann and Shahed Masoudian and Anne Lauscher and Navid Rekabsaz},\nyear={2024},\nurl={https://openreview.net/forum?id=6cMmSnOpCs}\n}", "github": "", "project": "", "reviewers": "6MN5;YAiw;tpyz;Exzr", "site": "https://openreview.net/forum?id=6cMmSnOpCs", "pdf_size": 1061918, "rating": "5;5;5;8", "confidence": "4;4;3;4", "soundness": "3;2;2;4", "contribution": "2;2;2;2", "presentation": "3;2;3;3", "wc_summary": "65;111;91;53", "wc_strengths": "43;53;156;74", "wc_weaknesses": "97;55;203;107", "wc_questions": "35;450;8;139", "wc_review": "240;669;458;373", "wc_reply_reviewers": "0;29;62;0", "wc_reply_authors": "727;1562;1594;370", "reply_reviewers": "0;1;1;0", "reply_authors": "2;3;3;1", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 80.0, 22.561028345356956 ], "wc_strengths_avg": [ 81.5, 44.44378471732578 ], "wc_weaknesses_avg": [ 115.5, 54.1548705104167 ], "wc_questions_avg": [ 158.0, 175.5377452287684 ], "wc_review_avg": [ 435.0, 155.8476820488518 ], "wc_reply_reviewers_avg": [ 22.75, 25.567313116555678 ], "wc_reply_authors_avg": [ 1063.25, 530.1195030368153 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9368531401909249420&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;2;1;3", "aff_unique_norm": "Deezer;University of Hamburg;Johannes Kepler University Linz;Thomson Reuters", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.deezer.com;https://www.uni-hamburg.de;https://www.jku.at;https://www.thomsonreuters.com", "aff_unique_abbr": "Deezer;UHH;JKU;TR", "aff_campus_unique_index": "1", 
"aff_campus_unique": ";Linz", "aff_country_unique_index": "0;1;2;1;3", "aff_country_unique": "France;Germany;Austria;United States" }, { "id": "6cV6q8RIw2", "title": "Homotopy Relaxation Training Algorithms for Infinite-Width Two-Layer ReLU Neural Networks", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "In this paper, we present a novel training approach called the Homotopy Relaxation Training Algorithm (HRTA), aimed at accelerating the training process in contrast to traditional methods. Our algorithm incorporates two key mechanisms: one involves building a homotopy activation function that seamlessly connects the linear activation function with the ReLU activation function; the other technique entails relaxing the homotopy parameter to enhance the training refinement process. We have conducted an in-depth analysis of this novel method within the context of the neural tangent kernel (NTK), revealing significantly improved convergence rates. Our experimental results, especially when considering networks with larger widths, validate the theoretical conclusions. This proposed homotopy algorithm exhibits the potential for other activation functions and deep neural networks.", "keywords": "homotopy;relaxation;ReLU Neural Networks;infinite-width limit", "primary_area": "optimization", "supplementary_material": "/attachment/7ce8ee9f34896c221d0a3c907d0206cf8fbb1d0c.pdf", "author": "Yahong Yang;Qipin Chen;Wenrui Hao", "authorids": "~Yahong_Yang1;~Qipin_Chen1;~Wenrui_Hao1", "gender": "M;M;", "homepage": ";;https://sites.psu.edu/whao/", "dblp": ";255/5039;", "google_scholar": ";hRZ99NUAAAAJ;7x6CVcAAAAAJ", "orcid": "0000-0002-9721-2362;;", "linkedin": ";qipin-chen-395a97159/;", "or_profile": "~Yahong_Yang1;~Qipin_Chen1;~Wenrui_Hao1", "aff": "Pennsylvania State University;;Pennsylvania State University", "aff_domain": "psu.edu;;psu.edu", "position": "Postdoc;;Associate Professor", "bibtex": "@misc{\nyang2024homotopy,\ntitle={Homotopy Relaxation Training Algorithms for Infinite-Width Two-Layer Re{LU} Neural Networks},\nauthor={Yahong Yang and Qipin Chen and Wenrui Hao},\nyear={2024},\nurl={https://openreview.net/forum?id=6cV6q8RIw2}\n}", "github": "", "project": "", "reviewers": "wUUw;4y1a;kAtf", "site": "https://openreview.net/forum?id=6cV6q8RIw2", "pdf_size": 716844, "rating": "3;3;3", "confidence": "3;4;3", "soundness": "2;2;2", "contribution": "1;2;2", "presentation": "1;3;2", "wc_summary": "54;38;57", "wc_strengths": "22;7;45", "wc_weaknesses": "58;142;147", "wc_questions": "280;3;69", "wc_review": "414;190;318", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 3.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.0, 0.0 ], "contribution_avg": [ 1.6666666666666667, 0.4714045207910317 ], "presentation_avg": [ 2.0, 0.816496580927726 ], "wc_summary_avg": [ 49.666666666666664, 8.339997335464536 ], "wc_strengths_avg": [ 24.666666666666668, 15.627610892974724 ], "wc_weaknesses_avg": [ 115.66666666666667, 40.827550610940264 ], "wc_questions_avg": [ 117.33333333333333, 118.13645594067152 ], "wc_review_avg": [ 307.3333333333333, 91.75813618178802 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=18198803661618719588&as_sdt=805&sciodt=0,3&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0", "aff_unique_norm": "Pennsylvania State University", "aff_unique_dep": "", "aff_unique_url": "https://www.psu.edu", "aff_unique_abbr": "PSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "6fFd8QaPVx", "title": "OneBNet: Binarized Neural Networks using Decomposed 1-D Binarized Convolutions on Edge Device", "track": "main", "status": "Reject", "tldr": "", "abstract": "Nowadays, it is natural to use 2-D convolutions in convolutional neural networks (CNNs) for computer vision, but this paper shows that 1-D binarized convolutions can achieve excellent performance on CPU-based edge devices. This paper proposes a new structure called OneBNet to maximize the effects of 1-D binarized convolutions. The proposed 1-D downsampling can perform information compression gradually through two 1-D convolutions,\nwhich can contribute tremendously to the performance improvement in binarized convolutional neural networks (BCNNs). Compared with 2-D binarized convolutions, a $n \\times n$ 2-D binarized convolution is replaced by $n \\times 1$ row-wise and $1 \\times n$ column-wise 1-D binarized convolutions, thus doubling the effects of adjusting the activation distribution and non-linear activation function. In the decomposed 1-D binarized convolution, although computational costs are reduced, the number of element-wise activation functions and learnable bias layers can be doubled, which can be a significant burden. Therefore, we expect that the 1-D binarized convolution is not suitable for all layers, and we present the reason and experimental results proving it. Based on the above assumption and experimental results, we can provide more optimized structure in terms of performance and costs. With ResNet as a backbone, we evaluate the proposed model on several conventional image datasets. In experiments, the proposed model based on ResNet18 achieves 93.4\\% and 93.6\\% Top-1 accuracy on the FashionMNIST and CIFAR10 datasets. In the case of training from scratch, the proposed OneBNet based on ResNet18 can produce 63.9\\% Top-1 accuracy, showing better performance over the state-of-the-art (SOTA) binarized CNNs based on ResNet18. When applying the teacher-student training, 68.4\\% Top-1 accuracy can be obtained, which overwhelms the existing SOTA BCNNs. 
With 5\\% additional delay on a single thread of Raspberry Pi, the proposed lightweight model achieves 67.3\\% Top-1 accuracy on the ImageNet dataset, outperforming the baseline by 1.8\\%.", "keywords": "Binarized Neural Networks;Computer Vision;Inference;1-D convolution", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/1a51fbffff47a488a9a0a870b4f9c61e58c2a9b2.zip", "author": "YOUNGWOOK KWON;Minkee Chang;Daeryong Shin;WANSOO KIM;HyunJin Kim", "authorids": "~YOUNGWOOK_KWON2;~Minkee_Chang1;~Daeryong_Shin1;~WANSOO_KIM2;~HyunJin_Kim1", "gender": "M;M;M;M;M", "homepage": "https://www.empaslab.com/;https://www.empaslab.com/home;;https://www.dankook.ac.kr/web/kor;https://www.empaslab.com", "dblp": ";;;;97/8639-1", "google_scholar": ";;;;https://scholar.google.co.kr/citations?hl=ko", "orcid": ";;0009-0002-0027-9059;;0000-0001-5017-3995", "linkedin": ";;;;", "or_profile": "~YOUNGWOOK_KWON2;~Minkee_Chang1;~Daeryong_Shin1;~WANSOO_KIM2;~HyunJin_Kim1", "aff": "Dankook University;Dankook University;Dankook University;Dankook University;Dankook University", "aff_domain": "dankook.ac.kr;dankook.ac.kr;dankook.ac.kr;dankook.ac.kr;dankook.ac.kr", "position": "MS student;Undergrad student;Undergrad student;Undergrad student;Full Professor", "bibtex": "@misc{\nkwon2024onebnet,\ntitle={One{BN}et: Binarized Neural Networks using Decomposed 1-D Binarized Convolutions on Edge Device},\nauthor={YOUNGWOOK KWON and Minkee Chang and Daeryong Shin and WANSOO KIM and HyunJin Kim},\nyear={2024},\nurl={https://openreview.net/forum?id=6fFd8QaPVx}\n}", "github": "", "project": "", "reviewers": "HiaQ;w6Pw;38Cc;6xWT", "site": "https://openreview.net/forum?id=6fFd8QaPVx", "pdf_size": 4980376, "rating": "3;3;3;5", "confidence": "4;4;3;4", "soundness": "2;1;2;3", "contribution": "2;1;2;3", "presentation": "2;3;1;3", "wc_summary": "93;54;38;53", "wc_strengths": "51;45;25;50", "wc_weaknesses": "114;289;28;85", "wc_questions": "3;36;154;4", "wc_review": "261;424;245;192", "wc_reply_reviewers": "86;197;0;29", "wc_reply_authors": "468;1074;758;554", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;1", "rating_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.0, 0.7071067811865476 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 59.5, 20.35313243704762 ], "wc_strengths_avg": [ 42.75, 10.497023387608508 ], "wc_weaknesses_avg": [ 129.0, 97.4191972867771 ], "wc_questions_avg": [ 49.25, 61.91677882448343 ], "wc_review_avg": [ 280.5, 86.69630903331468 ], "wc_reply_reviewers_avg": [ 78.0, 75.3491871223572 ], "wc_reply_authors_avg": [ 713.5, 233.265406779488 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:QDzLibUQzQYJ:scholar.google.com/&scioq=OneBNet:+Binarized+Neural+Networks+using+Decomposed+1-D+Binarized+Convolutions+on+Edge+Device&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Dankook University", "aff_unique_dep": "", "aff_unique_url": "https://www.dankook.ac.kr", "aff_unique_abbr": "Dankook", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "6hP9JcXpNk", 
"title": "Going beyond familiar features for deep anomaly detection", "track": "main", "status": "Reject", "tldr": "", "abstract": "Anomaly Detection (AD) is a critical task that involves identifying observations that do not conform to a learned model of normality.\nPrior work in deep AD is predominantly based on a familiarity hypothesis, where familiar features serve as the reference in a pre-trained embedding space. While this strategy has proven highly successful, it turns out that it causes consistent false negatives when anomalies consist of truly novel features that are not well captured by the pre-trained encoding. We propose a novel approach to AD using explainability to capture novel features as unexplained observations in the input space. We achieve strong performance across a wide range of anomaly benchmarks by combining similarity and novelty in a hybrid approach. Our approach establishes a new state-of-the-art across multiple benchmarks, handling diverse anomaly types while eliminating the need for expensive background models and dense matching. In particular, we show that by taking account of novel features, we reduce false negative anomalies by up to 40% on challenging benchmarks compared to the state-of-the-art. Our method give visually inspectable explanations for pixel level anomalies.", "keywords": "Anomaly detection;familiarity hypothesis", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/a00ef70ddbb24d13ef6384bff21f203c94384ebe.pdf", "author": "Sarath Sivaprasad;Mario Fritz", "authorids": "~Sarath_Sivaprasad2;~Mario_Fritz1", "gender": "M;M", "homepage": ";https://cispa.saarland/group/fritz/", "dblp": ";", "google_scholar": "37HTgYcAAAAJ;https://scholar.google.de/citations?user=4V1nNm4AAAAJ", "orcid": ";", "linkedin": "sarathsivaprasad1729/;", "or_profile": "~Sarath_Sivaprasad2;~Mario_Fritz1", "aff": "cispa;Saarland University", "aff_domain": "cispa.de;uni-saarland.de", "position": "PhD student;Full Professor", "bibtex": "@misc{\nsivaprasad2024going,\ntitle={Going beyond familiar features for deep anomaly detection},\nauthor={Sarath Sivaprasad and Mario Fritz},\nyear={2024},\nurl={https://openreview.net/forum?id=6hP9JcXpNk}\n}", "github": "", "project": "", "reviewers": "oe8U;vwNp;Qn1L", "site": "https://openreview.net/forum?id=6hP9JcXpNk", "pdf_size": 5543769, "rating": "3;3;5", "confidence": "3;4;4", "soundness": "3;2;3", "contribution": "3;2;2", "presentation": "2;1;2", "wc_summary": "32;32;139", "wc_strengths": "48;16;32", "wc_weaknesses": "235;153;180", "wc_questions": "102;169;2", "wc_review": "417;370;353", "wc_reply_reviewers": "0;0;92", "wc_reply_authors": "726;745;388", "reply_reviewers": "0;0;1", "reply_authors": "1;1;1", "rating_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 1.6666666666666667, 0.4714045207910317 ], "wc_summary_avg": [ 67.66666666666667, 50.44028372464039 ], "wc_strengths_avg": [ 32.0, 13.063945294843617 ], "wc_weaknesses_avg": [ 189.33333333333334, 34.120700787384514 ], "wc_questions_avg": [ 91.0, 68.61972505531239 ], "wc_review_avg": [ 380.0, 27.067816067549053 ], "wc_reply_reviewers_avg": [ 30.666666666666668, 43.36921591277491 ], "wc_reply_authors_avg": [ 619.6666666666666, 163.996612431138 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], 
"reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4223184860631744350&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1", "aff_unique_norm": "CISPA Helmholtz Center for Information Security;Saarland University", "aff_unique_dep": ";", "aff_unique_url": "https://www.cispa.de/;https://www.uni-saarland.de", "aff_unique_abbr": "CISPA;UdS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "title": "Learning to solve Class-Constrained Bin Packing Problems via Encoder-Decoder Model", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19386", "id": "6hvtSLkKeZ", "author_site": "Hanni Cheng, Ya Cong, Weihao Jiang, Shiliang Pu", "tldr": "", "abstract": "Neural methods have shown significant merit in solving combinatorial optimization (CO) problems, including the Bin Packing Problem (BPP). However, most existing ML-based approaches focus on geometric BPP like 3DBPP, neglecting complex vector BPP. In this study, we introduce a vector BPP variant called Class-Constrained Bin Packing Problem (CCBPP), dealing with items of both classes and sizes, and the objective is to pack the items in the least amount of bins respecting the bin capacity and the number of different classes that it can hold. To enhance the efficiency and practicality of solving CCBPP, we propose a learning-based Encoder-Decoder Model. The Encoder employs a Graph Convolution Network (GCN) to generate a heat-map, representing probabilities of different items packing together. The Decoder decodes and fine-tunes the solution through Cluster Decode and Active Search methods, thereby producing high-quality solutions for CCBPP instances. Extensive experiments demonstrate that our proposed method consistently yields high-quality solutions for various kinds of CCBPP with a very small gap from the optimal. 
Moreover, our Encoder-Decoder Model also shows promising performance on one practical application of CCBPP, the *Manufacturing Order Consolidation Problem* (OCP).", "keywords": "Combinatorial Optimization;Class-Contrained Bin Packing Problems;Graph Convolution Network;Cluster Decode", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/3599f9da74963d1b16c12cd25b5c59756ac78166.zip", "author": "Hanni Cheng;Ya Cong;Weihao Jiang;Shiliang Pu", "authorids": "~Hanni_Cheng1;~Ya_Cong1;~Weihao_Jiang2;~Shiliang_Pu1", "gender": "F;M;M;M", "homepage": ";;https://orcid.org/0000-0003-3482-8538;", "dblp": "189/5971;;262/6776.html;155/3173", "google_scholar": ";;;https://scholar.google.com.hk/citations?user=NWR_wpoAAAAJ", "orcid": "0009-0004-5319-7254;0000-0003-2432-5996;0000-0003-3482-8538;", "linkedin": ";;;", "or_profile": "~Hanni_Cheng1;~Ya_Cong1;~Weihao_Jiang2;~Shiliang_Pu1", "aff": "Hikvision Research Institute;Hikvision Research Institute;Hikvision Research Institute;", "aff_domain": "hikvision.com;hikvision.com;hikvision.com;", "position": "Researcher;Researcher;Researcher;", "bibtex": "@inproceedings{\ncheng2024learning,\ntitle={Learning to solve Class-Constrained Bin Packing Problems via Encoder-Decoder Model},\nauthor={Hanni Cheng and Ya Cong and Weihao Jiang and Shiliang Pu},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=6hvtSLkKeZ}\n}", "github": "", "project": "", "reviewers": "hLWM;Rj4P;HxmL;aXcu;2rkj", "pdf_size": 604652, "rating": "6;6;6;6;8", "confidence": "4;4;4;4;5", "soundness": "2;3;2;4;3", "contribution": "2;3;2;3;3", "presentation": "2;3;3;3;3", "wc_summary": "60;51;53;106;51", "wc_strengths": "34;63;32;187;28", "wc_weaknesses": "71;63;300;110;1", "wc_questions": "108;11;33;128;35", "wc_review": "273;188;418;531;115", "wc_reply_reviewers": "0;62;22;0;0", "wc_reply_authors": "1524;887;1716;958;268", "reply_reviewers": "0;1;1;0;0", "reply_authors": "3;3;4;3;1", "rating_avg": [ 6.4, 0.7999999999999999 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "contribution_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 64.2, 21.160340261914506 ], "wc_strengths_avg": [ 68.8, 60.39006540814474 ], "wc_weaknesses_avg": [ 109.0, 101.69169090933634 ], "wc_questions_avg": [ 63.0, 46.12591462507817 ], "wc_review_avg": [ 305.0, 151.43183284897532 ], "wc_reply_reviewers_avg": [ 16.8, 24.152846623120844 ], "wc_reply_authors_avg": [ 1070.6, 512.3801713571672 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 2.8, 0.9797958971132712 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9033160795790333827&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "openreview": "https://openreview.net/forum?id=6hvtSLkKeZ", "pdf": "https://openreview.net/pdf?id=6hvtSLkKeZ", "email": "hikvision.com;hikvision.com;hikvision.com;", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Hikvision Research Institute", "aff_unique_dep": "", "aff_unique_url": "https://www.hikvision.com/cn/", "aff_unique_abbr": "Hikvision", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "6hzNVNSz8O", "title": "No learning rates needed: Introducing SaLSa - Stable 
Armijo Line Search Adaptation", "track": "main", "status": "Reject", "tldr": "", "abstract": "In recent studies, line search methods have been demonstrated to significantly\nenhance the performance of conventional stochastic gradient descent techniques\nacross various datasets and architectures, while making an otherwise critical choice\nof learning rate schedule superfluous Vaswani et al. (2019); Mahsereci & Hennig\n(2015); Vaswani et al. (2021). In this paper, we identify problems of current state-of-the-art of line search methods Vaswani et al. (2019; 2021), propose enhancements,\nand rigorously assess their effectiveness. Furthermore, we evaluate these methods\non orders of magnitude larger datasets and more complex data domains than\npreviously done.\nMore specifically, we enhance the Armijo line search method by speeding up\nits computation and incorporating a momentum term into the Armijo criterion,\nmaking it better suited for stochastic mini-batching. Our optimization approach\noutperforms both the previous Armijo implementation and a tuned learning rate\nschedule for the Adam and SGD optimizers. Our evaluation covers a diverse range\nof architectures, such as Transformers, CNNs, and MLPs, as well as data domains,\nincluding NLP and image data.\nOur work is publicly available as a Python package, which provides a hyperparameter free Pytorch optimizer.", "keywords": "Optimizer;Line Search;Learning Rate;Transformer;CNN", "primary_area": "optimization", "supplementary_material": "/attachment/3e2cb2716f600ac3e583f19ee60d465c0a1a8a31.zip", "author": "Philip Kenneweg;Tristan Kenneweg;Fabian Fumagalli;Barbara Hammer", "authorids": "~Philip_Kenneweg1;~Tristan_Kenneweg1;~Fabian_Fumagalli1;~Barbara_Hammer4", "gender": "M;M;M;F", "homepage": ";;https://hammer-lab.techfak.uni-bielefeld.de/people/316634936/;https://www.techfak.uni-bielefeld.de/~bhammer/", "dblp": "306/1101;;329/4508;h/BarbaraHammer", "google_scholar": ";;anUMB08AAAAJ;1d3OxaUAAAAJ", "orcid": "0000-0002-7097-173X;0000-0001-8213-9396;0000-0003-3955-3510;0000-0002-2615-8151", "linkedin": ";;fabian-fumagalli/;", "or_profile": "~Philip_Kenneweg1;~Tristan_Kenneweg1;~Fabian_Fumagalli1;~Barbara_Hammer4", "aff": ";Universit\u00e4t Bielefeld;Universit\u00e4t Bielefeld;Universit\u00e4t Bielefeld", "aff_domain": ";uni-bielefeld.de;uni-bielefeld.de;uni-bielefeld.de", "position": ";PhD student;PhD student;Full Professor", "bibtex": "@misc{\nkenneweg2024no,\ntitle={No learning rates needed: Introducing Sa{LS}a - Stable Armijo Line Search Adaptation},\nauthor={Philip Kenneweg and Tristan Kenneweg and Fabian Fumagalli and Barbara Hammer},\nyear={2024},\nurl={https://openreview.net/forum?id=6hzNVNSz8O}\n}", "github": "", "project": "", "reviewers": "XiSh;QdFA;F4Yh;9vmP", "site": "https://openreview.net/forum?id=6hzNVNSz8O", "pdf_size": 8030403, "rating": "3;3;6;8", "confidence": "5;3;3;5", "soundness": "2;2;3;3", "contribution": "2;3;3;3", "presentation": "3;2;4;3", "wc_summary": "16;38;104;83", "wc_strengths": "27;33;39;100", "wc_weaknesses": "299;181;89;98", "wc_questions": "25;106;81;75", "wc_review": "367;358;313;356", "wc_reply_reviewers": "94;235;0;9", "wc_reply_authors": "622;875;38;302", "reply_reviewers": "1;1;0;1", "reply_authors": "1;2;1;1", "rating_avg": [ 5.0, 2.1213203435596424 ], "confidence_avg": [ 4.0, 1.0 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 60.25, 34.945493271665235 ], "wc_strengths_avg": [ 49.75, 
29.32042803234632 ], "wc_weaknesses_avg": [ 166.75, 84.35749818480868 ], "wc_questions_avg": [ 71.75, 29.38856069970083 ], "wc_review_avg": [ 348.5, 20.910523666326483 ], "wc_reply_reviewers_avg": [ 84.5, 94.31463301100206 ], "wc_reply_authors_avg": [ 459.25, 316.8259577433642 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.23570226039551587, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Op7vTIATlvUJ:scholar.google.com/&scioq=No+learning+rates+needed:+Introducing+SaLSa+-+Stable+Armijo+Line+Search+Adaptation&hl=en&as_sdt=0,33", "gs_version_total": 6, "aff_unique_index": "0;0;0", "aff_unique_norm": "Universit\u00e4t Bielefeld", "aff_unique_dep": "", "aff_unique_url": "https://www.uni-bielefeld.de/", "aff_unique_abbr": "Uni Bielefeld", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "id": "6iM2asNCjK", "title": "On Robustness-Accuracy Characterization of Large Language Models using Synthetic Datasets", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "In recent years, large language models (LLMs) that were pretrained at scale on diverse data have proven to be a successful approach for solving different downstream tasks. However, new concerns about proper performance evaluation have been raised, especially for test-data leakage caused by accidentally including them during pretraining, or by indirectly exposing them through API calls for evaluation. Motivated by these, in this paper, we propose a new evaluation workflow that generates steerable synthetic language datasets and proxy tasks for benchmarking the performance of pretrained LLMs on sentence classification tasks. This approach allows for better characterization of the joint analysis on the robustness and accuracy of LLMs without risking sensitive information leakage. It also provides a more controlled and private way to evaluate LLMs that avoids overfitting specific test sets. 
Verified on various pretrained LLMs, the proposed approach demonstrates promising high correlation with real downstream performance.", "keywords": "language model; real-data-free", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "", "author": "Ching-Yun Ko;Pin-Yu Chen;Payel Das;Yung-Sung Chuang;Luca Daniel", "authorids": "~Ching-Yun_Ko1;~Pin-Yu_Chen1;~Payel_Das1;~Yung-Sung_Chuang1;~Luca_Daniel1", "gender": "F;M;F;M;", "homepage": ";http://www.pinyuchen.com;;https://people.csail.mit.edu/yungsung/;https://www.mit.edu/~dluca/", "dblp": "206/6472;39/8969;56/7926;64/3095;35/5202", "google_scholar": ";jxwlCUUAAAAJ;;3ar1DOwAAAAJ;", "orcid": ";0000-0003-1039-8369;;0000-0002-1723-5063;0000-0002-5880-3151", "linkedin": ";pin-yu-chen-940062a2;;yschuang;", "or_profile": "~Ching-Yun_Ko1;~Pin-Yu_Chen1;~Payel_Das1;~Yung-Sung_Chuang1;~Luca_Daniel1", "aff": "Massachusetts Institute of Technology;International Business Machines;IBM, International Business Machines;Massachusetts Institute of Technology;", "aff_domain": "mit.edu;ibm.com;us.ibm.com;mit.edu;", "position": "PhD student;Principal Researcher;Principal Researcher;PhD student;", "bibtex": "@misc{\nko2024on,\ntitle={On Robustness-Accuracy Characterization of Large Language Models using Synthetic Datasets},\nauthor={Ching-Yun Ko and Pin-Yu Chen and Payel Das and Yung-Sung Chuang and Luca Daniel},\nyear={2024},\nurl={https://openreview.net/forum?id=6iM2asNCjK}\n}", "github": "", "project": "", "reviewers": "QVm9;ALMM;XF6K", "site": "https://openreview.net/forum?id=6iM2asNCjK", "pdf_size": 1124006, "rating": "3;3;5", "confidence": "2;4;3", "soundness": "2;3;4", "contribution": "2;2;2", "presentation": "1;3;3", "wc_summary": "252;66;249", "wc_strengths": "44;25;75", "wc_weaknesses": "268;133;428", "wc_questions": "2;1;160", "wc_review": "566;225;912", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 189.0, 86.98275691193054 ], "wc_strengths_avg": [ 48.0, 20.607442021431645 ], "wc_weaknesses_avg": [ 276.3333333333333, 120.57731498457282 ], "wc_questions_avg": [ 54.333333333333336, 74.71873184743494 ], "wc_review_avg": [ 567.6666666666666, 280.46905157055903 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6108874926867295991&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Massachusetts Institute of Technology;International Business Machines Corporation;International Business Machines", "aff_unique_dep": ";;", "aff_unique_url": "https://web.mit.edu;https://www.ibm.com;https://www.ibm.com", "aff_unique_abbr": "MIT;IBM;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "STanHop: Sparse Tandem Hopfield Model for Memory-Enhanced Time Series Prediction", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19385", "id": "6iwg437CZs", "author_site": 
"Yu-Hsuan Wu, Jerry Hu, Weijian Li, Bo-Yu Chen, Han Liu", "tldr": "", "abstract": "We present **STanHop-Net** (**S**parse **Tan**dem **Hop**field **Net**work) for multivariate time series prediction with memory-enhanced capabilities. At the heart of our approach is **STanHop**, a novel Hopfield-based neural network block, which sparsely learns and stores both temporal and cross-series representations in a data-dependent fashion. In essence, STanHop sequentially learns temporal representation and cross-series representation using two tandem sparse Hopfield layers. Additionally, STanHop incorporates two external memory modules: **Plug-and-Play** and **Tune-and-Play** for train-less and task-aware memory enhancements, respectively. They allow StanHop-Net to swiftly respond to sudden events. Methodologically, we construct the STanHop-Net by stacking STanHop blocks in a hierarchical fashion, enabling multi-resolution feature extraction with resolution-specific sparsity. Theoretically, we introduce a unified construction (**Generalized Sparse Modern Hopfield Model**) for both dense and sparse modern Hopfield models and show that it endows a tighter memory retrieval error compared to the dense counterpart without sacrificing memory capacity. Empirically, we validate the efficacy of STanHop-Net on many settings: time series prediction, fast test-time adaptation, and strongly correlated time series prediction.", "keywords": "Time Series Prediction; Multivariate Time Series; Modern Hopfield Networks; Sparse Hopfield Model; Hopfield Layer; Attention Mechanism", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/f4fbc7215901e8f014e4e4ac13e571e625f93d01.pdf", "author": "Dennis Wu;Jerry Yao-Chieh Hu;Weijian Li;Bo-Yu Chen;Han Liu", "authorids": "~Dennis_Wu1;~Jerry_Yao-Chieh_Hu1;~Weijian_Li2;~Bo-Yu_Chen1;~Han_Liu4", "gender": ";;M;M;", "homepage": ";;;https://phys-mattchen.github.io/;", "dblp": ";;;;", "google_scholar": ";;https://scholar.google.com/citations?hl=en;;", "orcid": ";;0009-0003-4158-4380;0000-0003-4997-1652;", "linkedin": ";;weijian-li-b52566153/;;", "or_profile": "~Dennis_Wu1;~Jerry_Yao-Chieh_Hu1;~Weijian_Li2;~Bo-Yu_Chen1;~Han_Liu4", "aff": ";;Northwestern University;University of Chicago;Northwestern University", "aff_domain": ";;northwestern.edu;uchicago.edu;u.northwestern.edu", "position": ";;PhD student;Exchange student;Associate Professor", "bibtex": "@inproceedings{\nwu2024stanhop,\ntitle={{ST}anHop: Sparse Tandem Hopfield Model for Memory-Enhanced Time Series Prediction},\nauthor={Dennis Wu and Jerry Yao-Chieh Hu and Weijian Li and Bo-Yu Chen and Han Liu},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=6iwg437CZs}\n}", "github": "", "project": "", "reviewers": "xuHy;ntR2;u61L;BpQa", "pdf_size": 5898172, "rating": "5;5;8;8", "confidence": "3;2;4;4", "soundness": "3;2;3;3", "contribution": "2;2;2;3", "presentation": "2;2;3;3", "wc_summary": "94;53;74;79", "wc_strengths": "53;23;33;33", "wc_weaknesses": "176;32;65;27", "wc_questions": "22;37;348;338", "wc_review": "345;145;520;477", "wc_reply_reviewers": "55;0;26;46", "wc_reply_authors": "2836;1403;2667;1299", "reply_reviewers": "1;0;1;1", "reply_authors": "6;4;7;4", "rating_avg": [ 6.5, 1.5 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 
0.5 ], "wc_summary_avg": [ 75.0, 14.679918255903198 ], "wc_strengths_avg": [ 35.5, 10.897247358851684 ], "wc_weaknesses_avg": [ 75.0, 60.11239472854163 ], "wc_questions_avg": [ 186.25, 156.8795318070525 ], "wc_review_avg": [ 371.75, 145.93384631400627 ], "wc_reply_reviewers_avg": [ 31.75, 21.123150806638673 ], "wc_reply_authors_avg": [ 2051.25, 703.755772622861 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 5.25, 1.299038105676658 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.9045340337332909, "gs_citation": 47, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7180925615475172223&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=6iwg437CZs", "pdf": "https://openreview.net/pdf?id=6iwg437CZs", "email": ";;northwestern.edu;uchicago.edu;u.northwestern.edu", "author_num": 5, "aff_unique_index": "0;1;0", "aff_unique_norm": "Northwestern University;University of Chicago", "aff_unique_dep": ";", "aff_unique_url": "https://www.northwestern.edu;https://www.uchicago.edu", "aff_unique_abbr": "NU;UChicago", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "6j9Y1APsAm", "title": "Hierarchical Probabilistic Neural Network: Efficient and Accurate Uncertainty Quantification", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Bayesian neural networks (BNNs) are known for accurately estimating the posterior distribution of model parameters, showcasing their effectiveness in uncertainty quantification (UQ). However, the computational demands of Bayesian inference can be challenging. Evidential deep learning methods address this by treating target distribution parameters as random variables with a learnable conjugate distribution, thus allowing for efficient UQ. In our paper, we present the Hierarchical Probabilistic Neural Network (HPNN), offering new insights into existing evidential deep learning methods. Firstly, it distills BNN knowledge into a single deterministic network, endowing it with a Bayesian perspective and theoretical guarantees. Secondly, we introduce a self-regularized training strategy using Laplacian approximation (LA) for self-distillation, bypassing the heavy computational load with BNNs. Thirdly, we propose to utilize flexible normalizing flows to alleviate the conjugate prior assumption in a post-processing manner, where a few training iterations can enhance model performance. Lastly, we present the Hierarchical Bayesian Neural Network, which treats the NN parameters in HPNN as random variables, for further improving UQ accuracy. 
The experiment results demonstrate the effectiveness of our proposed methods in both UQ accuracy and robustness.", "keywords": "Uncertainty Quantification;Evidential Deep Learning", "primary_area": "probabilistic methods (Bayesian methods, variational inference, sampling, UQ, etc.)", "supplementary_material": "", "author": "Hanjing Wang;Qiang Ji", "authorids": "~Hanjing_Wang2;~Qiang_Ji1", "gender": "M;M", "homepage": "https://www.ecse.rpi.edu/~cvrl/people_zw.html;https://www.ecse.rpi.edu/~qji/", "dblp": "234/8752;", "google_scholar": ";vAXmpVIAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Hanjing_Wang2;~Qiang_Ji1", "aff": "Rensselaer Polytechnic Institute;Rensselaer Polytechnic Institute", "aff_domain": "rpi.edu;rpi.edu", "position": "PhD student;Professor", "bibtex": "@misc{\nwang2024hierarchical,\ntitle={Hierarchical Probabilistic Neural Network: Efficient and Accurate Uncertainty Quantification},\nauthor={Hanjing Wang and Qiang Ji},\nyear={2024},\nurl={https://openreview.net/forum?id=6j9Y1APsAm}\n}", "github": "", "project": "", "reviewers": "", "site": "https://openreview.net/forum?id=6j9Y1APsAm", "pdf_size": 0, "rating": "", "confidence": "", "soundness": "", "contribution": "", "presentation": "", "wc_summary": "", "wc_strengths": "", "wc_weaknesses": "", "wc_questions": "", "wc_review": "", "wc_reply_reviewers": "", "wc_reply_authors": "", "reply_reviewers": "", "reply_authors": "", "rating_avg": [ 0, 0 ], "confidence_avg": [ 0, 0 ], "soundness_avg": [ 0, 0 ], "contribution_avg": [ 0, 0 ], "presentation_avg": [ 0, 0 ], "wc_summary_avg": [ 0, 0 ], "wc_strengths_avg": [ 0, 0 ], "wc_weaknesses_avg": [ 0, 0 ], "wc_questions_avg": [ 0, 0 ], "wc_review_avg": [ 0, 0 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 0, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:zOj0eyQ-wCcJ:scholar.google.com/&scioq=Hierarchical+Probabilistic+Neural+Network:+Efficient+and+Accurate+Uncertainty+Quantification&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Rensselaer Polytechnic Institute", "aff_unique_dep": "", "aff_unique_url": "https://www.rpi.edu", "aff_unique_abbr": "RPI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "6jBNQ8nSxA", "title": "Just-in-Time Security Patch Detection - LLM At the Rescue for Data Augmentation", "track": "main", "status": "Reject", "tldr": "", "abstract": "In the face of growing vulnerabilities found in open-source software, the need to identify {discreet} security patches has become paramount. The lack of consistency in how software providers handle maintenance often leads to the release of security patches without comprehensive advisories, leaving users vulnerable to unaddressed security risks. To address this pressing issue, we introduce a novel security patch detection system, LLMDA, which capitalizes on Large Language Models (LLMs) and code-text alignment methodologies for patch review, data enhancement, and feature combination. Within LLMDA, we initially utilize LLMs for examining patches and expanding data of PatchDB and SPI-DB, two security patch datasets from recent literature. We then use labeled instructions to direct our LLMDA, differentiating patches based on security relevance. 
Following this, we apply a PTFormer to merge patches with code, formulating hybrid attributes that encompass both the innate details and the interconnections between the patches and the code. This distinctive combination method allows our system to capture more insights from the combined context of patches and code, hence improving detection precision. Finally, we devise a probabilistic batch contrastive learning mechanism within batches to augment the capability of our LLMDA in discerning security patches. The results reveal that LLMDA significantly surpasses state-of-the-art techniques in detecting security patches, underscoring its promise in fortifying software maintenance.", "keywords": "Open-source software vulnerabilities;Security patch detection;Large Language Models (LLMs);PT-Former;Code-text alignment", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Xunzhu Tang;Zhenghan Chen;KISUB KIM;Haoye Tian;Saad Ezzini;Jacques Klein;Tegawend\u00e9 F. Bissyand\u00e9", "authorids": "~Xunzhu_Tang1;~Zhenghan_Chen3;~KISUB_KIM1;~Haoye_Tian2;~Saad_Ezzini1;~Jacques_Klein1;~Tegawend\u00e9_F._Bissyand\u00e91", "gender": ";;M;;M;M;M", "homepage": ";;https://falconlk.github.io/react-gh-pages/;;https://ezzini.me;https://jacquesklein2302.github.io/;https://bissyande.github.io/", "dblp": ";;216/3065.html;;216/8359.html;k/JacquesKlein;00/8006.html", "google_scholar": ";;Nr_IDzQAAAAJ;;48ebm6wAAAAJ;https://scholar.google.fr/citations?user=9E_KKT4AAAAJ;t73Mqm8AAAAJ", "orcid": ";;0000-0002-4462-6916;;0000-0001-7657-4738;0000-0003-4052-475X;0000-0001-7270-9869", "linkedin": ";;kisub-kim-81199a179/;;saad-ezzini/;jacques-klein-188b0b5/;", "or_profile": "~Xunzhu_Tang1;~Zhenghan_Chen3;~KISUB_KIM1;~Haoye_Tian2;~Saad_Ezzini1;~Jacques_Klein1;~Tegawend\u00e9_F._Bissyand\u00e91", "aff": ";;;;Lancaster University;University of Luxemburg;University of Luxemburg", "aff_domain": ";;;;lancaster.ac.uk;uni.lu;uni.lu", "position": ";;;;Assistant Professor;Full Professor;Associate Professor", "bibtex": "@misc{\ntang2024justintime,\ntitle={Just-in-Time Security Patch Detection - {LLM} At the Rescue for Data Augmentation},\nauthor={Xunzhu Tang and Zhenghan Chen and KISUB KIM and Haoye Tian and Saad Ezzini and Jacques Klein and Tegawend{\\'e} F. 
Bissyand{\\'e}},\nyear={2024},\nurl={https://openreview.net/forum?id=6jBNQ8nSxA}\n}", "github": "", "project": "", "reviewers": "pMnS;z5t8;T4hh;aN2M", "site": "https://openreview.net/forum?id=6jBNQ8nSxA", "pdf_size": 3436072, "rating": "3;5;8;8", "confidence": "4;3;5;4", "soundness": "3;2;4;3", "contribution": "2;2;4;4", "presentation": "1;2;4;3", "wc_summary": "53;80;87;58", "wc_strengths": "65;46;184;99", "wc_weaknesses": "364;183;51;57", "wc_questions": "2;170;45;57", "wc_review": "484;479;367;271", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "781;719;390;694", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 2.1213203435596424 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 3.0, 1.0 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 69.5, 14.326548781894402 ], "wc_strengths_avg": [ 98.5, 52.88903478037768 ], "wc_weaknesses_avg": [ 163.75, 127.06174680052214 ], "wc_questions_avg": [ 68.5, 62.06649659840645 ], "wc_review_avg": [ 400.25, 88.07205856569948 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 646.0, 151.15720293786862 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5000000000000001, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5054369013991040277&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1;1", "aff_unique_norm": "Lancaster University;University of Luxembourg", "aff_unique_dep": ";", "aff_unique_url": "https://www.lancaster.ac.uk;https://wwwen.uniluxembourg.lu", "aff_unique_abbr": "Lancaster;Uni Lu", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United Kingdom;Luxembourg" }, { "id": "6jFjYmahxu", "title": "DiffSound: Differentiable Modal Sound Simulation for Inverse Reasoning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Accurately estimating and simulating the physical properties of objects from real-world audio observations is of great practical importance in the field of vision and embodied AI. However, previous differentiable rigid or soft body simulations cannot be directly applied to modal sound synthesis due to the high sampling rate of sound, and previous audio synthesizers do not fully model the physical properties of objects behind the modal analysis.\nWe propose DiffSound, a differentiable sound simulation framework for physically based modal sound synthesis.\nOur framework is capable of solving a range of inverse problems, including object shape, material parameter, and impact position reasoning.\nExperimental results demonstrate the effectiveness of our approach, highlighting its ability to accurately estimate physical parameters and reproduce the target sound. 
Our DiffSound differentiable sound simulator serves as a valuable tool for applications requiring sound synthesis and analysis.", "keywords": "sound synthesis;differentiable simulation;modal analysis;vibration;audio", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/4e30cfff50a5bd6be4665224e4ef810b732e1f6f.zip", "author": "Xutong Jin;Chenxi Xu;Ruohan Gao;Jiajun Wu;Guoping Wang;Sheng Li", "authorids": "~Xutong_Jin1;~Chenxi_Xu1;~Ruohan_Gao2;~Jiajun_Wu1;~Guoping_Wang1;~Sheng_Li9", "gender": ";M;M;M;M;M", "homepage": "https://hellojxt.github.io/;https://technetiumman.github.io/;https://ruohangao.github.io/;https://jiajunwu.com;https://www.graphics.pku.edu.cn/xztd/jgfaculty/wgp2/index.htm;https://lishengpku.github.io/", "dblp": ";;176/5787;117/4768;;23/3439-8", "google_scholar": ";;i02oEgMAAAAJ;2efgcS0AAAAJ;;", "orcid": ";;0000-0002-8346-1114;0000-0002-4176-343X;0000-0001-7819-0076;", "linkedin": ";;;jiajunwu/;;", "or_profile": "~Xutong_Jin1;~Chenxi_Xu1;~Ruohan_Gao2;~Jiajun_Wu1;~Guoping_Wang1;~Sheng_Li9", "aff": "Peking University;Peking University;Meta;Stanford University;Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;meta.com;stanford.edu;pku.edu.cn;pku.edu.cn", "position": "PhD student;MS student;Researcher;Assistant Professor;Full Professor;Full Professor", "bibtex": "@misc{\njin2024diffsound,\ntitle={DiffSound: Differentiable Modal Sound Simulation for Inverse Reasoning},\nauthor={Xutong Jin and Chenxi Xu and Ruohan Gao and Jiajun Wu and Guoping Wang and Sheng Li},\nyear={2024},\nurl={https://openreview.net/forum?id=6jFjYmahxu}\n}", "github": "", "project": "", "reviewers": "oyHm;cDNR;Kc8Y;aeBV", "site": "https://openreview.net/forum?id=6jFjYmahxu", "pdf_size": 21414000, "rating": "3;3;5;5", "confidence": "3;3;3;4", "soundness": "2;2;3;2", "contribution": "2;2;2;2", "presentation": "3;3;2;2", "wc_summary": "73;82;49;66", "wc_strengths": "39;49;20;91", "wc_weaknesses": "59;68;89;262", "wc_questions": "77;72;102;7", "wc_review": "248;271;260;426", "wc_reply_reviewers": "9;0;0;0", "wc_reply_authors": "170;93;48;53", "reply_reviewers": "1;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 67.5, 12.093386622447824 ], "wc_strengths_avg": [ 49.75, 25.9939896899264 ], "wc_weaknesses_avg": [ 119.5, 82.98945716173831 ], "wc_questions_avg": [ 64.5, 35.0891721190455 ], "wc_review_avg": [ 301.25, 72.48232543179061 ], "wc_reply_reviewers_avg": [ 2.25, 3.897114317029974 ], "wc_reply_authors_avg": [ 91.0, 48.83134239399937 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:NZhLCezfaYAJ:scholar.google.com/&scioq=DiffSound:+Differentiable+Modal+Sound+Simulation+for+Inverse+Reasoning&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;1;2;0;0", "aff_unique_norm": "Peking University;Meta;Stanford University", "aff_unique_dep": ";Meta Platforms, Inc.;", "aff_unique_url": "http://www.pku.edu.cn;https://meta.com;https://www.stanford.edu", "aff_unique_abbr": "Peking U;Meta;Stanford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", 
"aff_country_unique_index": "0;0;1;1;0;0", "aff_country_unique": "China;United States" }, { "id": "6jJFmwAlen", "title": "Investigating the Fairness of Large Language Models for Predictions on Tabular Data", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Recent literature has suggested the potential of using large language models (LLMs) to make predictions for tabular tasks. However, LLMs have been shown to exhibit harmful social biases that reflect the stereotypes and inequalities present in the society. To this end, as well as the widespread use of tabular data in many high-stake applications, it is imperative to explore the following questions: what sources of information do LLMs draw upon when making predictions for tabular tasks; whether and to what extent are LLM predictions for tabular tasks influenced by social biases and stereotypes; and what are the consequential implications for fairness? Through a series of experiments, we delve into these questions and show that LLMs tend to inherit social biases from their training data which significantly impact their fairness in tabular prediction tasks. Furthermore, our investigations show that in the context of bias mitigation, though in-context learning and fine-tuning have a moderate effect, the fairness metric gap between different subgroups is still larger than that in traditional machine learning models, such as Random Forest and shallow Neural Networks. This observation emphasizes that the social biases are inherent within the LLMs themselves and inherited from their pre-training corpus, not only from the downstream task datasets. Besides, we demonstrate that label-flipping of in-context examples can significantly reduce biases, further highlighting the presence of inherent bias within LLMs.", "keywords": "Fairness;Social Biases;Large Language Models;In-Context Learning;Tabular Data;Trustworthy ML", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "", "author": "Yanchen Liu;Srishti Gautam;Jiaqi Ma;Himabindu Lakkaraju", "authorids": "~Yanchen_Liu2;~Srishti_Gautam1;~Jiaqi_Ma1;~Himabindu_Lakkaraju1", "gender": "M;F;;F", "homepage": "https://liuyanchen1015.github.io/;;https://jiaqima.github.io;http://web.stanford.edu/~himalv", "dblp": ";201/6628;155/2199-1;68/9376", "google_scholar": "https://scholar.google.com/citations?hl=en;7V_riiYAAAAJ;Z9X2A1MAAAAJ;", "orcid": ";;0000-0001-8292-5901;", "linkedin": ";;;", "or_profile": "~Yanchen_Liu2;~Srishti_Gautam1;~Jiaqi_Ma1;~Hima_Lakkaraju1", "aff": "Harvard University;UiT The Arctic University of Norway;University of Illinois Urbana-Champaign;Harvard University", "aff_domain": "harvard.edu;uit.no;illinois.edu;harvard.edu", "position": "MS student;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@misc{\nliu2024investigating,\ntitle={Investigating the Fairness of Large Language Models for Predictions on Tabular Data},\nauthor={Yanchen Liu and Srishti Gautam and Jiaqi Ma and Himabindu Lakkaraju},\nyear={2024},\nurl={https://openreview.net/forum?id=6jJFmwAlen}\n}", "github": "", "project": "", "reviewers": "qNdb;eUu3;rhXH;Vp7m;fjE3;88s7", "site": "https://openreview.net/forum?id=6jJFmwAlen", "pdf_size": 227848, "rating": "3;3;3;5;5;5", "confidence": "4;4;4;4;4;4", "soundness": "2;2;1;2;2;2", "contribution": "2;2;3;2;3;2", "presentation": "3;3;2;2;3;3", "wc_summary": "77;57;31;50;32;200", "wc_strengths": "89;35;50;19;83;74", "wc_weaknesses": "252;158;80;531;105;60", "wc_questions": "5;235;15;3;148;66", 
"wc_review": "423;485;176;603;368;400", "wc_reply_reviewers": "0;0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0;0", "reply_reviewers": "0;0;0;0;0;0", "reply_authors": "0;0;0;0;0;0", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 1.8333333333333333, 0.3726779962499649 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 74.5, 58.254470500840824 ], "wc_strengths_avg": [ 58.333333333333336, 25.67532321553562 ], "wc_weaknesses_avg": [ 197.66666666666666, 161.77213054856585 ], "wc_questions_avg": [ 78.66666666666667, 86.22966749069344 ], "wc_review_avg": [ 409.1666666666667, 128.87903458497647 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4414628081737124617&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Harvard University;Arctic University of Norway;University of Illinois Urbana-Champaign", "aff_unique_dep": ";;", "aff_unique_url": "https://www.harvard.edu;https://www.uit.no;https://illinois.edu", "aff_unique_abbr": "Harvard;UiT;UIUC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;Norway" }, { "id": "6kpXxfA3Oi", "title": "Fill with Anything: High-Resolution and Prompt-Faithful Image Completion", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Building on the achievements of text-to-image diffusion models, recent advancements in text-guided image inpainting have yielded remarkably realistic and visually compelling outcomes. \nNevertheless, current text-to-image inpainting models leave substantial room for enhancement, particularly in addressing the often inadequate alignment of user prompts with the inpainted region, and in extending applicability to high-resolution images. \nTo this end, this paper introduces an entirely $\\textbf{training-free}$ approach that $\\textbf{faithfully adheres to prompts}$ and seamlessly $\\textbf{scale to high-resolution}$ image inpainting. \nTo achieve this, we first present the Prompt-Aware Introverted Attention (PAIntA) layer, which enriches self-attention modules by incorporating prompt information derived from cross-attention scores, alleviating the visual context dominance in inpainting caused by all-to-all attention. \nFurthermore, we introduce the Reweighting Attention Score Guidance (RASG) mechanism, which directs cross-attention scores towards improved textual alignment while preserving the generation domain. \nIn addition, to address inpainting at larger scales, we introduce a specialized super-resolution technique tailored for inpainting, enabling the completion of missing regions in images of up to 2K resolution. Experimental results demonstrate that our proposed method surpasses existing state-of-the-art approaches in both qualitative and quantitative measures, achieving a substantial generation accuracy improvement of $\\textbf{61.4\\%}$ compared to $\\textbf{51.9\\%}$. 
Our codes will be open-sourced.", "keywords": "text-guided inpainting;diffusion inpainting;reweighting attention score guidance;prompt-aware introverted attention;RASG;PaIntA;conditional super-resolution;classifier guidance;classifier-free guidance;introvert attention;diffusion models", "primary_area": "generative models", "supplementary_material": "/attachment/215f397fd495b4c5a4252b56774027d4c45c8b4e.pdf", "author": "Hayk Manukyan;Andranik Sargsyan;Barsegh Atanyan;Zhangyang Wang;Shant Navasardyan;Humphrey Shi", "authorids": "~Hayk_Manukyan1;~Andranik_Sargsyan1;~Barsegh_Atanyan1;~Zhangyang_Wang1;~Shant_Navasardyan1;~Humphrey_Shi1", "gender": "M;M;M;M;M;M", "homepage": ";;;https://vita-group.github.io;;https://www.humphreyshi.com", "dblp": ";332/5291;;119/4026;286/5315;176/5516", "google_scholar": ";cg74A98AAAAJ;;pxFyKAIAAAAJ;VJSh59sAAAAJ;WBvt5A8AAAAJ", "orcid": ";0000-0001-8018-7941;;;0000-0002-1999-9999;0000-0002-2922-5663", "linkedin": "hayk-manukyan-4aab0962/;andraniksargsyan/;barsegh-atanyan-2012a412b;;shant-navasardyan-1302aa149;humphreyshi", "or_profile": "~Hayk_Manukyan1;~Andranik_Sargsyan1;~Barsegh_Atanyan1;~Zhangyang_Wang1;~Shant_Navasardyan1;~Honghui_Shi1", "aff": "Picsart AI Research;Yerevan State University;Picsart;University of Texas at Austin;Picsart Inc;University of Illinois, Urbana Champaign", "aff_domain": "picsart.com;ysu.am;picsart.com;utexas.edu;picsart.com;illinois.edu", "position": "Researcher;MS student;Researcher;Associate Professor;Researcher;Adjunct Assistant Professor", "bibtex": "@misc{\nmanukyan2024fill,\ntitle={Fill with Anything: High-Resolution and Prompt-Faithful Image Completion},\nauthor={Hayk Manukyan and Andranik Sargsyan and Barsegh Atanyan and Zhangyang Wang and Shant Navasardyan and Humphrey Shi},\nyear={2024},\nurl={https://openreview.net/forum?id=6kpXxfA3Oi}\n}", "github": "", "project": "", "reviewers": "Q343;BeCN;WycU;ZNkY", "site": "https://openreview.net/forum?id=6kpXxfA3Oi", "pdf_size": 32290227, "rating": "3;3;5;6", "confidence": "5;5;4;4", "soundness": "3;2;3;3", "contribution": "3;2;3;3", "presentation": "3;3;2;3", "wc_summary": "52;64;69;56", "wc_strengths": "21;33;46;90", "wc_weaknesses": "19;313;173;149", "wc_questions": "201;62;52;19", "wc_review": "293;472;340;314", "wc_reply_reviewers": "43;204;117;0", "wc_reply_authors": "938;1489;1374;756", "reply_reviewers": "1;1;1;0", "reply_authors": "2;3;2;1", "rating_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 60.25, 6.6473679001541655 ], "wc_strengths_avg": [ 47.5, 26.081602711489953 ], "wc_weaknesses_avg": [ 163.5, 104.32041986111827 ], "wc_questions_avg": [ 83.5, 69.67962399439308 ], "wc_review_avg": [ 354.75, 69.71145888589622 ], "wc_reply_reviewers_avg": [ 91.0, 77.50806409658288 ], "wc_reply_authors_avg": [ 1139.25, 301.99948261545086 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.9622504486493761, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:FgXryZtrne4J:scholar.google.com/&scioq=Fill+with+Anything:+High-Resolution+and+Prompt-Faithful+Image+Completion&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;0;2;3;4", "aff_unique_norm": "Picsart;Yerevan State University;University of Texas at Austin;Picsart 
Inc;University of Illinois Urbana-Champaign", "aff_unique_dep": "AI Research;;;;", "aff_unique_url": "https://research.picsart.com;https://www.yerevanstateuniversity.am;https://www.utexas.edu;https://www.picsart.com;https://illinois.edu", "aff_unique_abbr": "Picsart AI;YSU;UT Austin;Picsart;UIUC", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Austin;Urbana-Champaign", "aff_country_unique_index": "0;1;0;0;0;0", "aff_country_unique": "United States;Armenia" }, { "title": "Mixture-of-Experts Meets Instruction Tuning: A Winning Combination for Large Language Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19384", "id": "6mLjDwYte5", "author_site": "Sheng Shen, Le Hou, Yanqi Zhou, Nan Du, Shayne Longpre, Jason Wei, Hyung Won Chung, Barret Zoph, William Fedus, Xinyun Chen, Tu Vu, Yuexin Wu, Wuyang Chen, Albert Webson, Yunxuan Li, Vincent Zhao, Hongkun Yu, Kurt Keutzer, trevor darrell, Denny Zhou", "tldr": "", "abstract": "Sparse Mixture-of-Experts (MoE) is a neural architecture design that adds learnable parameters to Large Language Models (LLMs) without increasing computational complexity (FLOPs). Instruction tuning is a technique for training LLMs to follow instructions. We advocate combining these two approaches, as we find that MoE models benefit more from instruction tuning than dense models. In particular, we conduct empirical studies across three experimental setups: (i) Direct finetuning on individual downstream tasks devoid of instruction tuning; (ii) Instruction tuning followed by in-context few-shot or zero-shot generalization on downstream tasks; and (iii) Instruction tuning supplemented by further finetuning on individual downstream tasks. In the first scenario, MoE models overall underperform dense models of identical computational capacity. This narrative, however, dramatically changes with the introduction of instruction tuning (in the second and third scenarios), used independently or in conjunction with task-specific finetuning. Our most powerful model, FLAN-MoE-32B, surpasses the performance of Flan-PaLM-62B on four benchmark tasks, while using only a third of the FLOPs. 
The advancements embodied by FLAN-MoE inspire a reevaluation of the design principles of large-scale, high-performance language models in the framework of task-agnostic learning.", "keywords": "MoE;Instruction Tuning", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/25fb86090add41f7670f809a72fbb705fecd186c.pdf", "author": "Sheng Shen;Le Hou;Yanqi Zhou;Nan Du;Shayne Longpre;Jason Wei;Hyung Won Chung;Barret Zoph;William Fedus;Xinyun Chen;Tu Vu;Yuexin Wu;Wuyang Chen;Albert Webson;Yunxuan Li;Vincent Y Zhao;Hongkun Yu;Kurt Keutzer;Trevor Darrell;Denny Zhou", "authorids": "~Sheng_Shen2;~Le_Hou1;~Yanqi_Zhou1;~Nan_Du1;~Shayne_Longpre1;~Jason_Wei1;~Hyung_Won_Chung1;~Barret_Zoph1;~William_Fedus2;~Xinyun_Chen1;~Tu_Vu1;~Yuexin_Wu1;~Wuyang_Chen1;~Albert_Webson1;~Yunxuan_Li2;~Vincent_Y_Zhao1;~Hongkun_Yu2;~Kurt_Keutzer1;~Trevor_Darrell2;~Denny_Zhou1", "gender": "M;M;F;M;M;M;M;M;;;M;M;;;M;M;M;M;;", "homepage": "https://sincerass.github.io;http://vision.cs.stonybrook.edu/~lehhou/home/index.html;https://zhouyanqi.github.io/;;https://www.shaynelongpre.com;https://jasonwei20.github.io;;;;;https://tuvllms.github.io;https://crickwu.github.io;;https://representations.ai;;https://foo.bar;;https://people.eecs.berkeley.edu/~keutzer/;;", "dblp": "138/5764-1.html;161/9892;;;190/7024;02/11220.html;;;;;186/7716.html;09/1661;;276/1456;;301/7889;;k/KurtKeutzer.html;;", "google_scholar": "https://scholar.google.com/citations?hl=en;kQ0HeQIAAAAJ;ZKEDQXYAAAAJ;v474hP4AAAAJ;ADd_YfkAAAAJ;;1CAlXvYAAAAJ;;;;tOevwEEAAAAJ;sd0nprMAAAAJ;;3OQplr0AAAAJ;Nun8Dy0AAAAJ;;;ID9QePIAAAAJ;;", "orcid": ";0000-0001-7323-5300;;;;;;;;;;;;;;;;0000-0003-3868-8501;;", "linkedin": "sheng-s-ab198a174/;;;dunangatech/;shayne-redford-longpre/;;;;;;;;;;;;;kurtkeutzer/;;", "or_profile": "~Sheng_Shen2;~Le_Hou1;~Yanqi_Zhou1;~Nan_Du1;~Shayne_Longpre1;~Jason_Wei1;~Hyung_Won_Chung1;~Barret_Zoph1;~William_Fedus2;~Xinyun_Chen1;~Tu_Vu1;~Yuexin_Wu1;~Wuyang_Chen1;~Albert_Webson1;~Yunxuan_Li2;~Vincent_Y_Zhao1;~Hongkun_Yu2;~Kurt_Keutzer1;~Trevor_Darrell2;~Denny_Zhou1", "aff": "University of California, Berkeley;Google Research;Google Brain;Apple/AIML;Massachusetts Institute of Technology;OpenAI;Google Brain;;;;Google;Google;;Google DeepMind;Google;Augment Computing;;University of California, Berkeley;;", "aff_domain": "berkeley.edu;google.com;google.com;apple.com;mit.edu;openai.com;google.com;;;;google.com;google.com;;google.com;google.com;augmentcode.com;;berkeley.edu;;", "position": "PhD student;Software Engineer;Research Scientist;Principal Researcher;PhD student;Researcher;Researcher;;;;Researcher;Software Engineer;;Research Scientist;Researcher;Researcher;;Full Professor;;", "bibtex": "@inproceedings{\nshen2024mixtureofexperts,\ntitle={Mixture-of-Experts Meets Instruction Tuning: A Winning Combination for Large Language Models},\nauthor={Sheng Shen and Le Hou and Yanqi Zhou and Nan Du and Shayne Longpre and Jason Wei and Hyung Won Chung and Barret Zoph and William Fedus and Xinyun Chen and Tu Vu and Yuexin Wu and Wuyang Chen and Albert Webson and Yunxuan Li and Vincent Y Zhao and Hongkun Yu and Kurt Keutzer and Trevor Darrell and Denny Zhou},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=6mLjDwYte5}\n}", "github": "", "project": "", "reviewers": "YDYY;nXAt;SYmE;8EEX", "pdf_size": 616153, "rating": "5;6;8;8", "confidence": "4;3;3;3", "soundness": "3;3;4;4", "contribution": "3;3;3;4", 
"presentation": "2;2;3;4", "wc_summary": "36;64;104;34", "wc_strengths": "60;69;109;66", "wc_weaknesses": "151;96;66;4", "wc_questions": "211;87;229;39", "wc_review": "458;316;508;143", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1089;230;468;133", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "contribution_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 59.5, 28.297526393662043 ], "wc_strengths_avg": [ 76.0, 19.32614809008769 ], "wc_weaknesses_avg": [ 79.25, 53.07247403315583 ], "wc_questions_avg": [ 141.5, 80.56519099462248 ], "wc_review_avg": [ 356.25, 141.84212174103996 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 480.0, 372.13371252817177 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 20, 0 ], "corr_rating_confidence": -0.7777777777777777, "gs_citation": 78, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5132913830946216531&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=6mLjDwYte5", "pdf": "https://openreview.net/pdf?id=6mLjDwYte5", "email": "berkeley.edu;google.com;google.com;apple.com;mit.edu;openai.com;google.com;;;;google.com;google.com;;google.com;google.com;augmentcode.com;;berkeley.edu;;", "author_num": 20, "aff_unique_index": "0;1;1;2;3;4;1;1;1;1;1;5;0", "aff_unique_norm": "University of California, Berkeley;Google;Apple;Massachusetts Institute of Technology;OpenAI;Augment Computing", "aff_unique_dep": ";Google Research;Artificial Intelligence and Machine Learning;;;", "aff_unique_url": "https://www.berkeley.edu;https://research.google;https://www.apple.com;https://web.mit.edu;https://openai.com;", "aff_unique_abbr": "UC Berkeley;Google Research;Apple;MIT;OpenAI;", "aff_campus_unique_index": "0;1;1;1;1;1;1;0", "aff_campus_unique": "Berkeley;Mountain View;", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;1;0;0", "aff_country_unique": "United States;United Kingdom;" }, { "id": "6muJekoPR7", "title": "TROJFSL: TROJAN INSERTION IN FEW SHOT PROMPT LEARNING", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Prompt-tuning emerges as one of the most effective solutions to adapting a pre-trained language model (PLM) to processing new downstream natural language processing tasks, especially with only few input samples. The success of prompt-tuning motivates adversaries to create backdoor attacks against prompt-tuning. However, prior prompt-based backdoor attacks cannot be implemented through few-shot prompt-tuning, i.e., they require either a full-model fine-tuning or a large training dataset. We find it is difficult to build a prompt-based backdoor via few-shot prompt-tuning, i.e., freezing the PLM and tuning a soft prompt with a limited set of input samples. A backdoor design via few-shot prompt-tuning introduces an imbalanced poisoned dataset, easily suffers from the overfitting issue, and lack attention awareness. To mitigate these issues, we propose TrojFSL to perform backdoor attacks in the setting of few-shot prompt-tuning. TrojFSL consists of three modules, i.e., balanced poison learning, selective token poisoning, and trojan-trigger attention. 
Compared to prior prompt-based backdoor attacks, TrojFSL improves the ASR by 9% - 48% and the CDA by 4% - 9% across various PLMs and a wide range of downstream tasks.", "keywords": "Pre-trained Language Model;Few-Shot;Prompt;Trojan Attack", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "", "author": "Mengxin Zheng;Jiaqi Xue;Xun Chen;Yanshan Wang;Qian Lou;Lei Jiang", "authorids": "~Mengxin_Zheng1;~Jiaqi_Xue1;~Xun_Chen1;~Yanshan_Wang1;~Qian_Lou1;~Lei_Jiang1", "gender": "F;M;;;M;M", "homepage": "https://mxzheng.github.io/;https://jqxue1999.github.io;;;https://qlou.org;https://www.jianglei.org", "dblp": "327/9609;;;45/11295;207/3962.html;96/1994-1.html", "google_scholar": "CwLrXMAAAAAJ;NI2jppcAAAAJ;;;SBYgXLoAAAAJ;-1sXorAAAAAJ", "orcid": ";;;;;", "linkedin": "mengxin-zheng-86bb91171/;;;;;", "or_profile": "~Mengxin_Zheng1;~Jiaqi_Xue1;~Xun_Chen1;~Yanshan_Wang1;~Qian_Lou1;~Lei_Jiang1", "aff": "University of Central Florida;University of Central Florida;;University of Pittsburgh;University of Central Florida;Indiana University", "aff_domain": "ucf.edu;ucf.edu;;pitt.edu;ucf.edu;iu.edu", "position": "Assistant Professor;PhD student;;Assistant Professor;Assistant Professor;Assistant Professor", "bibtex": "@misc{\nzheng2024trojfsl,\ntitle={{TROJFSL}: {TROJAN} {INSERTION} {IN} {FEW} {SHOT} {PROMPT} {LEARNING}},\nauthor={Mengxin Zheng and Jiaqi Xue and Xun Chen and Yanshan Wang and Qian Lou and Lei Jiang},\nyear={2024},\nurl={https://openreview.net/forum?id=6muJekoPR7}\n}", "github": "", "project": "", "reviewers": "x2RS;oqR6;P9fg", "site": "https://openreview.net/forum?id=6muJekoPR7", "pdf_size": 517370, "rating": "3;5;6", "confidence": "4;4;3", "soundness": "2;3;3", "contribution": "2;2;2", "presentation": "3;3;3", "wc_summary": "40;41;69", "wc_strengths": "14;46;50", "wc_weaknesses": "234;227;267", "wc_questions": "5;3;38", "wc_review": "293;317;424", "wc_reply_reviewers": "0;73;0", "wc_reply_authors": "811;785;640", "reply_reviewers": "0;1;0", "reply_authors": "1;1;1", "rating_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 50.0, 13.4412301024373 ], "wc_strengths_avg": [ 36.666666666666664, 16.110727964792765 ], "wc_weaknesses_avg": [ 242.66666666666666, 17.441967269268172 ], "wc_questions_avg": [ 15.333333333333334, 16.048537489614297 ], "wc_review_avg": [ 344.6666666666667, 56.94636853117931 ], "wc_reply_reviewers_avg": [ 24.333333333333332, 34.41253001774532 ], "wc_reply_authors_avg": [ 745.3333333333334, 75.23444837454596 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.7559289460184545, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:8mTSEP-ATeIJ:scholar.google.com/&scioq=TROJFSL:+TROJAN+INSERTION+IN+FEW+SHOT+PROMPT+LEARNING&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "University of Central Florida;University of Pittsburgh;Indiana University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ucf.edu;https://www.pitt.edu;https://www.indiana.edu", "aff_unique_abbr": "UCF;Pitt;IU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United 
States" }, { "id": "6oC3djD3hU", "title": "ROBUST DIFFUSION GAN USING SEMI-UNBALANCED OPTIMAL TRANSPORT", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Diffusion models, a type of generative model, have demonstrated great potential for synthesizing highly detailed images. By integrating with GAN, advanced diffusion models like DDGAN \\citep{xiao2022DDGAN} could approach real-time performance for expansive practical applications. While DDGAN has effectively addressed the challenges of generative modeling, namely producing high-quality samples, covering different data modes, and achieving faster sampling, it remains susceptible to performance drops caused by datasets that are corrupted with outlier samples. This work introduces a robust training technique based on semi-unbalanced optimal transport to mitigate the impact of outliers effectively. Through comprehensive evaluations, we demonstrate that our robust diffusion GAN (RDGAN) outperforms vanilla DDGAN in terms of the aforementioned generative modeling criteria, i.e., image quality, mode coverage of distribution, and inference speed, and exhibits improved robustness when dealing with both clean and corrupted datasets.", "keywords": "optimal transport;diffusion model;generative model;robust generation", "primary_area": "generative models", "supplementary_material": "/attachment/fbe3abf538f0c15c3424986cf379b1e7cc53ab6d.zip", "author": "Quan Dao;B\u00ecnh H\u1eefu T\u1ea1;Tung Pham;Anh Tuan Tran", "authorids": "~Quan_Dao1;~B\u00ecnh_H\u1eefu_T\u1ea11;~Tung_Pham1;~Anh_Tuan_Tran2", "gender": "M;M;M;M", "homepage": "https://github.com/quandao10;;;https://sites.google.com/site/anhttranusc/", "dblp": "334/7610;;38/10862-1;150/5269-1", "google_scholar": "g0RS3_kAAAAJ;qBvM8_sAAAAJ;KcUuEKsAAAAJ;FYZ5ODQAAAAJ", "orcid": "0009-0006-0996-0472;;;0000-0002-3120-4036", "linkedin": ";;;https://linkedin.com/in/anh-tran-97814b19", "or_profile": "~Quan_Dao1;~B\u00ecnh_H\u1eefu_T\u1ea11;~Tung_Pham1;~Anh_Tuan_Tran2", "aff": "VinAI Research;VinAI Research;VinAI Research;VinAI Research", "aff_domain": "vinai.io;vinai.io;vinai.io;vinai.io", "position": "Intern;Intern;Researcher;Research Scientist", "bibtex": "@misc{\ndao2024robust,\ntitle={{ROBUST} {DIFFUSION} {GAN} {USING} {SEMI}-{UNBALANCED} {OPTIMAL} {TRANSPORT}},\nauthor={Quan Dao and B{\\`\\i}nh H\u1eefu T\u1ea1 and Tung Pham and Anh Tuan Tran},\nyear={2024},\nurl={https://openreview.net/forum?id=6oC3djD3hU}\n}", "github": "", "project": "", "reviewers": "ov4k;gFqX;P1f3;MvTD", "site": "https://openreview.net/forum?id=6oC3djD3hU", "pdf_size": 1292589, "rating": "3;5;5;5", "confidence": "3;4;4;4", "soundness": "2;3;3;3", "contribution": "2;2;2;2", "presentation": "2;2;2;2", "wc_summary": "43;76;37;16", "wc_strengths": "55;66;31;26", "wc_weaknesses": "93;51;219;149", "wc_questions": "63;213;34;2", "wc_review": "254;406;321;193", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 43.0, 21.529050141610984 ], "wc_strengths_avg": [ 44.5, 16.560495161679196 ], "wc_weaknesses_avg": [ 128.0, 63.0 ], "wc_questions_avg": [ 78.0, 80.87335778858201 ], "wc_review_avg": [ 293.5, 79.1722805027113 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], 
"reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17369468804813068491&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "VinAI Research", "aff_unique_dep": "", "aff_unique_url": "https://www.vinai.io/", "aff_unique_abbr": "VinAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Vietnam" }, { "title": "Large Language Model Cascades with Mixture of Thought Representations for Cost-Efficient Reasoning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19383", "id": "6okaSfANzh", "author_site": "Murong Yue, Jie Zhao, Min Zhang, Liang Du, Ziyu Yao", "tldr": "", "abstract": "Large language models (LLMs) such as GPT-4 have exhibited remarkable performance in a variety of tasks, but this strong performance often comes with the high expense of using paid API services. In this paper, we are motivated to study building an LLM \"cascade\" to save the cost of using LLMs, particularly for performing (e.g., mathematical, causal) reasoning tasks. Our cascade pipeline follows the intuition that simpler questions can be addressed by a weaker but more affordable LLM, whereas only the most challenging questions necessitate the stronger and more expensive LLM. To realize this decision-making, we consider the \"answer consistency\" of the weaker LLM as a signal of the question difficulty and propose several methods for answering sampling and consistency checking, including one leveraging a mixture of two thought representations (i.e., Chain-of-Thought and Program-of-Thought). 
Through experiments on six reasoning benchmark datasets, with GPT-3.5-turbo and GPT-4 being the weaker and stronger LLMs, respectively, our cascade pipeline demonstrates comparable performance but reduces about 60% of the cost compared with fully using the stronger LLM.", "keywords": "Large Language Models;Natural Language Processing;Reasoning", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/2a26302d325a3f2262f6ea00a260917a943e3e4d.zip", "author": "Murong Yue;Jie Zhao;Min Zhang;Liang Du;Ziyu Yao", "authorids": "~Murong_Yue1;~Jie_Zhao1;~Min_Zhang18;~Liang_Du3;~Ziyu_Yao1", "gender": "M;;F;;F", "homepage": "https://murongyue.github.io/;;https://gabriellamin.github.io/Min-homepage.github.io/;;http://ziyuyao.org", "dblp": "354/7400;;;;", "google_scholar": "ivm3dVEAAAAJ;;PxmM3oEAAAAJ;aq4dG-AAAAAJ;4lYrMNUAAAAJ", "orcid": ";;;;0009-0007-4571-3505", "linkedin": "murong-yue-480a78177/;;min-zhang-905479292/;;", "or_profile": "~Murong_Yue1;~Jie_Zhao1;~Min_Zhang18;~Liang_Du3;~Ziyu_Yao1", "aff": "George Mason University;;Virginia Polytechnic Institute and State University;Microsoft;George Mason University", "aff_domain": "gmu.edu;;vt.edu;microsoft.com;gmu.edu", "position": "PhD student;;PhD student;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nyue2024large,\ntitle={Large Language Model Cascades with Mixture of Thought Representations for Cost-Efficient Reasoning},\nauthor={Murong Yue and Jie Zhao and Min Zhang and Liang Du and Ziyu Yao},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=6okaSfANzh}\n}", "github": "", "project": "", "reviewers": "6oKd;BfsR;ijvt", "pdf_size": 1040600, "rating": "6;6;8", "confidence": "3;3;4", "soundness": "3;4;4", "contribution": "3;3;4", "presentation": "3;3;4", "wc_summary": "78;92;164", "wc_strengths": "95;146;46", "wc_weaknesses": "141;379;6", "wc_questions": "37;140;50", "wc_review": "351;757;266", "wc_reply_reviewers": "0;73;14", "wc_reply_authors": "436;1092;387", "reply_reviewers": "0;1;1", "reply_authors": "1;2;2", "rating_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 111.33333333333333, 37.67698973585278 ], "wc_strengths_avg": [ 95.66666666666667, 40.827550610940264 ], "wc_weaknesses_avg": [ 175.33333333333334, 154.1997261850862 ], "wc_questions_avg": [ 75.66666666666667, 45.79907810814051 ], "wc_review_avg": [ 458.0, 214.25374364679527 ], "wc_reply_reviewers_avg": [ 29.0, 31.63331577098213 ], "wc_reply_authors_avg": [ 638.3333333333334, 321.41389031728056 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.9999999999999998, "gs_citation": 63, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3633402015473153407&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "openreview": "https://openreview.net/forum?id=6okaSfANzh", "pdf": "https://openreview.net/pdf?id=6okaSfANzh", "email": "gmu.edu;;vt.edu;microsoft.com;gmu.edu", "author_num": 5, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "George Mason University;Virginia 
Tech;Microsoft", "aff_unique_dep": ";;Microsoft Corporation", "aff_unique_url": "https://www.gmu.edu;https://www.vt.edu;https://www.microsoft.com", "aff_unique_abbr": "GMU;VT;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "A Semantic Invariant Robust Watermark for Large Language Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19382", "id": "6p8lpe4MNf", "author_site": "Aiwei Liu, Leyi Pan, Xuming Hu, Shiao Meng, Lijie Wen", "tldr": "", "abstract": "Watermark algorithms for large language models (LLMs) have achieved extremely high accuracy in detecting text generated by LLMs. Such algorithms typically involve adding extra watermark logits to the LLM's logits at each generation step. However, prior algorithms face a trade-off between attack robustness and security robustness. This is because the watermark logits for a token are determined by a certain number of preceding tokens; a small number leads to low security robustness, while a large number results in insufficient attack robustness. In this work, we propose a semantic invariant watermarking method for LLMs that provides both attack robustness and security robustness. The watermark logits in our work are determined by the semantics of all preceding tokens. Specifically, we utilize another embedding LLM to generate semantic embeddings for all preceding tokens, and then these semantic embeddings are transformed into the watermark logits through our trained watermark model.\nSubsequent analyses and experiments demonstrated the attack robustness of our method in semantically invariant settings: synonym substitution and text paraphrasing settings. 
Finally, we also show that our watermark possesses adequate security robustness.", "keywords": "Watermark algorithms;Large Language Models;Robustness", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/dacdf0a0c94dc157d062c579a985cdd676b56f8d.zip", "author": "Aiwei Liu;Leyi Pan;Xuming Hu;Shiao Meng;Lijie Wen", "authorids": "~Aiwei_Liu1;~Leyi_Pan1;~Xuming_Hu1;~Shiao_Meng1;~Lijie_Wen1", "gender": "M;F;M;M;M", "homepage": "https://exlaw.github.io/;;https://xuminghu.github.io/;https://github.com/msa30;https://www.thss.tsinghua.edu.cn/en/faculty/lijiewen.htm", "dblp": "321/4365;353/1437.html;262/3664;333/0475;36/172-1", "google_scholar": "UCOOmcEAAAAJ;https://scholar.google.com/citations?hl=zh-CN;dbBKbXoAAAAJ;https://scholar.google.com.hk/citations?user=2rd5iDIAAAAJ;https://scholar.google.com.tw/citations?user=f3C0jUIAAAAJ", "orcid": ";;0000-0001-6075-4224;;0000-0003-0358-3160", "linkedin": "%E7%91%B7%E7%8E%AE-%E5%88%98-0722731a6/;%E4%B9%90%E6%80%A1-%E6%BD%98-806718275/;;;", "or_profile": "~Aiwei_Liu1;~Leyi_Pan1;~Xuming_Hu1;~Shiao_Meng1;~Lijie_Wen1", "aff": "Chinese University of Hong Kong;Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "cuhk.hk;mails.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "Visiting Scholar;Undergrad student;PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nliu2024a,\ntitle={A Semantic Invariant Robust Watermark for Large Language Models},\nauthor={Aiwei Liu and Leyi Pan and Xuming Hu and Shiao Meng and Lijie Wen},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=6p8lpe4MNf}\n}", "github": "", "project": "", "reviewers": "yZti;FTnC;1s48;1hfF", "pdf_size": 822321, "rating": "3;5;6;8", "confidence": "4;3;3;5", "soundness": "1;3;2;3", "contribution": "1;3;3;3", "presentation": "3;3;3;3", "wc_summary": "58;109;46;125", "wc_strengths": "35;74;56;134", "wc_weaknesses": "249;103;81;298", "wc_questions": "53;135;132;339", "wc_review": "395;421;315;896", "wc_reply_reviewers": "139;236;16;441", "wc_reply_authors": "1089;936;464;1095", "reply_reviewers": "1;2;1;2", "reply_authors": "2;3;1;3", "rating_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "contribution_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 84.5, 33.26033673912518 ], "wc_strengths_avg": [ 74.75, 36.88749788207381 ], "wc_weaknesses_avg": [ 182.75, 92.71562705391146 ], "wc_questions_avg": [ 164.75, 105.84038690405472 ], "wc_review_avg": [ 506.75, 228.103457886986 ], "wc_reply_reviewers_avg": [ 208.0, 155.48151015474477 ], "wc_reply_authors_avg": [ 896.0, 257.42668859308276 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.4181210050035454, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "openreview": "https://openreview.net/forum?id=6p8lpe4MNf", "pdf": "https://openreview.net/pdf?id=6p8lpe4MNf", "email": "cuhk.hk;mails.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 5, "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "Chinese University of Hong Kong;Tsinghua University", "aff_unique_dep": ";", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.tsinghua.edu.cn", 
"aff_unique_abbr": "CUHK;THU", "aff_campus_unique_index": "0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Towards Diverse Behaviors: A Benchmark for Imitation Learning with Human Demonstrations", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19381", "id": "6pPYRXKPpw", "author_site": "Xiaogang Jia, Denis Blessing, Xinkai Jiang, Moritz Reuss, Atalay Donat, Rudolf Lioutikov, Gerhard Neumann", "tldr": "", "abstract": "Imitation learning with human data has demonstrated remarkable success in teaching robots in a wide range of skills. However, the inherent diversity in human behavior leads to the emergence of multi-modal data distributions, thereby presenting a formidable challenge for existing imitation learning algorithms. Quantifying a model's capacity to capture and replicate this diversity effectively is still an open problem. In this work, we introduce simulation benchmark environments and the corresponding *Datasets with Diverse human Demonstrations for Imitation Learning (D3IL)*, designed explicitly to evaluate a model's ability to learn multi-modal behavior. Our environments are designed to involve multiple sub-tasks that need to be solved, consider manipulation of multiple objects which increases the diversity of the behavior and can only be solved by policies that rely on closed loop sensory feedback. Other available datasets are missing at least one of these challenging properties.\nTo address the challenge of diversity quantification, we introduce tractable metrics that provide valuable insights into a model's ability to acquire and reproduce diverse behaviors. These metrics offer a practical means to assess the robustness and versatility of imitation learning algorithms. Furthermore, we conduct a thorough evaluation of state-of-the-art methods on the proposed task suite. This evaluation serves as a benchmark for assessing their capability to learn diverse behaviors. 
Our findings shed light on the effectiveness of these methods in tackling the intricate problem of capturing and generalizing multi-modal human behaviors, offering a valuable reference for the design of future imitation learning algorithms.", "keywords": "Imitation Learning;Benchmark;Datasets;Diverse Behaviors", "primary_area": "datasets and benchmarks", "supplementary_material": "/attachment/cd40700eabe3ab1ed74ee745d330703687a0bd4f.zip", "author": "Xiaogang Jia;Denis Blessing;Xinkai Jiang;Moritz Reuss;Atalay Donat;Rudolf Lioutikov;Gerhard Neumann", "authorids": "~Xiaogang_Jia1;~Denis_Blessing1;~Xinkai_Jiang1;~Moritz_Reuss1;~Atalay_Donat1;~Rudolf_Lioutikov1;~Gerhard_Neumann2", "gender": "M;M;M;M;M;M;M", "homepage": "https://xiaogangjia.github.io/Personal_Website/;;;;https://www.ias.informatik.tu-darmstadt.de/Team/AtalayDonat;https://rudolf.intuitive-robots.net;https://alr.anthropomatik.kit.edu/", "dblp": "23/10777;219/1435;;321/1769;;151/9451;60/4878", "google_scholar": "E7Tja9gAAAAJ;https://scholar.google.de/citations?view_op=list_works;1BfDuRMAAAAJ;NLuzkPIAAAAJ;LG_x9Y8AAAAJ;hvjV43MAAAAJ;https://scholar.google.com.tw/citations?user=GL360kMAAAAJ", "orcid": ";;;;;;", "linkedin": ";;;;;rudolf-lioutikov-74830730a/;", "or_profile": "~Xiaogang_Jia1;~Denis_Blessing1;~Xinkai_Jiang1;~Moritz_Reuss1;~Atalay_Donat1;~Rudolf_Lioutikov1;~Gerhard_Neumann1", "aff": "Karlsruher Institut f\u00fcr Technologie;Karlsruher Institut f\u00fcr Technologie;Karlsruher Institut f\u00fcr Technologie;Karlsruher Institut f\u00fcr Technologie;Karlsruher Institut f\u00fcr Technologie;Karlsruher Institut f\u00fcr Technologie;Karlsruhe Institute of Technology", "aff_domain": "kit.edu;kit.edu;kit.edu;kit.edu;kit.edu;kit.edu;kit.edu", "position": "PhD student;PhD student;PhD student;PhD student;MS student;Tenure-Track Professor;Full Professor", "bibtex": "@inproceedings{\njia2024towards,\ntitle={Towards Diverse Behaviors: A Benchmark for Imitation Learning with Human Demonstrations},\nauthor={Xiaogang Jia and Denis Blessing and Xinkai Jiang and Moritz Reuss and Atalay Donat and Rudolf Lioutikov and Gerhard Neumann},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=6pPYRXKPpw}\n}", "github": "", "project": "", "reviewers": "S4Qj;DMVD;K6Tb", "pdf_size": 4672377, "rating": "6;8;8", "confidence": "4;4;5", "soundness": "3;3;3", "contribution": "3;3;2", "presentation": "3;3;3", "wc_summary": "264;50;160", "wc_strengths": "46;116;132", "wc_weaknesses": "242;94;152", "wc_questions": "119;14;41", "wc_review": "671;274;485", "wc_reply_reviewers": "0;23;0", "wc_reply_authors": "645;822;418", "reply_reviewers": "0;1;0", "reply_authors": "1;2;1", "rating_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 158.0, 87.37657962329875 ], "wc_strengths_avg": [ 98.0, 37.345236197762446 ], "wc_weaknesses_avg": [ 162.66666666666666, 60.889699913495676 ], "wc_questions_avg": [ 58.0, 44.51965857910413 ], "wc_review_avg": [ 476.6666666666667, 162.1816539837009 ], "wc_reply_reviewers_avg": [ 7.666666666666667, 10.842303978193728 ], "wc_reply_authors_avg": [ 628.3333333333334, 165.3528214320988 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": 
[ 7, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11692037980249187412&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=6pPYRXKPpw", "pdf": "https://openreview.net/pdf?id=6pPYRXKPpw", "email": "kit.edu;kit.edu;kit.edu;kit.edu;kit.edu;kit.edu;kit.edu", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0;1", "aff_unique_norm": "Karlsruher Institut f\u00fcr Technologie;Karlsruhe Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.kit.edu;https://www.kit.edu", "aff_unique_abbr": "KIT;KIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "Germany" }, { "id": "6qtDu7hVPF", "title": "Generative Reinforcement Learning with Transformers", "track": "main", "status": "Reject", "tldr": "", "abstract": "In reinforcement learning, Transformers have been shown to be powerful models for multi-task policy distillation and, to a lesser extent, policy improvement via return interventions within frameworks such as Decision Transformers. These recent results are somewhat atypical for reinforcement learning, as they do not rely on the learning of a value function, which is usually at the heart of most traditional approaches. In this paper, we explore a principled approach to purely generative value function approximation with Transformers, opening the way for existing techniques to be applied for policy improvement. Importantly, unlike other RL methods, this generative approach allows us to kickstart the learning process by fine-tuning strong pretrained state predictors, such as foundation models, substantially shortening the training time. 
We showcase the potential of our approach by constructing an action-value function for chess that can play at the level of an expert human and over 400 Elo stronger than direct behavioural cloning.", "keywords": "reinforcement learning;transformers;policy evaluation;policy improvement;sequence modeling;compression", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/7260ce54541f8a6748700bcd1c93f6756cf663ce.zip", "author": "Gregoire Deletang;Anian Ruoss;Li Kevin Wenliang;Elliot Catt;Tim Genewein;Jordi Grau-Moya;Marcus Hutter;Joel Veness", "authorids": "~Gregoire_Deletang1;~Anian_Ruoss1;~Li_Kevin_Wenliang1;~Elliot_Catt1;~Tim_Genewein1;~Jordi_Grau-Moya2;~Marcus_Hutter1;~Joel_Veness2", "gender": ";M;;M;M;;;", "homepage": ";;https://kevin-w-li.github.io/;;http://tim.inversetemperature.net/;;http://www.hutter1.net/;", "dblp": ";259/2083;255/7009;204/2511;116/3039;116/3023;h/MarcusHutter;", "google_scholar": ";gFkwD3kAAAAJ;https://scholar.google.co.uk/citations?user=MW45NMEAAAAJ;d1JYeMIAAAAJ;https://scholar.google.de/citations?user=peNTK9oAAAAJ;;https://scholar.google.com.tw/citations?user=7hmCntEAAAAJ;", "orcid": ";;;0000-0001-9411-927X;;;0000-0002-3263-4097;", "linkedin": ";anian-ruoss;;;;jordi-g-9a1b02104;hutter1/;", "or_profile": "~Gregoire_Deletang1;~Anian_Ruoss1;~Li_Kevin_Wenliang1;~Elliot_Catt1;~Tim_Genewein1;~Jordi_Grau-Moya2;~Marcus_Hutter1;~Joel_Veness2", "aff": ";Google DeepMind;Google DeepMind;Google DeepMind;Google DeepMind;Google DeepMind;Australian National University;", "aff_domain": ";deepmind.com;deepmind.com;deepmind.com;google.com;deepmind.com;anu.edu.au;", "position": ";Researcher;Researcher;Researcher;Researcher;Researcher;Full Professor;", "bibtex": "@misc{\ndeletang2024generative,\ntitle={Generative Reinforcement Learning with Transformers},\nauthor={Gregoire Deletang and Anian Ruoss and Li Kevin Wenliang and Elliot Catt and Tim Genewein and Jordi Grau-Moya and Marcus Hutter and Joel Veness},\nyear={2024},\nurl={https://openreview.net/forum?id=6qtDu7hVPF}\n}", "github": "", "project": "", "reviewers": "izzx;A7j8;FxJd;MRfp;vuG2", "site": "https://openreview.net/forum?id=6qtDu7hVPF", "pdf_size": 1329677, "rating": "3;3;3;5;6", "confidence": "4;4;3;4;3", "soundness": "3;2;2;2;3", "contribution": "1;2;2;3;3", "presentation": "3;2;2;2;3", "wc_summary": "51;84;34;51;67", "wc_strengths": "44;28;49;20;183", "wc_weaknesses": "119;112;483;162;172", "wc_questions": "116;246;2;152;153", "wc_review": "330;470;568;385;575", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "190;301;347;275;191", "reply_reviewers": "0;0;0;0;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 4.0, 1.2649110640673518 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "contribution_avg": [ 2.2, 0.7483314773547882 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 57.4, 16.906803364326446 ], "wc_strengths_avg": [ 64.8, 60.02466159837971 ], "wc_weaknesses_avg": [ 209.6, 138.68035188879495 ], "wc_questions_avg": [ 133.8, 78.71060919596543 ], "wc_review_avg": [ 465.6, 97.32132345996945 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 260.8, 61.85919495111458 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.3227486121839514, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:JO8l9macY-wJ:scholar.google.com/&scioq=Generative+Reinforcement+Learning+with+Transformers&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "Google;Australian National University", "aff_unique_dep": "Google DeepMind;", "aff_unique_url": "https://deepmind.com;https://www.anu.edu.au", "aff_unique_abbr": "DeepMind;ANU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "United Kingdom;Australia" }, { "id": "6r0BOIb771", "title": "Sequential Bayesian Continual Learning with Meta-Learned Neural Networks", "track": "main", "status": "Reject", "tldr": "", "abstract": "In the present era of deep learning, continual learning research is mainly focused on mitigating forgetting when training a neural network with stochastic gradient descent (SGD) on a non-stationary stream of data.\nOn the other hand, there is a wealth of research on sequential learning in the more classical literature of statistical machine learning.\nMany models in this literature have sequential Bayesian update rules that yield the same learning outcome as the batch training, i.e., they are completely immune to catastrophic forgetting.\nHowever, they suffer from underfitting when modeling complex distributions due to their weak representational power.\nIn this work, we introduce a general meta-continual learning (MCL) framework that combines neural networks' strong representational power and simple statistical models' robustness to forgetting.\nIn our framework, continual learning takes place only in a statistical model in the embedding space via a sequential Bayesian update rule, while meta-learned neural networks bridge the raw data and the embedding space.\nSince our approach is domain-agnostic and model-agnostic, it can be applied to a wide range of problems and easily integrated with existing model architectures.\nCompared to SGD-based MCL methods, our approach demonstrates significantly improved performance and scalability.", "keywords": "sequential Bayes;meta-continual learning", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "/attachment/97e27f89dc3297d102b54ba6f24c8d22f9e3a7d4.zip", "author": "Soochan Lee;Hyeonseong Jeon;Jaehyeon Son;Gunhee Kim", "authorids": "~Soochan_Lee1;~Hyeonseong_Jeon2;~Jaehyeon_Son1;~Gunhee_Kim1", "gender": "M;M;M;M", "homepage": "https://soochanlee.com;;https://jaehyeon-son.github.io/;http://vision.snu.ac.kr/gunhee/", "dblp": "230/1398;;359/3097.html;45/115", "google_scholar": "8O3MKJkAAAAJ;;q7SrBsgAAAAJ;https://scholar.google.co.kr/citations?user=CiSdOV0AAAAJ", "orcid": "0000-0002-1425-9262;;0009-0004-2726-1144;0000-0002-9543-7453", "linkedin": ";hs-jeon;jaehyeon-son-a626202b3/;", "or_profile": "~Soochan_Lee1;~Hyeonseong_Jeon2;~Jaehyeon_Son1;~Gunhee_Kim1", "aff": "Seoul National University;Seoul National University, Seoul National University;Seoul National University;Seoul National University", "aff_domain": "snu.ac.kr;cse.snu.ac.kr;snu.ac.kr;snu.ac.kr", "position": "PhD student;Undergrad student;Researcher;Full Professor", "bibtex": "@misc{\nlee2024sequential,\ntitle={Sequential Bayesian Continual Learning with Meta-Learned Neural Networks},\nauthor={Soochan Lee and Hyeonseong Jeon and Jaehyeon Son and Gunhee Kim},\nyear={2024},\nurl={https://openreview.net/forum?id=6r0BOIb771}\n}", "github": "", "project": "", "reviewers": "awoT;DFZf;y1kN", "site": 
"https://openreview.net/forum?id=6r0BOIb771", "pdf_size": 5128885, "rating": "5;5;6", "confidence": "2;4;4", "soundness": "3;3;3", "contribution": "2;2;3", "presentation": "3;1;3", "wc_summary": "43;68;342", "wc_strengths": "22;55;193", "wc_weaknesses": "59;345;327", "wc_questions": "33;30;123", "wc_review": "157;498;985", "wc_reply_reviewers": "0;300;341", "wc_reply_authors": "288;1554;1322", "reply_reviewers": "0;1;1", "reply_authors": "1;3;3", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 151.0, 135.44248471829903 ], "wc_strengths_avg": [ 90.0, 74.06753674856482 ], "wc_weaknesses_avg": [ 243.66666666666666, 130.78566010419064 ], "wc_questions_avg": [ 62.0, 43.15089802078283 ], "wc_review_avg": [ 546.6666666666666, 339.7767240736514 ], "wc_reply_reviewers_avg": [ 213.66666666666666, 152.0095026269374 ], "wc_reply_authors_avg": [ 1054.6666666666667, 550.326771614425 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.9428090415820634 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7291363222291637272&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Seoul", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "6rEcB9m9AI", "title": "Promoting Exploration in Memory-Augmented Adam using Critical Momenta", "track": "main", "status": "Reject", "tldr": "", "abstract": "Adaptive gradient-based optimizers, particularly Adam, have left their mark in training large-scale deep learning models. The strength of such optimizers is that they exhibit fast convergence while being more robust to hyperparameter choice. However, they often generalize worse than non-adaptive methods. Recent studies have tied this performance gap to flat minima selection: adaptive methods tend to find solutions in sharper basins of the loss landscape, which in turn hurts generalization. To overcome this issue, we propose a new memory-augmented version of Adam that promotes {exploration} towards flatter minima by using a buffer of critical momentum terms during training. Intuitively, the use of the buffer makes the optimizer overshoot outside the basin of attraction if it is not wide enough. 
We empirically show that our method improves model performance on standard supervised and online learning tasks.", "keywords": "Adaptive optimization;deep learning;memory-augmented optimizers;momentum", "primary_area": "optimization", "supplementary_material": "", "author": "Pranshu Malviya;Goncalo Mordido;Aristide Baratin;Reza Babanezhad Harikandeh;Jerry Huang;Simon Lacoste-Julien;Razvan Pascanu;Sarath Chandar", "authorids": "~Pranshu_Malviya1;~Goncalo_Mordido1;~Aristide_Baratin1;~Reza_Babanezhad_Harikandeh1;~Jerry_Huang1;~Simon_Lacoste-Julien1;~Razvan_Pascanu1;~Sarath_Chandar1", "gender": "M;;;M;;M;M;M", "homepage": "https://pranshu28.github.io/about/;;;http://babanezhad.ca;;http://www.iro.umontreal.ca/~slacoste/;https://razp.info;http://sarathchandar.in/", "dblp": ";;;37/8904.html;;94/446.html;65/8368.html;45/8542", "google_scholar": ";;;KLrwPsgAAAAJ;;oejm5IUAAAAJ;https://scholar.google.ca/citations?user=eSPY8LwAAAAJ;https://scholar.google.co.in/citations?user=yxWtZLAAAAAJ", "orcid": ";;;;;0000-0001-6485-6180;;", "linkedin": "pranshumalviya2/;;;;;simon-lacoste-julien-355b9a3;;", "or_profile": "~Pranshu_Malviya1;~Goncalo_Mordido1;~Aristide_Baratin1;~Reza_Babanezhad_Harikandeh1;~Jerry_Huang1;~Simon_Lacoste-Julien1;~Razvan_Pascanu1;~Sarath_Chandar1", "aff": "\u00c9cole Polytechnique de Montr\u00e9al, Universit\u00e9 de Montr\u00e9al;;;Samsung;;Samsung - SAIT AI Lab, Montreal;Google DeepMind;\u00c9cole Polytechnique de Montr\u00e9al", "aff_domain": "polymtl.ca;;;samsung.com;;samsung.com;google.com;polymtl.ca", "position": "PhD student;;;Research Scientist;;VP Lab Director;Research Scientist;Assistant Professor", "bibtex": "@misc{\nmalviya2024promoting,\ntitle={Promoting Exploration in Memory-Augmented Adam using Critical Momenta},\nauthor={Pranshu Malviya and Goncalo Mordido and Aristide Baratin and Reza Babanezhad Harikandeh and Jerry Huang and Simon Lacoste-Julien and Razvan Pascanu and Sarath Chandar},\nyear={2024},\nurl={https://openreview.net/forum?id=6rEcB9m9AI}\n}", "github": "", "project": "", "reviewers": "qx25;V8UZ;Jwku;GnW1", "site": "https://openreview.net/forum?id=6rEcB9m9AI", "pdf_size": 5510489, "rating": "3;5;5;6", "confidence": "4;4;4;1", "soundness": "2;3;2;3", "contribution": "2;2;2;3", "presentation": "3;3;3;3", "wc_summary": "106;68;65;80", "wc_strengths": "72;36;22;53", "wc_weaknesses": "167;154;167;16", "wc_questions": "127;105;2;25", "wc_review": "472;363;256;174", "wc_reply_reviewers": "0;0;0;44", "wc_reply_authors": "705;456;733;201", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.25, 1.299038105676658 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 79.75, 16.161296358893985 ], "wc_strengths_avg": [ 45.75, 18.713297411199342 ], "wc_weaknesses_avg": [ 126.0, 63.72989879169745 ], "wc_questions_avg": [ 64.75, 52.470825226977325 ], "wc_review_avg": [ 316.25, 112.14806061631204 ], "wc_reply_reviewers_avg": [ 11.0, 19.05255888325765 ], "wc_reply_authors_avg": [ 523.75, 215.2874531875929 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.6622661785325219, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17004626632720092258&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;1;2;0", "aff_unique_norm": "\u00c9cole Polytechnique de 
Montr\u00e9al;Samsung;Google", "aff_unique_dep": ";Samsung;Google DeepMind", "aff_unique_url": "https://www.polymtl.ca;https://www.samsung.com;https://deepmind.com", "aff_unique_abbr": "Polytechnique Montr\u00e9al;Samsung;DeepMind", "aff_campus_unique_index": "0;2;0", "aff_campus_unique": "Montr\u00e9al;;Montreal", "aff_country_unique_index": "0;1;0;2;0", "aff_country_unique": "Canada;South Korea;United Kingdom" }, { "id": "6rvliexcMV", "title": "Emergent representations in networks trained with the Forward-Forward algorithm", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "The Backpropagation algorithm, widely used to train neural networks, has often been criticised for its lack of biological realism. In an attempt to find a more biologically plausible alternative, and avoid to back-propagate gradients in favour of using local learning rules, the recently introduced Forward-Forward algorithm replaces the traditional forward and backward passes of Backpropagation with two forward passes. In this work, we show that internal representations obtained with the Forward-Forward algorithm can organize into robust, category-specific ensembles, composed by an extremely low number of active units (high sparsity). This situation is reminiscent of what has been observed in cortical sensory areas, where neuronal ensembles are suggested to serve as the functional building blocks for perception and action. Interestingly, while these ensembles do not typically arise in models trained with standard Backpropagation, they can manifest in networks optimized by Backpropagation, given the same training objective as that of the Forward-Forward algorithm. These findings suggest that the learning procedure proposed by Forward-Forward may surpass Backpropagation in its capacity to model learning in the cortex, even when a backward pass is used, and may inspire new approaches to compare representations in biological and artificial neural networks.", "keywords": "Forward-Forward;Representations;Sensory cortex;Ensembles;Sparsity;Backpropagation", "primary_area": "visualization or interpretation of learned representations", "supplementary_material": "", "author": "Niccolo Tosato;Lorenzo Basile;Emanuele Ballarin;Giuseppe De Alteriis;Alberto Cazzaniga;Alessio ansuini", "authorids": "~Niccolo_Tosato1;~Lorenzo_Basile1;~Emanuele_Ballarin1;~Giuseppe_De_Alteriis1;~Alberto_Cazzaniga1;~Alessio_ansuini1", "gender": "M;;M;M;M;M", "homepage": ";;https://ballarin.cc/;;https://areasciencepark-rit.gitlab.io/lade/alberto.cazzaniga/;", "dblp": ";348/5790;348/6393;;339/6443;232/2196", "google_scholar": ";EUE33IQAAAAJ;https://scholar.google.com/citations?hl=en;Nm1c1QkAAAAJ;AmafJqIAAAAJ;6lhdu6kAAAAJ", "orcid": ";;0000-0003-3673-0665;0000-0003-3116-6006;0000-0001-6271-3303;0000-0002-3117-3532", "linkedin": "https://linkedin.com/in/niccolo-tosato;lorebasile/;emaballarin;giuseppe-de-alteriis-664875253/;alberto-cazzaniga-4155b6164/;alessioansuini/", "or_profile": "~Niccolo_Tosato1;~Lorenzo_Basile1;~Emanuele_Ballarin1;~Giuseppe_De_Alteriis1;~Alberto_Cazzaniga1;~Alessio_ansuini1", "aff": "University of Trieste;University of Trieste;University of Trieste;King's College London, University of London;AREA Science Park;AREA Science Park", "aff_domain": "units.it;units.it;units.it;kcl.ac.uk;areasciencepark.it;areasciencepark.it", "position": "MS student;PhD student;PhD student;PhD student;Researcher;Researcher", "bibtex": "@misc{\ntosato2024emergent,\ntitle={Emergent representations in networks trained with the Forward-Forward 
algorithm},\nauthor={Niccolo Tosato and Lorenzo Basile and Emanuele Ballarin and Giuseppe De Alteriis and Alberto Cazzaniga and Alessio ansuini},\nyear={2024},\nurl={https://openreview.net/forum?id=6rvliexcMV}\n}", "github": "", "project": "", "reviewers": "", "site": "https://openreview.net/forum?id=6rvliexcMV", "pdf_size": 0, "rating": "", "confidence": "", "soundness": "", "contribution": "", "presentation": "", "wc_summary": "", "wc_strengths": "", "wc_weaknesses": "", "wc_questions": "", "wc_review": "", "wc_reply_reviewers": "", "wc_reply_authors": "", "reply_reviewers": "", "reply_authors": "", "rating_avg": [ 0, 0 ], "confidence_avg": [ 0, 0 ], "soundness_avg": [ 0, 0 ], "contribution_avg": [ 0, 0 ], "presentation_avg": [ 0, 0 ], "wc_summary_avg": [ 0, 0 ], "wc_strengths_avg": [ 0, 0 ], "wc_weaknesses_avg": [ 0, 0 ], "wc_questions_avg": [ 0, 0 ], "wc_review_avg": [ 0, 0 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 0, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12816113402618674086&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;1;2;2", "aff_unique_norm": "University of Trieste;King's College London;Area Science Park", "aff_unique_dep": ";;", "aff_unique_url": "https://www.units.it;https://www.kcl.ac.uk;https://www.area-science-park.org/", "aff_unique_abbr": "UniTS;KCL;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "Italy;United Kingdom" }, { "id": "6sfRRcynDy", "title": "Out-of-Distribution Detection with Hyperspherical Energy", "track": "main", "status": "Reject", "tldr": "", "abstract": "The ability to detect if inputs are out-of-distribution (OOD) is essential to guarantee the reliability and safety of machine learning models that are deployed in an open environment. Recent studies have shown that an energy-based score is effective. However, unconstrained energy scores from a model trained with cross-entropy loss may not necessarily reflect the log-likelihood. To address this limitation, we introduce a novel hyperspherical energy score that connects energy with hyperspherical representations. By modeling hyperspherical representations using von Mises-Fisher distribution, our method provides a more accurate interpretation from a log-likelihood perspective, making it an efficient OOD detection indicator. Our method consistently achieves competitive performance on popular OOD detection benchmarks. 
On the large-scale ImageNet-1k benchmark, our method is more than 10 times faster than the KNN-based score, while simultaneously reducing the average FPR95 by 11.85%.", "keywords": "Hyperspherical energy;model reliability", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/b9389ef50e67203a371ee55824c200af143cbea4.pdf", "author": "Jirayu Burapacheep;Yixuan Li", "authorids": "~Jirayu_Burapacheep1;~Yixuan_Li1", "gender": ";F", "homepage": "https://top34051.github.io/;http://pages.cs.wisc.edu/~sharonli/", "dblp": "330/2284;144/6087-1", "google_scholar": ";https://scholar.google.com/citations?hl=en", "orcid": ";", "linkedin": ";liyixuan", "or_profile": "~Jirayu_Burapacheep1;~Yixuan_Li1", "aff": "Stanford University;Cornell University", "aff_domain": "stanford.edu;cornell.edu", "position": "MS student;Graduate Student", "bibtex": "@misc{\nburapacheep2024outofdistribution,\ntitle={Out-of-Distribution Detection with Hyperspherical Energy},\nauthor={Jirayu Burapacheep and Yixuan Li},\nyear={2024},\nurl={https://openreview.net/forum?id=6sfRRcynDy}\n}", "github": "", "project": "", "reviewers": "A9ru;aQu1;uYkE;fiAA", "site": "https://openreview.net/forum?id=6sfRRcynDy", "pdf_size": 2597262, "rating": "3;5;5;6", "confidence": "5;3;3;4", "soundness": "1;3;3;3", "contribution": "1;2;2;2", "presentation": "2;3;2;4", "wc_summary": "88;148;98;68", "wc_strengths": "23;49;31;38", "wc_weaknesses": "516;171;126;128", "wc_questions": "262;276;299;127", "wc_review": "889;644;554;361", "wc_reply_reviewers": "123;20;93;38", "wc_reply_authors": "705;749;666;551", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;1;2", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 100.5, 29.47456530637899 ], "wc_strengths_avg": [ 35.25, 9.54921462739214 ], "wc_weaknesses_avg": [ 235.25, 163.0849088665165 ], "wc_questions_avg": [ 241.0, 67.1304699819687 ], "wc_review_avg": [ 612.0, 189.81438301667237 ], "wc_reply_reviewers_avg": [ 68.5, 41.391424232562954 ], "wc_reply_authors_avg": [ 667.75, 73.52338063500616 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.6225430174794673, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:GM_vH_bqG4oJ:scholar.google.com/&scioq=Out-of-Distribution+Detection+with+Hyperspherical+Energy&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Stanford University;Cornell University", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;https://www.cornell.edu", "aff_unique_abbr": "Stanford;Cornell", "aff_campus_unique_index": "0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "6ssOs9BBxa", "title": "A Competition Winning Deep Reinforcement Learning Agent in microRTS", "track": "main", "status": "Reject", "tldr": "", "abstract": "Scripted agents have predominantly won the five\nprevious iterations of the IEEE microRTS ($\\mu$RTS) competitions hosted at CIG and\nCoG. 
Despite Deep Reinforcement Learning (DRL) algorithms making significant strides\nin real-time strategy (RTS) games, their adoption in this primarily academic\ncompetition has been limited due to the considerable training resources required and the complexity\ninherent in creating and debugging such agents. \\agentName\\ is the first DRL agent\nto win the IEEE microRTS competition. In a benchmark without performance\nconstraints, \\agentName\\ regularly defeated the two\nprior competition winners. This first competition-winning DRL submission can be\na benchmark for future microRTS competitions and a starting point for future DRL\nresearch. Iteratively fine-tuning the base policy and transfer learning to specific maps were \ncritical to \\agentName's winning performance. These strategies can be used in\neconomically training future DRL agents. Further work in Imitation Learning using Behavior Cloning and\nfine-tuning these models with DRL has proven promising as an efficient way\nto bootstrap models with novel behaviors.", "keywords": "reinforcement learning;microRTS;PPO;RTS;imitation learning;behavior cloning", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/53aa2a4210ecec81e72e23dc3c3aed3e34c76bb1.zip", "author": "Scott Goodfriend", "authorids": "~Scott_Goodfriend1", "gender": "", "homepage": "", "dblp": "125/2125", "google_scholar": "HwatFwIAAAAJ", "orcid": "", "linkedin": "scottgoodfriend/", "or_profile": "~Scott_Goodfriend1", "aff": "Anthropic", "aff_domain": "anthropic.com", "position": "Researcher", "bibtex": "@misc{\ngoodfriend2024a,\ntitle={A Competition Winning Deep Reinforcement Learning Agent in micro{RTS}},\nauthor={Scott Goodfriend},\nyear={2024},\nurl={https://openreview.net/forum?id=6ssOs9BBxa}\n}", "github": "", "project": "", "reviewers": "Pjfo;7eJX;nGmd;AVMG", "site": "https://openreview.net/forum?id=6ssOs9BBxa", "pdf_size": 1009225, "rating": "3;5;5;6", "confidence": "4;4;4;5", "soundness": "2;4;3;3", "contribution": "2;2;3;2", "presentation": "2;3;2;3", "wc_summary": "107;118;66;103", "wc_strengths": "48;39;107;61", "wc_weaknesses": "31;118;134;119", "wc_questions": "46;32;64;118", "wc_review": "232;307;371;401", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "130;150;355;125", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 98.5, 19.551214796017153 ], "wc_strengths_avg": [ 63.75, 26.166533969939543 ], "wc_weaknesses_avg": [ 100.5, 40.623269193899205 ], "wc_questions_avg": [ 65.0, 32.63433774416144 ], "wc_review_avg": [ 327.75, 64.87439787774527 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 190.0, 95.72094859538323 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.6622661785325219, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5650866779967715802&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0", "aff_unique_norm": "Anthropic", "aff_unique_dep": "", "aff_unique_url": "https://www.anthropic.com", "aff_unique_abbr": "Anthropic", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "6t8SUcA4sI", "title": "Ratio-Residual Diffusion Model for Image Restoration", "track": "main", 
"status": "Withdraw", "tldr": "", "abstract": "Most existing diffusion-based image restoration methods suffer from poor interpretability and inefficient sampling, due to their direct incorporation of degraded images as conditions within the original diffusion models. Recently, some researches have tried to build a new diffusion model by transferring the discrepancies between degraded and clear images, however, they cannot effectively model diverse degradation. To address these issues, we propose a universal diffusion model for image restoration that can cover different types of degradation. Specifically, our method consists of a Markov chain that convert a high-quality image to its low-quality counterpart. The transition kernel of this Markov chain is constructed through the ratio and residual between the high-quality and low-quality images, which provides a general expression that can effectively handle various degradation processes. Moreover, we analyze the characteristics of different degradation, and design a mean schedule that enables flexible control over the diffusion speed pertaining to different degradation, which yields better restoration performance. Extensive experiments have demonstrate that our method surpasses existing image restoration methods and achieves superior performance on multiple image restoration tasks, including deraining, dehazing, denoising, deblurring and low-light enhancement.", "keywords": "Diffusion Model;Image Restoration", "primary_area": "generative models", "supplementary_material": "", "author": "Zizheng Yang;Jie Huang;Hu Yu;Man Zhou;Bing Li;Feng Zhao", "authorids": "~Zizheng_Yang1;~Jie_Huang4;~Hu_Yu2;~Man_Zhou4;~Bing_Li16;~Feng_Zhao6", "gender": "M;M;M;M;M;M", "homepage": ";;https://yuhuustc.github.io/;https://www.ustc.edu.cn/;https://bivlab123.github.io/;", "dblp": "308/0448;;;;181/2734-4;165/8237", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?hl=zh-CN;;https://scholar.google.co.uk/citations?hl=en;", "orcid": ";0000-0002-3518-3404;0000-0003-0598-8989;;0000-0001-6767-8105;0000-0003-2872-605X", "linkedin": ";;;;;", "or_profile": "~Zizheng_Yang1;~Jie_Huang4;~Hu_Yu2;~Bing_Li16;~Feng_Zhao6;~man_zhou1", "aff": "University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn", "position": "PhD student;PhD student;PhD student;MS student;Full Professor;Postdoc", "bibtex": "@misc{\nyang2024ratioresidual,\ntitle={Ratio-Residual Diffusion Model for Image Restoration},\nauthor={Zizheng Yang and Jie Huang and Hu Yu and Man Zhou and Bing Li and Feng Zhao},\nyear={2024},\nurl={https://openreview.net/forum?id=6t8SUcA4sI}\n}", "github": "", "project": "", "reviewers": "Cv16;RMrE;xayc", "site": "https://openreview.net/forum?id=6t8SUcA4sI", "pdf_size": 8034218, "rating": "3;5;8", "confidence": "4;2;4", "soundness": "2;3;3", "contribution": "2;2;3", "presentation": "2;3;3", "wc_summary": "86;91;49", "wc_strengths": "38;41;39", "wc_weaknesses": "97;269;57", "wc_questions": "220;5;27", "wc_review": "441;406;172", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 5.333333333333333, 
2.0548046676563256 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 75.33333333333333, 18.732028424302822 ], "wc_strengths_avg": [ 39.333333333333336, 1.247219128924647 ], "wc_weaknesses_avg": [ 141.0, 91.97100992522951 ], "wc_questions_avg": [ 84.0, 96.58502299356078 ], "wc_review_avg": [ 339.6666666666667, 119.4161723646434 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 3, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.11470786693528094, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:J1TmWKB0T4UJ:scholar.google.com/&scioq=Ratio-Residual+Diffusion+Model+for+Image+Restoration&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "University of Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ustc.edu.cn", "aff_unique_abbr": "USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "6tDPefQyvB", "title": "Rotation-Equivariance and Position Encodings for Enhancing Local Descriptors", "track": "main", "status": "Reject", "tldr": "", "abstract": "Keypoint extraction and description are crucial issues in robot vision. In recent years, deep learning-based keypoint extraction has exhibited robustness to variations in lighting and viewpoint. However, due to the lack of rotational invariance in traditional convolutional networks, the performance of deep learning-based keypoint extraction significantly deteriorates under large rotations. Keypoint methods based on group-equivariant neural networks address the issue of rotational equivariance, but their overall performance also suffers. This paper addresses the problem from the perspective of keypoint description and proposes a fusion of locally rotation-equivariant descriptions with globally encoded positional information and a directional uncertainty weighted descriptor loss. This effectively enhances the performance of keypoint extraction and description. 
Validation is conducted on rotated-HPatches, rotated-MegaDepth and rotated-YFCC100M datasets.", "keywords": "Rotation equivariance;keypoint extraction", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Shuai Su;Qijun Chen", "authorids": "~Shuai_Su1;~Qijun_Chen2", "gender": "M;M", "homepage": ";http://rail.tongji.edu.cn", "dblp": ";75", "google_scholar": "rQRbFegAAAAJ;", "orcid": ";0000-0001-5644-1188", "linkedin": ";", "or_profile": "~Shuai_Su1;~Qijun_Chen2", "aff": "Tongji University;Tongji University", "aff_domain": "tongji.edu.cn;tongji.edu.cn", "position": "PhD student;Full Professor", "bibtex": "@misc{\nsu2024rotationequivariance,\ntitle={Rotation-Equivariance and Position Encodings for Enhancing Local Descriptors},\nauthor={Shuai Su and Qijun Chen},\nyear={2024},\nurl={https://openreview.net/forum?id=6tDPefQyvB}\n}", "github": "", "project": "", "reviewers": "QDcf;rhbE;FmdE;JePm", "site": "https://openreview.net/forum?id=6tDPefQyvB", "pdf_size": 3654861, "rating": "3;5;6;6", "confidence": "5;5;3;4", "soundness": "2;3;3;3", "contribution": "2;3;3;2", "presentation": "2;2;3;2", "wc_summary": "64;80;53;41", "wc_strengths": "78;73;57;57", "wc_weaknesses": "215;309;69;192", "wc_questions": "152;9;14;33", "wc_review": "509;471;193;323", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "268;82;103;68", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 59.5, 14.361406616345072 ], "wc_strengths_avg": [ 66.25, 9.41740410091868 ], "wc_weaknesses_avg": [ 196.25, 85.5493278757934 ], "wc_questions_avg": [ 52.0, 58.42516581063335 ], "wc_review_avg": [ 374.0, 125.49501982150527 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 130.25, 80.49961180030621 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.7385489458759963, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:H0YALXUc6JUJ:scholar.google.com/&scioq=Rotation-Equivariance+and+Position+Encodings+for+Enhancing+Local+Descriptors&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Tongji University", "aff_unique_dep": "", "aff_unique_url": "https://www.tongji.edu.cn", "aff_unique_abbr": "Tongji", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "6tK0ayRF8H", "title": "Angle-optimized Text Embeddings", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "High-quality text embedding is pivotal in improving semantic textual similarity (STS) tasks, which are crucial components in Large Language Model (LLM) applications. However, a common challenge existing text embedding models face is the problem of vanishing gradients, primarily due to their reliance on the cosine function in the optimization objective, which has saturation zones. To address this issue, this paper proposes a novel angle-optimized text embedding model called AnglE. The core idea of AnglE is to introduce angle optimization in a complex space. 
This novel approach effectively mitigates the adverse effects of the saturation zone in the cosine function, which can impede gradient and hinder optimization processes. To set up a comprehensive STS evaluation, we experimented on existing short-text STS datasets and a newly collected long-text STS dataset from GitHub Issues. Furthermore, we examine domain-specific STS scenarios with limited labeled data and explore how AnglE works with LLM-annotated data. Extensive experiments were conducted on various tasks including short-text STS, long-text STS, and domain-specific STS tasks. The results show that AnglE outperforms the state-of-the-art (SOTA) STS models that ignore the cosine saturation zone. These findings demonstrate the ability of AnglE to generate high-quality text embeddings and the usefulness of angle optimization in STS.", "keywords": "NLP;Text Embedding;Semantic Textual Similarity", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/109e890770ab9fc45789a2d0e3fa935ad038e519.zip", "author": "Xianming LI;Jing Li", "authorids": "~Xianming_LI1;~Jing_Li18", "gender": "M;F", "homepage": ";http://www4.comp.polyu.edu.hk/~jing1li/", "dblp": "175/5398.html;181/2820-49", "google_scholar": "WwCp3OcAAAAJ;jvjOLx4AAAAJ", "orcid": "0009-0009-2610-7934;0000-0002-8044-2284", "linkedin": ";jing-li-b815b7a5/", "or_profile": "~Xianming_LI1;~Jing_Li18", "aff": "Hong Kong Polytechnic University;The Hong Kong Polytechnic University", "aff_domain": "polyu.edu.hk;polyu.edu.hk", "position": "PhD student;Assistant Professor", "bibtex": "@misc{\nli2024angleoptimized,\ntitle={Angle-optimized Text Embeddings},\nauthor={Xianming LI and Jing Li},\nyear={2024},\nurl={https://openreview.net/forum?id=6tK0ayRF8H}\n}", "github": "", "project": "", "reviewers": "7GFK;NPkC;3nS6;DPjM", "site": "https://openreview.net/forum?id=6tK0ayRF8H", "pdf_size": 677126, "rating": "5;5;5;6", "confidence": "4;4;3;3", "soundness": "3;2;2;3", "contribution": "2;2;2;3", "presentation": "3;3;2;2", "wc_summary": "80;56;72;80", "wc_strengths": "50;62;26;57", "wc_weaknesses": "153;139;106;166", "wc_questions": "28;1;56;15", "wc_review": "311;258;260;318", "wc_reply_reviewers": "0;0;18;27", "wc_reply_authors": "1112;938;995;880", "reply_reviewers": "0;0;1;1", "reply_authors": "4;3;3;3", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 72.0, 9.797958971132712 ], "wc_strengths_avg": [ 48.75, 13.808964479641476 ], "wc_weaknesses_avg": [ 141.0, 22.34949663862701 ], "wc_questions_avg": [ 25.0, 20.285462775100793 ], "wc_review_avg": [ 286.75, 27.8691137282835 ], "wc_reply_reviewers_avg": [ 11.25, 11.691342951089922 ], "wc_reply_authors_avg": [ 981.25, 85.74198213244198 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.25, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 381, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9499271582100007919&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Hong Kong Polytechnic University", "aff_unique_dep": "", "aff_unique_url": "https://www.polyu.edu.hk", "aff_unique_abbr": "PolyU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0", 
"aff_country_unique": "China" }, { "id": "6tazBqPem3", "title": "Capacity Analysis of Vector Symbolic Architectures", "track": "main", "status": "Reject", "tldr": "", "abstract": "Hyperdimensional computing (HDC) is a biologically-inspired framework which represents symbols with high-dimensional vectors, and uses vector operations to manipulate them. The ensemble of a particular vector space and a prescribed set of vector operations (e.g., addition-like for \"bundling\" and outer-product-like for \"binding\") form a vector symbolic architecture (VSA). While VSAs have been employed in numerous learning applications and have been studied empirically, many theoretical questions about VSAs remain open. In this paper, we analyze the representation capacities of four common VSAs: MAP-I, MAP-B, and two VSAs based on sparse binary vectors. \"Representation capacity\" here refers to bounds on the dimensions of the VSA vectors required to perform certain symbolic tasks, such as testing for set membership and estimating set intersection sizes for two sets of symbols, to a given degree of accuracy. We also analyze the ability of a novel variant of a Hopfield network (a simple model of associative memory) to perform some of the same tasks that are typically asked of VSAs. In addition to providing new bounds on VSA capacities, our analyses establish and leverage connections between VSAs, \"sketching\" (dimensionality reduction) algorithms, and Bloom filters.", "keywords": "Hyperdimensional computing;Vector Symbolic Architectures;representation learning;sketching;dimensionality reduction", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Kenneth L. Clarkson;Shashanka Ubaru;Elizabeth Yang", "authorids": "~Kenneth_L._Clarkson1;~Shashanka_Ubaru1;elizabeth_yang@berkeley.edu", "gender": "M;M;", "homepage": "http://researcher.watson.ibm.com/researcher/view.php?person=us-klclarks;http://shashankaubaru.github.io/;", "dblp": "89/2783;164/7307;", "google_scholar": "https://scholar.google.com/citations?hl=en;NmhyylsAAAAJ;", "orcid": "0000-0002-2880-2465;;", "linkedin": ";;", "or_profile": "~Kenneth_L._Clarkson1;~Shashanka_Ubaru1;elizabeth_yang@berkeley.edu", "aff": "International Business Machines;International Business Machines;", "aff_domain": "ibm.com;ibm.com;", "position": "Research Staff Member;Researcher;", "bibtex": "@misc{\nclarkson2024capacity,\ntitle={Capacity Analysis of Vector Symbolic Architectures},\nauthor={Kenneth L. 
Clarkson and Shashanka Ubaru and Elizabeth Yang},\nyear={2024},\nurl={https://openreview.net/forum?id=6tazBqPem3}\n}", "github": "", "project": "", "reviewers": "29HD;tEi4;q9j4", "site": "https://openreview.net/forum?id=6tazBqPem3", "pdf_size": 511602, "rating": "3;3;5", "confidence": "2;2;3", "soundness": "3;2;3", "contribution": "2;1;2", "presentation": "2;2;1", "wc_summary": "55;31;88", "wc_strengths": "46;16;62", "wc_weaknesses": "314;487;240", "wc_questions": "43;46;107", "wc_review": "458;580;497", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 2.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 1.6666666666666667, 0.4714045207910317 ], "presentation_avg": [ 1.6666666666666667, 0.4714045207910317 ], "wc_summary_avg": [ 58.0, 23.366642891095847 ], "wc_strengths_avg": [ 41.333333333333336, 19.067132861433457 ], "wc_weaknesses_avg": [ 347.0, 103.50201286287464 ], "wc_questions_avg": [ 65.33333333333333, 29.48822740612863 ], "wc_review_avg": [ 511.6666666666667, 50.87457343528516 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10289256151536939400&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0", "aff_unique_norm": "International Business Machines Corporation", "aff_unique_dep": "", "aff_unique_url": "https://www.ibm.com", "aff_unique_abbr": "IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Towards Establishing Guaranteed Error for Learned Database Operations", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19380", "id": "6tqgL8VluV", "author_site": "Sepanta Zeighami, Cyrus Shahabi", "tldr": "", "abstract": "Machine learning models have demonstrated substantial performance enhancements over non-learned alternatives in various fundamental data management operations, including indexing (locating items in an array), cardinality estimation (estimating the number of matching records in a database), and range-sum estimation (estimating aggregate attribute values for query-matched records). However, real-world systems frequently favor less efficient non-learned methods due to their ability to offer (worst-case) error guarantees \u2014 an aspect where learned approaches often fall short. The primary objective of these guarantees is to ensure system reliability, ensuring that the chosen approach consistently delivers the desired level of accuracy across all databases. In this paper, we embark on the first theoretical study of such guarantees for learned methods, presenting the necessary conditions for such guarantees to hold when using machine learning to perform indexing, cardinality estimation and range-sum estimation. Specifically, we present the first known lower bounds on the model size required to achieve the desired accuracy for these three key database operations. 
Our results bound the required model size for given average and worst-case errors in performing database operations, serving as the first theoretical guidelines governing how model size must change based on data size to be able to guarantee an accuracy level. More broadly, our established guarantees pave the way for the broader adoption and integration of learned models into real-world systems.", "keywords": "Learned Indexing;Learned Cardinality Estimation;Machine learning for Data Management", "primary_area": "learning theory", "supplementary_material": "", "author": "Sepanta Zeighami;Cyrus Shahabi", "authorids": "~Sepanta_Zeighami2;~Cyrus_Shahabi1", "gender": "M;M", "homepage": "https://szeighami.github.io/;https://infolab.usc.edu/Shahabi/", "dblp": ";s/CyrusShahabi", "google_scholar": ";jEdhxGMAAAAJ", "orcid": ";0000-0001-9118-0681", "linkedin": ";cyrus-shahabi-9791256b/", "or_profile": "~Sepanta_Zeighami2;~Cyrus_Shahabi1", "aff": "University of Southern California;University of Southern California", "aff_domain": "usc.edu;usc.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nzeighami2024towards,\ntitle={Towards Establishing Guaranteed Error for Learned Database Operations},\nauthor={Sepanta Zeighami and Cyrus Shahabi},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=6tqgL8VluV}\n}", "github": "", "project": "", "reviewers": "Px8G;q6av;EAu4;Aapp", "pdf_size": 415907, "rating": "3;5;8;8", "confidence": "3;4;2;3", "soundness": "2;2;3;3", "contribution": "2;2;4;3", "presentation": "3;3;2;3", "wc_summary": "67;52;86;83", "wc_strengths": "30;39;124;91", "wc_weaknesses": "211;326;252;94", "wc_questions": "81;66;215;39", "wc_review": "389;483;677;307", "wc_reply_reviewers": "0;123;0;0", "wc_reply_authors": "1425;1393;553;546", "reply_reviewers": "0;2;0;0", "reply_authors": "3;2;1;1", "rating_avg": [ 6.0, 2.1213203435596424 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 72.0, 13.619838471876236 ], "wc_strengths_avg": [ 71.0, 38.451267859460756 ], "wc_weaknesses_avg": [ 220.75, 83.98623399105355 ], "wc_questions_avg": [ 100.25, 67.93885118251559 ], "wc_review_avg": [ 464.0, 137.84411485442533 ], "wc_reply_reviewers_avg": [ 30.75, 53.26056233274298 ], "wc_reply_authors_avg": [ 979.25, 429.9060217070703 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5000000000000001, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6119200784656701599&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=6tqgL8VluV", "pdf": "https://openreview.net/pdf?id=6tqgL8VluV", "email": "usc.edu;usc.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Southern California", "aff_unique_dep": "", "aff_unique_url": "https://www.usc.edu", "aff_unique_abbr": "USC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "6u6GjS0vKZ", "title": "Coloring Deep CNN Layers with Activation Hue Loss", "track": "main", "status": "Reject", "tldr": "", "abstract": "This paper proposes a novel hue-like angular parameter to model the structure of 
deep convolutional neural network (CNN) activation space, referred to as the activation hue, for the purpose of regularizing models for more effective learning. The activation hue generalizes the notion of color hue angle in standard 3-channel RGB intensity space to $N$-channel activation space. A series of observations based on nearest neighbor indexing of activation vectors with pre-trained networks indicate that class-informative activations are concentrated about an angle $\\theta$ in both the $(x,y)$ image plane and in multi-channel activation space. A regularization term in the form of hue-like angular $\\theta$ labels is proposed to complement standard one-hot loss. Training from scratch using combined one-hot + activation hue loss improves classification performance modestly for a wide variety of classification tasks, including ImageNet.", "keywords": "supervised representation learning;general machine learning;representation learning for computer vision;visualization", "primary_area": "visualization or interpretation of learned representations", "supplementary_material": "", "author": "Louis-Fran\u00e7ois Bouchard;Mohsen Ben Lazreg;Matthew Toews", "authorids": "~Louis-Fran\u00e7ois_Bouchard1;~Mohsen_Ben_Lazreg1;~Matthew_Toews4", "gender": "M;M;", "homepage": "https://www.louisbouchard.ai;;http://www.matthewtoews.com", "dblp": ";;54/2036", "google_scholar": "45O9knIAAAAJ;;https://scholar.google.ca/citations?user=CebI4YoAAAAJ", "orcid": ";;", "linkedin": "whats-ai/;mohsen-ben-lazreg/;", "or_profile": "~Louis-Fran\u00e7ois_Bouchard1;~Mohsen_Ben_Lazreg1;~Matthew_Toews4", "aff": "Montreal Institute for Learning Algorithms, University of Montreal, Universit\u00e9 de Montr\u00e9al;;\u00c9cole de technologie sup\u00e9rieure, Universit\u00e9 du Qu\u00e9bec", "aff_domain": "mila.umontreal.ca;;etsmtl.ca", "position": "PhD student;;Full Professor", "bibtex": "@misc{\nbouchard2024coloring,\ntitle={Coloring Deep {CNN} Layers with Activation Hue Loss},\nauthor={Louis-Fran{\\c{c}}ois Bouchard and Mohsen Ben Lazreg and Matthew Toews},\nyear={2024},\nurl={https://openreview.net/forum?id=6u6GjS0vKZ}\n}", "github": "", "project": "", "reviewers": "1Awe;UKJD;2Rz4;Kfzo", "site": "https://openreview.net/forum?id=6u6GjS0vKZ", "pdf_size": 8488315, "rating": "3;3;5;6", "confidence": "4;4;3;3", "soundness": "2;1;3;3", "contribution": "2;1;2;3", "presentation": "2;2;2;3", "wc_summary": "33;336;116;41", "wc_strengths": "27;2;82;31", "wc_weaknesses": "251;2;129;71", "wc_questions": "150;2;49;4", "wc_review": "461;342;376;147", "wc_reply_reviewers": "0;178;37;18", "wc_reply_authors": "705;526;298;223", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 131.5, 122.42650856738503 ], "wc_strengths_avg": [ 35.5, 29.055980451535273 ], "wc_weaknesses_avg": [ 113.25, 91.35747095886576 ], "wc_questions_avg": [ 51.25, 60.030721301680195 ], "wc_review_avg": [ 331.5, 115.00108695138495 ], "wc_reply_reviewers_avg": [ 58.25, 70.36467508629596 ], "wc_reply_authors_avg": [ 438.0, 190.30107724340397 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.9622504486493761, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:L3_m4Sk9v_gJ:scholar.google.com/&scioq=Coloring+Deep+CNN+Layers+with+Activation+Hue+Loss&hl=en&as_sdt=0,6", "gs_version_total": 3, "aff_unique_index": "0;1", "aff_unique_norm": "University of Montreal;Universit\u00e9 du Qu\u00e9bec", "aff_unique_dep": "Montreal Institute for Learning Algorithms;", "aff_unique_url": "https://www.mila.quebec;https://www.etsmtl.ca", "aff_unique_abbr": "MILA;ETS", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Montreal;\u00c9cole de technologie sup\u00e9rieure", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "id": "6uUmpPvqUU", "title": "The Closeness of In-Context Learning and Weight Shifting for Softmax Regression", "track": "main", "status": "Reject", "tldr": "", "abstract": "Large language models (LLMs) are known for their exceptional performance in natural language processing, making them highly effective in many human life-related tasks. The attention mechanism in the Transformer architecture is a critical component of LLMs, as it allows the model to selectively focus on specific input parts. The softmax unit, which is a key part of the attention mechanism, normalizes the attention scores. Hence, the performance of LLMs in various NLP tasks depends significantly on the crucial role played by the attention mechanism with the softmax unit. \n\nIn-context learning is one of the celebrated abilities of recent LLMs. Without further parameter updates, Transformers can learn to predict based on few in-context examples. However, the reason why Transformers becomes in-context learners is not well understood. Recently, in-context learning has been studied from a mathematical perspective with simplified linear self-attention without softmax unit. Based on a linear regression formulation $ \\min_x \\| Ax - b \\|_2 $,\nexisting works show linear Transformers' capability of learning linear functions in context. The capability of Transformers with softmax unit approaching full Transformers, however, remains unexplored. \n\nIn this work, we study the in-context learning based on a softmax regression formulation $ \\min_{x} \\| \\langle \\exp(Ax), {\\bf 1}_n \\rangle^{-1} \\exp(Ax) - b \\|_2 $. We show the upper bounds of the data transformations induced by a single self-attention layer with softmax unit and by gradient-descent on a $ \\ell_2 $ regression loss for softmax prediction function. 
Our theoretical results imply that when training self-attention-only Transformers for fundamental regression tasks, the models learned by gradient-descent and Transformers show great similarity.", "keywords": "In-Context Learning;Softmax Regression;Attention Computation", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Shuai Li;Zhao Song;Yu Xia;Tong Yu;Tianyi Zhou", "authorids": "~Shuai_Li3;~Zhao_Song3;~Yu_Xia9;~Tong_Yu3;~Tianyi_Zhou4", "gender": "F;M;M;;", "homepage": "http://shuaili8.github.io;https://www.youtube.com/@zhaosong2031;https://andree-9.github.io/;https://www.linkedin.com/in/tong-yu-42790744;", "dblp": "57/2281-10;76/4051-2;28/4326-7;32/1593-1;", "google_scholar": "https://scholar.google.com.hk/citations?user=kMZgQxcAAAAJ;yDZct7UAAAAJ;sTVqEUMAAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": ";;;0000-0002-5991-2050;", "linkedin": ";;;tong-yu-42790744;", "or_profile": "~Shuai_Li3;~Zhao_Song3;~Yu_Xia9;~Tong_Yu3;~Tianyi_Zhou4", "aff": "John Hopcroft Center, Shanghai Jiao Tong University;Adobe;University of Michigan;Adobe Research;", "aff_domain": "sjtu.edu.cn;adobe.com;umich.edu;adobe.com;", "position": "Assistant Professor;Researcher;MS student;Senior Research Scientist;", "bibtex": "@misc{\nli2024the,\ntitle={The Closeness of In-Context Learning and Weight Shifting for Softmax Regression},\nauthor={Shuai Li and Zhao Song and Yu Xia and Tong Yu and Tianyi Zhou},\nyear={2024},\nurl={https://openreview.net/forum?id=6uUmpPvqUU}\n}", "github": "", "project": "", "reviewers": "ahEK;f4vr;QA3Q", "site": "https://openreview.net/forum?id=6uUmpPvqUU", "pdf_size": 553034, "rating": "5;5;5", "confidence": "2;2;2", "soundness": "2;2;3", "contribution": "2;2;2", "presentation": "1;1;2", "wc_summary": "82;66;249", "wc_strengths": "77;18;11", "wc_weaknesses": "31;75;104", "wc_questions": "204;29;3", "wc_review": "394;188;367", "wc_reply_reviewers": "15;6;0", "wc_reply_authors": "387;268;236", "reply_reviewers": "1;1;0", "reply_authors": "1;1;1", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 2.0, 0.0 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 1.3333333333333333, 0.4714045207910317 ], "wc_summary_avg": [ 132.33333333333334, 82.75398614098431 ], "wc_strengths_avg": [ 35.333333333333336, 29.601051032391098 ], "wc_weaknesses_avg": [ 70.0, 30.011109054259666 ], "wc_questions_avg": [ 78.66666666666667, 89.25743044076997 ], "wc_review_avg": [ 316.3333333333333, 91.41237455739908 ], "wc_reply_reviewers_avg": [ 7.0, 6.164414002968976 ], "wc_reply_authors_avg": [ 297.0, 64.96665811527222 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 43, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1635458034096028391&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Shanghai Jiao Tong University;Adobe;University of Michigan", "aff_unique_dep": "John Hopcroft Center;Adobe Inc.;", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.adobe.com;https://www.umich.edu", "aff_unique_abbr": "SJTU;Adobe;UM", "aff_campus_unique_index": "0", "aff_campus_unique": "Shanghai;", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "China;United States" }, { "id": "6ujgouOiAA", "title": "Use Your INSTINCT: INSTruction optimization usIng Neural bandits 
Coupled with Transformers", "track": "main", "status": "Reject", "tldr": "", "abstract": "Large language models (LLMs) have shown remarkable instruction-following capabilities and achieved impressive performances in various applications. However, the performances of LLMs depend heavily on the instructions given to them, which are typically manually tuned with substantial human efforts. Recent work has used the query-efficient Bayesian optimization (BO) algorithm to automatically optimize the instructions given to black-box LLMs. However, BO usually falls short when optimizing highly sophisticated (e.g., high-dimensional) objective functions, such as the functions mapping an instruction to the performance of an LLM. This is mainly due to the limited expressive power of the Gaussian process (GP) model which is used by BO as a surrogate to model the objective function. Meanwhile, it has been repeatedly shown that neural networks (NNs), especially pre-trained transformers, possess strong expressive power and can model highly complex functions. So, we adopt a neural bandit algorithm which replaces the GP in BO by an NN surrogate to optimize instructions for black-box LLMs. More importantly, the neural bandit algorithm allows us to naturally couple the NN surrogate with the hidden representation learned by a pre-trained transformer (i.e., an open-source LLM), which significantly boosts its performance. These motivate us to propose our INSTruction optimization usIng Neural bandits Coupled with Transformers (INSTINCT) algorithm. We perform instruction optimization for ChatGPT and use extensive experiments to show that our INSTINCT consistently outperforms the existing methods in different tasks, such as in various instruction induction tasks and the task of improving the zero-shot chain-of-thought instruction.", "keywords": "instruction optimization;prompt optimization;large language models", "primary_area": "generative models", "supplementary_material": "/attachment/752b27f36e44b6ff7be7c3ce208c106e7e4e2f1b.zip", "author": "Xiaoqiang Lin;Zhaoxuan Wu;Zhongxiang Dai;Wenyang Hu;Yao Shu;See-Kiong Ng;Patrick Jaillet;Bryan Kian Hsiang Low", "authorids": "~Xiaoqiang_Lin1;~Zhaoxuan_Wu1;~Zhongxiang_Dai1;~Wenyang_Hu1;~Yao_Shu1;~See-Kiong_Ng1;~Patrick_Jaillet1;~Bryan_Kian_Hsiang_Low1", "gender": "M;M;M;;M;M;M;M", "homepage": "https://xqlin98.github.io/;https://zhaoxuanwu.github.io/;https://daizhongxiang.github.io/;https://scholar.google.com/citations?user=EecZzYsAAAAJ;https://yao.notion.site;https://www.comp.nus.edu.sg/~ngsk/;http://web.mit.edu/jaillet/www/;http://www.comp.nus.edu.sg/~lowkh", "dblp": "269/4573;298/5083;172/4968;258/0545;44/1338;00/5480;https://dblp.uni-trier.de/pers/hd/j/Jaillet:Patrick;97/4877", "google_scholar": "nqKwA60AAAAJ;Th_mPm8AAAAJ;1v8xOIYAAAAJ;EecZzYsAAAAJ;https://scholar.google.com.au/citations?hl=en;https://scholar.google.com.tw/citations?user=_wsommYAAAAJ;ND0FM6EAAAAJ;https://scholar.google.com.tw/citations?user=2P-Q09UAAAAJ", "orcid": ";0009-0002-5659-6387;;0009-0008-6189-7890;;0000-0001-6565-7511;0000-0002-8585-6566;", "linkedin": ";zhaoxuanwu/;;;yao-shu-a5640514b;seekiong/?originalSubdomain=sg;patrick-jaillet-1260445/;", "or_profile": "~Xiaoqiang_Lin1;~Zhaoxuan_Wu1;~Zhongxiang_Dai1;~Wenyang_Hu1;~Yao_Shu1;~See-Kiong_Ng1;~Patrick_Jaillet1;~Bryan_Kian_Hsiang_Low1", "aff": "National University of Singapore;National University of Singapore;Massachusetts Institute of Technology;National University of Singapore;Guangming Lab;National University of Singapore;Massachusetts Institute 
of Technology;National University of Singapore", "aff_domain": "u.nus.edu;u.nus.edu;mit.edu;u.nus.edu;gml.ac.cn;nus.edu.sg;mit.edu;nus.edu.sg", "position": "PhD student;PhD student;Postdoc;PhD student;Researcher;Full Professor;Full Professor;Associate Professor", "bibtex": "@misc{\nlin2024use,\ntitle={Use Your {INSTINCT}: {INST}ruction optimization usIng Neural bandits Coupled with Transformers},\nauthor={Xiaoqiang Lin and Zhaoxuan Wu and Zhongxiang Dai and Wenyang Hu and Yao Shu and See-Kiong Ng and Patrick Jaillet and Bryan Kian Hsiang Low},\nyear={2024},\nurl={https://openreview.net/forum?id=6ujgouOiAA}\n}", "github": "", "project": "", "reviewers": "16Xs;TRUy;fbJ6;zDza", "site": "https://openreview.net/forum?id=6ujgouOiAA", "pdf_size": 1879662, "rating": "3;5;6;8", "confidence": "5;5;4;3", "soundness": "2;2;3;3", "contribution": "2;2;2;3", "presentation": "2;3;3;3", "wc_summary": "120;37;208;86", "wc_strengths": "21;20;190;127", "wc_weaknesses": "117;125;259;96", "wc_questions": "10;4;17;142", "wc_review": "268;186;674;451", "wc_reply_reviewers": "0;0;98;36", "wc_reply_authors": "1246;1309;2406;1646", "reply_reviewers": "0;0;1;1", "reply_authors": "5;5;6;5", "rating_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 112.75, 62.407431448506195 ], "wc_strengths_avg": [ 89.5, 72.50689622373861 ], "wc_weaknesses_avg": [ 149.25, 64.24319030060695 ], "wc_questions_avg": [ 43.25, 57.198666942508375 ], "wc_review_avg": [ 394.75, 187.60780234307953 ], "wc_reply_reviewers_avg": [ 33.5, 40.03436024217197 ], "wc_reply_authors_avg": [ 1651.75, 461.2582655085977 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 5.25, 0.4330127018922193 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.9198662110077999, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3800541950041619856&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1;0;2;0;1;0", "aff_unique_norm": "National University of Singapore;Massachusetts Institute of Technology;Guangming Lab", "aff_unique_dep": ";;", "aff_unique_url": "https://www.nus.edu.sg;https://web.mit.edu;", "aff_unique_abbr": "NUS;MIT;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;2;0;1;0", "aff_country_unique": "Singapore;United States;China" }, { "title": "ImplicitSLIM and How it Improves Embedding-based Collaborative Filtering", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19379", "id": "6vF0ZJGor4", "author_site": "Ilya Shenbin, Sergey Nikolenko", "tldr": "", "abstract": "We present ImplicitSLIM, a novel unsupervised learning approach for sparse high-dimensional data, with applications to collaborative filtering. Sparse linear methods (SLIM) and their variations show outstanding performance, but they are memory-intensive and hard to scale. ImplicitSLIM improves embedding-based models by extracting embeddings from SLIM-like models in a computationally cheap and memory-efficient way, without explicit learning of heavy SLIM-like models. We show that ImplicitSLIM improves performance and speeds up convergence for both state of the art and classical collaborative filtering methods. 
The source code for ImplicitSLIM, related models, and applications is available at https://github.com/ilya-shenbin/ImplicitSLIM.", "keywords": "collaborative filtering;representation learning", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/25b252b964ba36bd97eaafd7c4f8c92e5a674a88.zip", "author": "Ilya Shenbin;Sergey Nikolenko", "authorids": "~Ilya_Shenbin1;~Sergey_Nikolenko1", "gender": "Not Specified;M", "homepage": ";http://logic.pdmi.ras.ru/~sergey/", "dblp": "https://dblp.uni-trier.de/pid/234/8529;50/1870.html", "google_scholar": "SluT_kMAAAAJ;https://scholar.google.ru/citations?hl=ru", "orcid": "0000-0002-6778-225X;0000-0001-7787-2251", "linkedin": ";", "or_profile": "~Ilya_Shenbin1;~Sergey_Nikolenko1", "aff": "St. Petersburg Department of Steklov Mathematical Institute;Steklov Institute of Mathematics at St. Petersburg", "aff_domain": "pdmi.ras.ru;pdmi.ras.ru", "position": "Researcher;Assistant Professor", "bibtex": "@inproceedings{\nshenbin2024implicitslim,\ntitle={Implicit{SLIM} and How it Improves Embedding-based Collaborative Filtering},\nauthor={Ilya Shenbin and Sergey Nikolenko},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=6vF0ZJGor4}\n}", "github": "", "project": "", "reviewers": "EMBd;uknn;dkLJ;Fj6S", "pdf_size": 371124, "rating": "3;3;6;8", "confidence": "1;4;4;4", "soundness": "2;3;4;3", "contribution": "2;2;3;3", "presentation": "2;3;2;3", "wc_summary": "33;15;62;120", "wc_strengths": "11;41;73;22", "wc_weaknesses": "12;266;95;23", "wc_questions": "18;6;5;45", "wc_review": "74;328;235;210", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;804;376;420", "reply_reviewers": "0;0;0;0", "reply_authors": "0;1;1;1", "rating_avg": [ 5.0, 2.1213203435596424 ], "confidence_avg": [ 3.25, 1.299038105676658 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 57.5, 39.790074139161895 ], "wc_strengths_avg": [ 36.75, 23.519938350259338 ], "wc_weaknesses_avg": [ 99.0, 101.5504800579495 ], "wc_questions_avg": [ 18.5, 16.132265804901678 ], "wc_review_avg": [ 211.75, 90.87457015029013 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 400.0, 284.68930433017675 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0.75, 0.4330127018922193 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5443310539518175, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15121309870541548515&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=6vF0ZJGor4", "pdf": "https://openreview.net/pdf?id=6vF0ZJGor4", "email": "pdmi.ras.ru;pdmi.ras.ru", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Steklov Mathematical Institute;Steklov Institute of Mathematics", "aff_unique_dep": "Department of Steklov Mathematical Institute;Mathematics", "aff_unique_url": "http://www.mi.ras.ru;http://www.pdmi.ras.ru", "aff_unique_abbr": "SMI;PDMI", "aff_campus_unique_index": "0;0", "aff_campus_unique": "St. 
Petersburg", "aff_country_unique_index": "0;0", "aff_country_unique": "Russian Federation" }, { "id": "6vtGG0WMne", "title": "Regulating Imbalanced Deep Models with User-Specified Metrics", "track": "main", "status": "Reject", "tldr": "", "abstract": "Deep learning models implemented in real-world applications still face challenges from imbalanced data. Existing methods address the imbalance problem by balancing the models between the minority class and the majority class. However, practical applications may require an imbalanced optimization strategy that selectively unbalances the models and makes them more suitable for the applications than the balanced models. In this work, we first give a formal definition to accurately quantify the degree of imbalance of a model. Then, we propose a bias adjustment method that can efficiently optimize the model to a specified imbalance state according to application metrics or requirements so that this method has wide applicability. Finally, we introduce a training strategy that is advantageous to select the optimal representation parameters of the model during traditional training process. Extensive experiments verify the effectiveness and efficiency of our method, and compared with state-of-the-art algorithms, our method has significant improvement in different metrics including accuracy, F1 value and G-means.", "keywords": "Imbalance learning;Deep learning;Imbalance metrics;Classification", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Yuqi Liu;Bin Cao;JING FAN", "authorids": "~Yuqi_Liu1;~Bin_Cao3;~JING_FAN2", "gender": "M;M;F", "homepage": ";http://www.cs.zjut.edu.cn/staffs-en/bincao.html;http://www.cs.zjut.edu.cn/staffs/jingfan.html", "dblp": "35/9071;17/1169-4;", "google_scholar": ";m4CUeVAAAAAJ;", "orcid": "0000-0003-0092-7001;;", "linkedin": ";;", "or_profile": "~Yuqi_Liu1;~Bin_Cao3;~JING_FAN2", "aff": ";Zhejiang University of Technology;Zhejiang University of Technology", "aff_domain": ";zjut.edu.cn;zjut.edu.cn", "position": ";Associate Professor;Full Professor", "bibtex": "@misc{\nliu2024regulating,\ntitle={Regulating Imbalanced Deep Models with User-Specified Metrics},\nauthor={Yuqi Liu and Bin Cao and JING FAN},\nyear={2024},\nurl={https://openreview.net/forum?id=6vtGG0WMne}\n}", "github": "", "project": "", "reviewers": "b3Zi;TH73;HH3t;tgc4", "site": "https://openreview.net/forum?id=6vtGG0WMne", "pdf_size": 274889, "rating": "3;3;6;6", "confidence": "4;3;4;4", "soundness": "2;2;3;3", "contribution": "2;2;4;3", "presentation": "2;2;3;2", "wc_summary": "85;102;94;262", "wc_strengths": "88;46;44;99", "wc_weaknesses": "197;350;45;89", "wc_questions": "96;2;46;34", "wc_review": "466;500;229;484", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.5, 1.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 135.75, 73.1381398450904 ], "wc_strengths_avg": [ 69.25, 24.57005290999594 ], "wc_weaknesses_avg": [ 170.25, 117.5954399626108 ], "wc_questions_avg": [ 44.5, 33.80458548777074 ], "wc_review_avg": [ 419.75, 110.78441903083664 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 3, 0 ], 
"corr_rating_confidence": 0.5773502691896258, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Gw9ETUy433wJ:scholar.google.com/&scioq=Regulating+Imbalanced+Deep+Models+with+User-Specified+Metrics&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Zhejiang University of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.zjut.edu.cn", "aff_unique_abbr": "ZJUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "6werMQy1uz", "title": "Rethinking the Buyer\u2019s Inspection Paradox in Information Markets with Language Agents", "track": "main", "status": "Reject", "tldr": "", "abstract": "This work addresses the long-standing buyer's inspection paradox for information markets. The paradox is that buyers need to access information to determine its value, while sellers need to limit access to prevent theft. To study this, we introduce an open-source simulated digital marketplace where intelligent agents, powered by language models, buy and sell information on behalf of external participants. The central mechanism enabling this marketplace is the agents' dual capabilities: they not only have the capacity to assess the quality of privileged information but also come equipped with the ability to forget. This feature allows vendors to grant temporary access to proprietary information, significantly reducing the risk of unauthorized retention while enabling agents to accurately gauge the information's relevance to specific queries or tasks. To perform well, agents must make rational decisions, strategically explore the marketplace through generated sub-queries, and synthesize answers from purchased information. 
Concretely, our experiments (a) uncover biases in language models leading to irrational behavior and evaluate techniques to mitigate these biases, (b) investigate how price affects demand in the context of informational goods, and (c) show that inspection and higher budgets both lead to higher quality outcomes.", "keywords": "Agents;Economics;Language Models", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "", "author": "Martin Weiss;Nasim Rahaman;Manuel Wuthrich;Yoshua Bengio;Li Erran Li;Bernhard Sch\u00f6lkopf;Christopher Pal", "authorids": "~Martin_Weiss4;~Nasim_Rahaman1;~Manuel_Wuthrich1;~Yoshua_Bengio1;~Li_Erran_Li1;~Bernhard_Sch\u00f6lkopf1;~Christopher_Pal1", "gender": "M;M;M;M;;;", "homepage": "https://www.martincsweiss.com/;;;http://yoshuabengio.org;http://www.cs.columbia.edu/~lierranli/;;https://scholar.google.ca/citations?user=1ScWJOoAAAAJ&hl=en&oi=ao", "dblp": "12/3210;222/3165;https://dblp.uni-trier.de/pers/hd/w/W=uuml=thrich:Manuel;56/953;l/ErranLLi.html;;45/1217", "google_scholar": "t7lQYWwAAAAJ;https://scholar.google.de/citations?user=iH9DuY0AAAAJ;;kukA0LcAAAAJ;GkMfzy4AAAAJ;;https://scholar.google.ca/citations?user=1ScWJOoAAAAJ", "orcid": ";;;;;;", "linkedin": "martin-clyde-weiss/;https://de.linkedin.com/in/nasim-rahaman/de;;yoshuabengio/?originalSubdomain=ca;;;", "or_profile": "~Martin_Weiss4;~Nasim_Rahaman1;~Manuel_Wuthrich1;~Yoshua_Bengio1;~Li_Erran_Li1;~Bernhard_Sch\u00f6lkopf1;~Christopher_Pal1", "aff": "Montreal Institute for Learning Algorithms, University of Montreal, University of Montreal;Max Planck Institute for Intelligent Systems, Max-Planck Institute;Max Planck Institute for Intelligent Systems;University of Montreal;Columbia University;;Polytechnique Montreal", "aff_domain": "mila.umontreal.ca;tuebingen.mpg.de;mpg.tuebingen.de;umontreal.ca;columbia.edu;;polymtl.ca", "position": "PhD student;PhD student;Postdoc;Full Professor;Adjunct Professor;;Full Professor", "bibtex": "@misc{\nweiss2024rethinking,\ntitle={Rethinking the Buyer{\\textquoteright}s Inspection Paradox in Information Markets with Language Agents},\nauthor={Martin Weiss and Nasim Rahaman and Manuel Wuthrich and Yoshua Bengio and Li Erran Li and Bernhard Sch{\\\"o}lkopf and Christopher Pal},\nyear={2024},\nurl={https://openreview.net/forum?id=6werMQy1uz}\n}", "github": "", "project": "", "reviewers": "ovK6;mENG;STDL;UxN6", "site": "https://openreview.net/forum?id=6werMQy1uz", "pdf_size": 1759772, "rating": "5;5;6;6", "confidence": "3;2;4;4", "soundness": "2;2;2;3", "contribution": "3;2;3;3", "presentation": "4;3;2;3", "wc_summary": "133;41;42;60", "wc_strengths": "87;43;177;63", "wc_weaknesses": "245;144;365;103", "wc_questions": "113;8;5;2", "wc_review": "578;236;589;228", "wc_reply_reviewers": "0;0;114;0", "wc_reply_authors": "426;314;715;319", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 69.0, 37.71604433129222 ], "wc_strengths_avg": [ 92.5, 51.212791370906544 ], "wc_weaknesses_avg": [ 214.25, 101.22098349650629 ], "wc_questions_avg": [ 32.0, 46.8134596029817 ], "wc_review_avg": [ 407.75, 175.81577716462195 ], "wc_reply_reviewers_avg": [ 28.5, 49.363448015713004 ], "wc_reply_authors_avg": [ 443.5, 163.00996902030255 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], 
"reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.9045340337332909, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10665352199526562800&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;1;0;2;3", "aff_unique_norm": "University of Montreal;Max Planck Institute for Intelligent Systems;Columbia University;Polytechnique Montreal", "aff_unique_dep": "Montreal Institute for Learning Algorithms;Intelligent Systems;;", "aff_unique_url": "https://www.umontreal.ca;https://www.mpi-is.mpg.de;https://www.columbia.edu;https://www.polymtl.ca", "aff_unique_abbr": "UM;MPI-IS;Columbia;PolyMTL", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Montreal;", "aff_country_unique_index": "0;1;1;0;2;0", "aff_country_unique": "Canada;Germany;United States" }, { "title": "Chain of Hindsight aligns Language Models with Feedback", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19378", "id": "6xfe4IVcOu", "author_site": "Hao Liu, Carmelo Sferrazza, Pieter Abbeel", "tldr": "", "abstract": "Learning from human preferences is important for language models to match human needs and to align with human and social values. \nPrior works have achieved remarkable successes by learning from human feedback to understand and follow instructions. Nonetheless, these methods are either founded on hand-picked model generations that are favored by human annotators, rendering them inefficient in terms of data utilization and challenging to apply in general, or they depend on reinforcement learning, which often suffers from imperfect reward functions and relies on extremely challenging optimizations. In this work, we propose a novel technique, Chain of Hindsight, that is easy to optimize and can learn from any form of feedback, regardless of its polarity. Our idea is inspired by how humans learn from extensive feedback presented in the form of languages. We convert all types of feedback into sequences of sentences, which are then used to fine-tune the model, allowing us to take advantage of the language comprehension capabilities of language models.\nWe condition the model on a sequence of model generations paired with feedback. By doing so, the model is trained to generate outputs based on feedback, while learning to identify and correct negative attributes or errors. Applying our method to large language models, we observed that Chain of Hindsight significantly surpasses previous methods in aligning language models with human preferences. 
We report significant improvements on summarization and dialogue benchmarks, with our approach markedly preferred in human evaluations.", "keywords": "Reinforcement Learning;Reinforcement Learning from Human Feedback;RLHF", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/db2a0718e6995981e24187ef60a5d65b25d77f54.pdf", "author": "Hao Liu;Carmelo Sferrazza;Pieter Abbeel", "authorids": "~Hao_Liu1;~Carmelo_Sferrazza1;~Pieter_Abbeel2", "gender": ";M;M", "homepage": "https://sferrazza.cc;https://people.eecs.berkeley.edu/~pabbeel/;https://haoliu.ai", "dblp": "190/8406;;09/3214-55", "google_scholar": "x0_lwNYAAAAJ;https://scholar.google.com.tw/citations?user=vtwH6GkAAAAJ;wtK4Yh4AAAAJ", "orcid": "0000-0002-7432-7634;;", "linkedin": "csferrazza/;;", "or_profile": "~Carmelo_Sferrazza1;~Pieter_Abbeel2;~Hao_Liu10", "aff": "University of California, Berkeley;Covariant;University of California, Berkeley", "aff_domain": "berkeley.edu;covariant.ai;berkeley.edu", "position": "Postdoc;Founder;PhD student", "bibtex": "@inproceedings{\nliu2024chain,\ntitle={Chain of Hindsight aligns Language Models with Feedback},\nauthor={Hao Liu and Carmelo Sferrazza and Pieter Abbeel},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=6xfe4IVcOu}\n}", "github": "", "project": "", "reviewers": "us9M;KdNj;WQPV;aon5", "pdf_size": 1966600, "rating": "6;6;8;8", "confidence": "4;3;3;4", "soundness": "4;3;4;3", "contribution": "3;4;4;4", "presentation": "4;3;4;4", "wc_summary": "40;219;150;103", "wc_strengths": "50;54;95;68", "wc_weaknesses": "294;224;47;97", "wc_questions": "30;115;14;2", "wc_review": "414;612;306;270", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "634;537;207;194", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "contribution_avg": [ 3.75, 0.4330127018922193 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 128.0, 65.44845299928792 ], "wc_strengths_avg": [ 66.75, 17.62632973707232 ], "wc_weaknesses_avg": [ 165.5, 98.32217450809354 ], "wc_questions_avg": [ 40.25, 44.285296657016985 ], "wc_review_avg": [ 400.5, 133.11179511974137 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 393.0, 195.5850198762676 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 155, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12944415322991485426&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=6xfe4IVcOu", "pdf": "https://openreview.net/pdf?id=6xfe4IVcOu", "email": "berkeley.edu;covariant.ai;berkeley.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of California, Berkeley;Covariant", "aff_unique_dep": ";", "aff_unique_url": "https://www.berkeley.edu;", "aff_unique_abbr": "UC Berkeley;", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States;" }, { "id": "6yJuDK1DsK", "title": "FEATHER: Lifelong Test-Time Adaptation with Lightweight Adapters", "track": "main", "status": "Reject", "tldr": "", "abstract": "Lifelong/continual test-time adaptation (TTA) refers to the problem where a pre-trained source domain model needs to be continually adapted at inference time to handle 
non-stationary test distributions. Continuously updating the source model over long horizons can result in significant drift in the source model, forgetting the source domain knowledge. Moreover, most of the existing approaches for lifelong TTA require adapting all the parameters, which can incur significant computational cost and memory consumption, limiting their applicability on edge devices for faster inference. We present FEATHER (liFelong tEst-time Adaptation wiTH lightwEight adapteRs), a novel lightweight approach that introduces only a small number of additional parameters to a pre-trained source model which can be unsupervisedly and efficiently adapted during test-time for the new test distribution(s), keeping the rest of the source model frozen. FEATHER disentangles the source domain knowledge from the target domain knowledge, making it robust against error accumulation over time. Another distinguishing aspect of FEATHER is that, unlike some recent approaches for lifelong TTA that require access to the source data for warm-starting the adaptation at test time, FEATHER does not have such a requirement. FEATHER is also orthogonal to the existing lifelong TTA approaches and can be augmented with these approaches, resulting in a significant reduction in the number of additional parameters needed to handle the lifelong TTA setting. Through extensive experiments on CIFAR-10C, CIFAR-100C, ImageNetC, and ImageNet3DCC Robustbench benchmark datasets, we demonstrate that, with substantially (85% to 94%) fewer trainable parameters, FEATHER achieves better/similar performance compared to existing SOTA lifelong TTA methods, resulting in faster adaptation and inference at test-time. The source code for FEATHER will be released upon publication.", "keywords": "test-time adaptation;source free test-time domain adaptation;parameter efficient test-time adaptation", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "/attachment/78bc76b1914db6734493d73e557dff6dd6f6cf57.zip", "author": "Dhanajit Brahma;Abhinav Joshi;Ashutosh Modi;Piyush Rai", "authorids": "~Dhanajit_Brahma1;~Abhinav_Joshi1;~Ashutosh_Modi1;~Piyush_Rai1", "gender": "M;M;M;M", "homepage": "https://sites.google.com/view/dhanajit/;https://www.cse.iitk.ac.in/users/ajoshi/;https://ashutosh-modi.github.io/;http://cse.iitk.ac.in/users/piyush/", "dblp": "235/5479;308/0603;139/0873;02/525", "google_scholar": "3PfwL2IAAAAJ;;AWu6f60AAAAJ;https://scholar.google.com.tw/citations?user=D50grEgAAAAJ", "orcid": ";0000-0001-6756-1126;;", "linkedin": ";;dr-ashutosh-modi-3907835/;", "or_profile": "~Dhanajit_Brahma1;~Abhinav_Joshi1;~Ashutosh_Modi1;~Piyush_Rai1", "aff": "Indian Institute of Technology, Kanpur;Indian Institute of Technology, Kanpur;IIT Kanpur;IIT Kanpur, IIT Kanpur", "aff_domain": "iitk.ac.in;iitk.ac.in;iitk.ac.in;cse.iitk.ac.in", "position": "PhD student;PhD student;Assistant Professor;Associate Professor", "bibtex": "@misc{\nbrahma2024feather,\ntitle={{FEATHER}: Lifelong Test-Time Adaptation with Lightweight Adapters},\nauthor={Dhanajit Brahma and Abhinav Joshi and Ashutosh Modi and Piyush Rai},\nyear={2024},\nurl={https://openreview.net/forum?id=6yJuDK1DsK}\n}", "github": "", "project": "", "reviewers": "bPzr;F1KC;M9Xj;CBjP", "site": "https://openreview.net/forum?id=6yJuDK1DsK", "pdf_size": 719544, "rating": "3;5;5;5", "confidence": "4;4;4;4", "soundness": "2;3;2;2", "contribution": "2;1;2;2", "presentation": "2;2;2;3", "wc_summary": "84;113;95;100", "wc_strengths": "21;68;22;137", 
"wc_weaknesses": "119;453;305;110", "wc_questions": "36;134;4;194", "wc_review": "260;768;426;541", "wc_reply_reviewers": "223;0;0;0", "wc_reply_authors": "968;1134;819;371", "reply_reviewers": "2;0;0;0", "reply_authors": "4;3;2;2", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 98.0, 10.41633332799983 ], "wc_strengths_avg": [ 62.0, 47.281074437876306 ], "wc_weaknesses_avg": [ 246.75, 142.26098375872422 ], "wc_questions_avg": [ 92.0, 75.90783885739337 ], "wc_review_avg": [ 498.75, 184.78010580146338 ], "wc_reply_reviewers_avg": [ 55.75, 96.5618325219649 ], "wc_reply_authors_avg": [ 823.0, 283.7542951216774 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:41xGd55N9ZkJ:scholar.google.com/&scioq=FEATHER:+Lifelong+Test-Time+Adaptation+with+Lightweight+Adapters&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Indian Institute of Technology Kanpur", "aff_unique_dep": "", "aff_unique_url": "https://www.iitk.ac.in", "aff_unique_abbr": "IIT Kanpur", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Kanpur", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "India" }, { "id": "6yXAKleluj", "title": "Probabilistic Sampling-Enhanced Temporal-Spatial GCN: A Scalable Framework for Transaction Anomaly Detection in Ethereum Networks", "track": "main", "status": "Reject", "tldr": "", "abstract": "The rapid evolution of the Ethereum network necessitates sophisticated techniques to ensure its robustness against potential threats and to maintain transparency. While Graph Neural Networks (GNNs) have pioneered anomaly detection in such platforms, capturing the intricacies of both spatial and temporal transactional patterns has remained a challenge. This study presents a fusion of Graph Convolutional Networks (GCNs) with Temporal Random Walks (TRW) enhanced by probabilistic sampling to bridge this gap. Our approach, unlike traditional GCNs, leverages the strengths of TRW to discern complex temporal sequences in Ethereum transactions, thereby providing a more nuanced transaction anomaly detection mechanism. Preliminary evaluations demonstrate that our TRW-GCN framework substantially advances the performance metrics over conventional GCNs in detecting anomalies and transaction bursts. This research not only underscores the potential of temporal cues in Ethereum transactional data but also offers a scalable and effective methodology for ensuring the security and transparency of decentralized platforms. By harnessing both spatial relationships and time-based transactional sequences as node features, our model introduces an additional layer of granularity, making the detection process more robust and less prone to false positives. 
This work lays the foundation for future research aimed at optimizing and enhancing the transparency of blockchain technologies, and serves as a testament to the significance of considering both time and space dimensions in the ever-evolving landscape of the decentralized platforms.", "keywords": "Probabilistic Sampling;Temporal Random Walk;Graph Convolutional Networks;Transaction Anomaly Detection;Ethereum Networks", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/af09f14e2fba3fa490917fd9f40136d38e98ab03.zip", "author": "Stefan Behfar;Jon Crowcroft", "authorids": "~Stefan_Behfar1;~Jon_Crowcroft1", "gender": "Not Specified;M", "homepage": "https://scholar.google.com/citations?user=ucIHQQcAAAAJ&hl=en;https://www.cst.cam.ac.uk/people/jac22", "dblp": ";c/JonCrowcroft", "google_scholar": "ucIHQQcAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0002-7013-0121", "linkedin": ";", "or_profile": "~Stefan_Behfar1;~Jon_Crowcroft1", "aff": ";university of cambridge", "aff_domain": ";cst.cam.ac.uk", "position": ";Full Professor", "bibtex": "@misc{\nbehfar2024probabilistic,\ntitle={Probabilistic Sampling-Enhanced Temporal-Spatial {GCN}: A Scalable Framework for Transaction Anomaly Detection in Ethereum Networks},\nauthor={Stefan Behfar and Jon Crowcroft},\nyear={2024},\nurl={https://openreview.net/forum?id=6yXAKleluj}\n}", "github": "", "project": "", "reviewers": "fmda;Ja7Q;RjEZ;XLac", "site": "https://openreview.net/forum?id=6yXAKleluj", "pdf_size": 1760287, "rating": "1;5;5;5", "confidence": "5;3;4;4", "soundness": "1;3;3;2", "contribution": "2;2;2;3", "presentation": "2;2;3;3", "wc_summary": "51;46;164;86", "wc_strengths": "36;18;86;29", "wc_weaknesses": "165;269;160;53", "wc_questions": "8;49;1;24", "wc_review": "260;382;411;192", "wc_reply_reviewers": "258;29;0;8", "wc_reply_authors": "1259;1413;1360;1389", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;2;2", "rating_avg": [ 4.0, 1.7320508075688772 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 86.75, 47.187789734209844 ], "wc_strengths_avg": [ 42.25, 26.06122598804592 ], "wc_weaknesses_avg": [ 161.75, 76.3916716664847 ], "wc_questions_avg": [ 20.5, 18.445866745696716 ], "wc_review_avg": [ 311.25, 89.1666277258482 ], "wc_reply_reviewers_avg": [ 73.75, 106.90270108841965 ], "wc_reply_authors_avg": [ 1355.25, 58.6531115969136 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:-9znvuTjFb0J:scholar.google.com/&scioq=Probabilistic+Sampling-Enhanced+Temporal-Spatial+GCN:+A+Scalable+Framework+for+Transaction+Anomaly+Detection+in+Ethereum+Networks&hl=en&as_sdt=0,33", "gs_version_total": 2, "aff_unique_index": "0", "aff_unique_norm": "University of Cambridge", "aff_unique_dep": "", "aff_unique_url": "https://www.cam.ac.uk", "aff_unique_abbr": "Cambridge", "aff_campus_unique_index": "0", "aff_campus_unique": "Cambridge", "aff_country_unique_index": "0", "aff_country_unique": "United Kingdom" }, { "title": "Towards Optimal Regret in Adversarial Linear MDPs with Bandit Feedback", "status": "Spotlight", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19377", 
"id": "6yv8UHVJn4", "author_site": "Haolin Liu, Chen-Yu Wei, Julian Zimmert", "tldr": "", "abstract": "We study online reinforcement learning in linear Markov decision processes with adversarial losses and bandit feedback. We introduce two algorithms that achieve improved regret performance compared to existing approaches. The first algorithm, although computationally inefficient, achieves a regret of $\\widetilde{O}(\\sqrt{K})$ without relying on simulators, where $K$ is the number of episodes. This is the first rate-optimal result in the considered setting. The second algorithm is computationally efficient and achieves a regret of $\\widetilde{O}(K^{\\frac{3}{4}})$ . These results significantly improve over the prior state-of-the-art: a computationally inefficient algorithm by Kong et al. (2023) with $\\widetilde{O}(K^{\\frac{4}{5}}+1/\\lambda_{\\min})$ regret, and a computationally efficient algorithm by Sherman et al. (2023b) with $\\widetilde{O}(K^{\\frac{6}{7}})$ regret.", "keywords": "adversarial MDPs;policy optimization;bandit feedback", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/97cfc74718dacde70485e3eecc6104b316b73157.pdf", "author": "Haolin Liu;Chen-Yu Wei;Julian Zimmert", "authorids": "~Haolin_Liu8;~Chen-Yu_Wei1;~Julian_Zimmert1", "gender": "M;M;", "homepage": "https://liuhl2000.github.io/;https://bahh723.github.io/;", "dblp": ";183/1729;190/7636", "google_scholar": ";2L2cR-kAAAAJ;", "orcid": "0000-0002-8247-9742;;", "linkedin": ";;", "or_profile": "~Haolin_Liu8;~Chen-Yu_Wei1;~Julian_Zimmert1", "aff": "University of Virginia, Charlottesville;University of Virginia, Charlottesville;Google", "aff_domain": "virginia.edu;virginia.edu;google.com", "position": "PhD student;Assistant Professor;Postdoc", "bibtex": "@inproceedings{\nliu2024towards,\ntitle={Towards Optimal Regret in Adversarial Linear {MDP}s with Bandit Feedback},\nauthor={Haolin Liu and Chen-Yu Wei and Julian Zimmert},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=6yv8UHVJn4}\n}", "github": "", "project": "", "reviewers": "vRkG;eGft;B4ir;QJAu", "pdf_size": 516977, "rating": "6;8;8;8", "confidence": "4;3;3;3", "soundness": "3;4;3;3", "contribution": "3;4;4;2", "presentation": "4;3;3;4", "wc_summary": "64;51;41;68", "wc_strengths": "29;112;152;70", "wc_weaknesses": "10;69;84;20", "wc_questions": "105;25;23;31", "wc_review": "208;257;300;189", "wc_reply_reviewers": "0;0;10;9", "wc_reply_authors": "560;260;371;386", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 56.0, 10.700467279516348 ], "wc_strengths_avg": [ 90.75, 45.95310109230932 ], "wc_weaknesses_avg": [ 45.75, 31.403622402519108 ], "wc_questions_avg": [ 46.0, 34.19064199455752 ], "wc_review_avg": [ 238.5, 43.31570154112709 ], "wc_reply_reviewers_avg": [ 4.75, 4.763139720814412 ], "wc_reply_authors_avg": [ 394.25, 107.36008336434915 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13159267592321160767&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "openreview": 
"https://openreview.net/forum?id=6yv8UHVJn4", "pdf": "https://openreview.net/pdf?id=6yv8UHVJn4", "email": "virginia.edu;virginia.edu;google.com", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Virginia;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.virginia.edu;https://www.google.com", "aff_unique_abbr": "UVA;Google", "aff_campus_unique_index": "0;0;1", "aff_campus_unique": "Charlottesville;Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "70A6oo3Il2", "title": "AdaFlood: Adaptive Flood Regularization", "track": "main", "status": "Reject", "tldr": "", "abstract": "Although neural networks are conventionally optimized towards zero training loss, it has been recently learned that targeting a non-zero training loss threshold, referred to as a flood level, often enables better test time generalization. \nCurrent approaches, however, apply the same constant flood level to all training samples, which inherently assumes all the samples have the same difficulty.\nWe present AdaFlood, a novel flood regularization method that adapts the flood level of each training sample according to the difficulty of the sample. \nIntuitively, since training samples are not equal in difficulty, the target training loss should be conditioned on the instance. \nExperiments on datasets covering four diverse input modalities — text, images, asynchronous event sequences, and tabular — demonstrate the versatility of AdaFlood across data domains and noise levels.", "keywords": "Flood;Overfitting;Regularization", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/782078da05d714c872ff19ac77ca1124a583d641.pdf", "author": "Wonho Bae;Yi Ren;Mohamed Osama Ahmed;Frederick Tung;Danica J. Sutherland;Gabriel L. Oliveira", "authorids": "~Wonho_Bae1;~Yi_Ren6;~Mohamed_Osama_Ahmed2;~Frederick_Tung1;~Danica_J._Sutherland1;~Gabriel_L._Oliveira1", "gender": "M;M;M;M;M;F", "homepage": "https://won-bae.github.io/;https://joshua-ren.github.io/;;;https://sites.google.com/view/gabriel-leivas-oliveira/home;http://www.djsutherland.ml", "dblp": "259/5393;;https://dblp.org/pers/hd/a/Ahmed:Mohamed_Osama;10/7697;117/2073;92/10966", "google_scholar": "https://scholar.google.ca/citations?user=EEwA__kAAAAJ;5QNce38AAAAJ;https://scholar.google.ca/citations?user=jyVyVj4AAAAJ;https://scholar.google.ca/citations?user=T4EeZ9gAAAAJ;5anRZEcAAAAJ;https://scholar.google.co.uk/citations?user=uO_NqicAAAAJ", "orcid": ";;0000-0001-6758-1178;;0000-0003-0099-9873;0000-0002-1525-3532", "linkedin": "wonho-bae/;;mohamed-osama-ahmed-91439a154/;;;", "or_profile": "~Wonho_Bae1;~Yi_Ren6;~Mohamed_Osama_Ahmed2;~Frederick_Tung1;~Gabriel_L._Oliveira1;~Danica_J._Sutherland2", "aff": "University of British Columbia;University of British Columbia;;Borealis AI;Borealis AI;University of British Columbia", "aff_domain": "cs.ubc.ca;ubc.ca;;borealisai.com;borealisai.com;cs.ubc.ca", "position": "PhD student;PhD student;;Researcher;Senior Machine Learning Researcher;Assistant Professor", "bibtex": "@misc{\nbae2024adaflood,\ntitle={AdaFlood: Adaptive Flood Regularization},\nauthor={Wonho Bae and Yi Ren and Mohamed Osama Ahmed and Frederick Tung and Danica J. Sutherland and Gabriel L. 
Oliveira},\nyear={2024},\nurl={https://openreview.net/forum?id=70A6oo3Il2}\n}", "github": "", "project": "", "reviewers": "o6wC;vFax;R2kv", "site": "https://openreview.net/forum?id=70A6oo3Il2", "pdf_size": 942269, "rating": "3;5;5", "confidence": "4;3;3", "soundness": "3;2;2", "contribution": "2;2;2", "presentation": "3;3;3", "wc_summary": "72;191;38", "wc_strengths": "65;38;25", "wc_weaknesses": "161;548;37", "wc_questions": "1;9;3", "wc_review": "299;786;103", "wc_reply_reviewers": "54;0;0", "wc_reply_authors": "1537;2134;417", "reply_reviewers": "1;0;0", "reply_authors": "3;3;1", "rating_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 100.33333333333333, 65.59640911581127 ], "wc_strengths_avg": [ 42.666666666666664, 16.659998666133067 ], "wc_weaknesses_avg": [ 248.66666666666666, 217.63016539278027 ], "wc_questions_avg": [ 4.333333333333333, 3.39934634239519 ], "wc_review_avg": [ 396.0, 287.14572374783273 ], "wc_reply_reviewers_avg": [ 18.0, 25.45584412271571 ], "wc_reply_authors_avg": [ 1362.6666666666667, 711.719201808004 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.9428090415820634 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.9999999999999998, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13033532297663653967&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1;1;0", "aff_unique_norm": "University of British Columbia;Borealis AI", "aff_unique_dep": ";", "aff_unique_url": "https://www.ubc.ca;https://www.borealisai.com", "aff_unique_abbr": "UBC;Borealis AI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Canada" }, { "title": "Continuous Invariance Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19376", "id": "70IgE3tRbu", "author_site": "LIN Yong, Fan Zhou, Lu Tan, Lintao Ma, Jianmeng Liu, Yansu HE, Yuan Yuan, Yu Liu, James Zhang, Yujiu Yang, Hao Wang", "tldr": "", "abstract": "Invariance learning methods aim to learn invariant features in the hope that they generalize under distributional shift. Although many tasks are naturally characterized by continuous domains, current invariance learning techniques generally assume categorically indexed domains. For example, auto-scaling in cloud computing often needs a CPU utilization prediction model that generalizes across different times (e.g., time of a day and date of a year), where `time' is a continuous domain index. In this paper, we start by theoretically showing that existing invariance learning methods can fail for continuous domain problems. Specifically, the naive solution of splitting continuous domains into discrete ones ignores the underlying relationship among domains, and therefore potentially leads to suboptimal performance. To address this challenge, we then propose Continuous Invariance Learning (CIL), which extracts invariant features across continuously indexed domains. CIL is a novel adversarial procedure which measures and controls the conditional independence between the labels and continuous domain indices given the extracted features. 
Our theoretical analysis demonstrates that CIL learns features that satisfy the invariant constraint with infinite samples. Empirical results on both synthetic and real-world datasets (including data collected from production systems) show that CIL consistently outperforms strong baselines among all the tasks.", "keywords": "Causality; Domain Generalization; Invariance Learning", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "LIN Yong;Fan Zhou;Lu Tan;Lintao Ma;Jianmeng Liu;Yansu HE;Yuan Yuan;Yu Liu;James Y. Zhang;Yujiu Yang;Hao Wang", "authorids": "~LIN_Yong1;~Fan_Zhou10;~Lu_Tan1;~Lintao_Ma1;~Jianmeng_Liu1;~Yansu_HE1;~Yuan_Yuan5;~Yu_Liu28;~James_Y._Zhang1;~Yujiu_Yang2;~Hao_Wang3", "gender": ";M;;;M;F;F;M;M;M;", "homepage": ";;;;https://www.jmliu.site/;;https://yyuanad.github.io/;;https://scholar.google.com/citations?user=Ywakh_sAAAAJ;https://sites.google.com/view/iigroup-thu;", "dblp": ";;67/10146;;239/4809;;64/5845-2;97/2274-71.html;151/3086;30/3847;", "google_scholar": ";https://scholar.google.com/citations?view_op=list_works;yeJ3AbsAAAAJ;;;;9tI89HMAAAAJ;zbRvnkwAAAAJ;Ywakh_sAAAAJ;4gH3sxsAAAAJ;", "orcid": ";;;;;0000-0001-6433-0007;0000-0002-6609-0542;0009-0008-1719-8371;0000-0001-6519-676X;0000-0002-6427-1024;", "linkedin": ";https://www.linkedin.com/mwlite/in/moutozf;;;jianmeng-liu/;;yuan-yuan-96451747/;;jamesymzhang/;;", "or_profile": "~LIN_Yong1;~Fan_Zhou10;~Lu_Tan1;~Lintao_Ma1;~Jianmeng_Liu1;~Yansu_HE1;~Yuan_Yuan5;~Yu_Liu28;~James_Y._Zhang1;~Yujiu_Yang2;~Hao_Wang3", "aff": ";AntGroup;Tsinghua University;;Hong Kong University of Science and Technology;The Chinese University of Hong Kong;Boston College;Ant Group;Ant Group;Tsinghua University;", "aff_domain": ";antgroup.com;tsinghua.edu.cn;;ust.hk;cuhk.edu.hk;bc.edu;antgroup.com;alipay.com;tsinghua.edu.cn;", "position": ";Researcher;MS student;;Undergrad student;PhD student;Assistant Professor;Researcher;managing director;Full Professor;", "bibtex": "@inproceedings{\nyong2024continuous,\ntitle={Continuous Invariance Learning},\nauthor={LIN Yong and Fan Zhou and Lu Tan and Lintao Ma and Jianmeng Liu and Yansu HE and Yuan Yuan and Yu Liu and James Y. 
Zhang and Yujiu Yang and Hao Wang},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=70IgE3tRbu}\n}", "github": "", "project": "", "reviewers": "dGY4;Zzch;9c4M;MYYo", "pdf_size": 1901073, "rating": "5;5;8;8", "confidence": "4;3;4;4", "soundness": "3;3;3;4", "contribution": "2;3;3;3", "presentation": "3;2;2;4", "wc_summary": "106;38;71;152", "wc_strengths": "32;75;63;69", "wc_weaknesses": "344;152;251;200", "wc_questions": "10;210;148;92", "wc_review": "492;475;533;513", "wc_reply_reviewers": "0;0;98;0", "wc_reply_authors": "943;1676;2380;1006", "reply_reviewers": "0;0;1;0", "reply_authors": "3;4;6;2", "rating_avg": [ 6.5, 1.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 91.75, 42.28696607703135 ], "wc_strengths_avg": [ 59.75, 16.57369904396722 ], "wc_weaknesses_avg": [ 236.75, 71.13148037261702 ], "wc_questions_avg": [ 115.0, 73.60027173862879 ], "wc_review_avg": [ 503.25, 21.821720830401986 ], "wc_reply_reviewers_avg": [ 24.5, 42.4352447854375 ], "wc_reply_authors_avg": [ 1501.25, 583.0211724285834 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.75, 1.479019945774904 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.5773502691896258, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13991504235650354242&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "openreview": "https://openreview.net/forum?id=70IgE3tRbu", "pdf": "https://openreview.net/pdf?id=70IgE3tRbu", "email": ";antgroup.com;tsinghua.edu.cn;;ust.hk;cuhk.edu.hk;bc.edu;antgroup.com;alipay.com;tsinghua.edu.cn;", "author_num": 11, "aff_unique_index": "0;1;2;3;4;0;0;1", "aff_unique_norm": "Ant Group;Tsinghua University;Hong Kong University of Science and Technology;Chinese University of Hong Kong;Boston College", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.antgroup.com;https://www.tsinghua.edu.cn;https://www.ust.hk;https://www.cuhk.edu.hk;https://www.bostoncollege.edu", "aff_unique_abbr": "AntGroup;THU;HKUST;CUHK;BC", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;1;0;0;0", "aff_country_unique": "China;United States" }, { "id": "70PPJo3DwI", "title": "Towards Out-of-federation Generalization in Federated Learning", "track": "main", "status": "Desk Reject", "tldr": "", "abstract": "Federated Learning (FL) is widely employed to tackle distributed healthcare data. Existing methods primarily focus on addressing in-federation data heterogeneity. However, we observe that they can suffer from significant performance degradation when applied to unseen clients for out-of-federation (OOF) generalization. The recent attempts to address generalization to unseen clients generally fail to scale up to large-scale distributed settings due to high communication overhead and convergence difficulty. And the communication efficient methods often yield poor OOF robustness. To achieve OOF-resiliency in a scalable manner, we propose Topology-aware Federated Learning (TFL) that leverages client topology - a graph representing client relationships - to effectively train robust models against OOF data. 
We formulate a novel optimization problem for TFL, consisting of two key modules: Client Topology Learning, which infers the client relationships in a privacy-preserving manner, and Learning on Client Topology, which leverages the learned topology to identify influential clients and harness this information into the FL optimization process to efficiently build robust models. Empirical evaluation on a variety of real-world datasets verifies TFL's superior OOF robustness and communication efficiency.", "keywords": "Federated Learning; Data Heterogeneity; Robustness; Topology-aware", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/1c30298a88f91d186a09f450dab518337f3b8ef2.pdf", "author": "Mengmeng Ma;Tang Li;Weisong Shi;Xi Peng", "authorids": "~Mengmeng_Ma1;~Tang_Li1;~Weisong_Shi1;~Xi_Peng1", "gender": "M;M;M;Not Specified", "homepage": "https://mengmenm.top/;https://tangli0305.github.io/;http://weisongshi.org/;https://deep-real.github.io/dr_xipeng.html", "dblp": "150/6565-2;01/1190-5;s/WeisongShi;149/7762-5", "google_scholar": "ycXTxwoAAAAJ;mQFL3DYAAAAJ;4rPcoCEAAAAJ;DWw4v0kAAAAJ", "orcid": "0000-0002-2804-2718;0000-0002-3134-4151;;0000-0002-7772-001X", "linkedin": ";tang-li-613132180/;;xi-peng-74b540b6/", "or_profile": "~Mengmeng_Ma1;~Tang_Li1;~Weisong_Shi1;~Xi_Peng1", "aff": "University of Delaware;University of Delaware;University of Delaware;University of Delaware", "aff_domain": "udel.edu;udel.edu;udel.edu;udel.edu", "position": "PhD student;PhD student;Full Professor;Assistant Professor", "bibtex": "@misc{\nma2024towards,\ntitle={Towards Out-of-federation Generalization in Federated Learning},\nauthor={Mengmeng Ma and Tang Li and Weisong Shi and Xi Peng},\nyear={2024},\nurl={https://openreview.net/forum?id=70PPJo3DwI}\n}", "github": "", "project": "", "reviewers": "Nx6V;RF1L;L1rQ;Q4J6", "site": "https://openreview.net/forum?id=70PPJo3DwI", "pdf_size": 4228748, "rating": "5;6;6;6", "confidence": "4;4;3;3", "soundness": "2;2;3;3", "contribution": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "98;156;91;63", "wc_strengths": "66;34;28;27", "wc_weaknesses": "263;297;137;119", "wc_questions": "3;60;92;60", "wc_review": "430;547;348;269", "wc_reply_reviewers": "0;20;0;136", "wc_reply_authors": "829;586;628;505", "reply_reviewers": "0;1;0;1", "reply_authors": "3;2;2;3", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 102.0, 33.81567683782183 ], "wc_strengths_avg": [ 38.75, 15.958931668504631 ], "wc_weaknesses_avg": [ 204.0, 77.20751258783046 ], "wc_questions_avg": [ 53.75, 32.080952292598795 ], "wc_review_avg": [ 398.5, 102.91379888042225 ], "wc_reply_reviewers_avg": [ 39.0, 56.595052787324086 ], "wc_reply_authors_avg": [ 637.0, 119.34194568549651 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:7l2C0DzjiosJ:scholar.google.com/&scioq=Towards+Out-of-federation+Generalization+in+Federated+Learning&hl=en&as_sdt=0,47", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Delaware", "aff_unique_dep": "", "aff_unique_url": "https://www.udel.edu", "aff_unique_abbr": "UD", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "70rlVBPX6Y", "title": "Neural Architecture Search for TinyML with Reinforcement Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Deploying Deep Neural Networks (DNNs) on microcontrollers (TinyML) is a common trend to process the increasing amount of sensor data generated at the edge, but in practice, resource and latency constraints make it difficult to find optimal DNN candidates. Neural Architecture Search (NAS) is an excellent approach to automate this search and can easily be combined with DNN compression techniques commonly used in TinyML. However, many NAS techniques are not only computationally expensive, especially hyperparameter optimization (HPO), but also often focus on optimizing only a single objective, e.g., maximizing accuracy, without considering additional objectives such as memory consumption or computational complexity of a model, which are key to making deployment at the edge feasible. In this paper we propose a novel NAS strategy for TinyML based on multi-objective Bayesian optimization (MOBOpt) and an ensemble of competing parametric policies trained using Augmented Random Search (ARS) Reinforcement Learning (RL) agents. Our methodology aims at efficiently finding tradeoffs between a DNN's predictive accuracy, memory consumption on a given target system, and computational complexity. Our experiments show that we outperform existing MOBOpt approaches consistently on different data sets and architectures such as ResNet-18 and MobileNetV3.", "keywords": "Hyperparameter Optimization;TinyML;Microcontrollers;Reinforcement Learning;Augmented Random Search;Multi-Objective Optimization", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Mark Deutel;Georgios Kontes;Christopher Mutschler;J\u00fcrgen Teich", "authorids": "~Mark_Deutel1;~Georgios_Kontes1;~Christopher_Mutschler1;~J\u00fcrgen_Teich1", "gender": "M;;M;M", "homepage": "https://markdeutel.github.io/;;https://www.cmutschler.de;", "dblp": "271/4795;;118/7748;https://dblp.uni-trier.de/pid/t/JurgenTeich.html", "google_scholar": "hd-tlcgAAAAJ;;https://scholar.google.de/citations?user=gKDSp8YAAAAJ;", "orcid": "0000-0001-8932-5212;;0000-0001-8108-0230;0000-0001-6285-5862", "linkedin": ";;christopher-mutschler-28431576/;", "or_profile": "~Mark_Deutel1;~Georgios_Kontes1;~Christopher_Mutschler1;~J\u00fcrgen_Teich1", "aff": "Friedrich-Alexander Universit\u00e4t Erlangen-N\u00fcrnberg;;Fraunhofer IIS;Friedrich-Alexander Universit\u00e4t Erlangen-N\u00fcrnberg", "aff_domain": "fau.de;;fraunhofer.de;fau.de", "position": "PhD student;;Principal Researcher;Full Professor", "bibtex": "@misc{\ndeutel2024neural,\ntitle={Neural Architecture Search for Tiny{ML} with Reinforcement Learning},\nauthor={Mark Deutel and Georgios Kontes and Christopher Mutschler and J{\\\"u}rgen Teich},\nyear={2024},\nurl={https://openreview.net/forum?id=70rlVBPX6Y}\n}", "github": "", "project": "", "reviewers": "FN6N;KEWJ;CC3e;4rfG;obe6", "site": "https://openreview.net/forum?id=70rlVBPX6Y", "pdf_size": 722207, "rating": "3;5;5;5;5", "confidence": "4;4;3;4;2", "soundness": "3;2;2;3;3", "contribution": "2;2;3;2;2", "presentation": "3;2;3;2;3", "wc_summary": "204;35;64;99;160", "wc_strengths": "44;22;23;49;15", "wc_weaknesses": "222;106;37;107;22", "wc_questions": "113;3;71;62;24", "wc_review": "583;166;195;317;221", "wc_reply_reviewers": "0;0;17;0;42", "wc_reply_authors": "668;300;273;508;238", "reply_reviewers": 
"0;0;1;0;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 4.6, 0.7999999999999999 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "contribution_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 112.4, 61.91801030394953 ], "wc_strengths_avg": [ 30.6, 13.365627557282897 ], "wc_weaknesses_avg": [ 98.8, 70.73160538260107 ], "wc_questions_avg": [ 54.6, 38.29673615335908 ], "wc_review_avg": [ 296.4, 152.00999967107427 ], "wc_reply_reviewers_avg": [ 11.8, 16.4730082255792 ], "wc_reply_authors_avg": [ 397.4, 164.81941633193586 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.37500000000000017, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:xzjnoYAjOY8J:scholar.google.com/&scioq=Neural+Architecture+Search+for+TinyML+with+Reinforcement+Learning&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "Friedrich-Alexander University Erlangen-N\u00fcrnberg;Fraunhofer Institute for Integrated Circuits", "aff_unique_dep": ";", "aff_unique_url": "https://www fau.de;https://www.iis.fraunhofer.de/", "aff_unique_abbr": "FAU;Fraunhofer IIS", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Erlangen-N\u00fcrnberg;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "id": "70xhiS0AQS", "title": "TaskBench: Benchmarking Large Language Models for Task Automation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recently, the incredible progress of large language models (LLMs) has ignited the spark of task automation, which decomposes the complex tasks described by user instructions into sub-tasks, and invokes external tools to execute them, and plays a central role in autonomous agents. Therefore, there has been an urgent demand to formulate a systematic and standardized benchmark to foster the development of LLMs in task automation. To this end, we introduce TaskBench to evaluate task automation. Specifically, the process of task automation can be formulated as three critical stages (i.e., task decomposition, tool invocation, and parameter prediction) to fulfill user intent, that renders its data collection more challenging than common NLP tasks. Here, we introduce the concept of Tool Graph to represent the decomposed tasks in user intent, and adopt a back-instruct method to generate user instruction. Moreover, the mechanism of task automation also drives us to formulate more advanced metrics to measure the capability of LLMs. Therefore, we further propose TaskEval to evaluate the capability of LLMs in our curated datasets from different aspects, including task decomposition, tool invocation, and parameter prediction. Experimental results demonstrate that TaskBench can effectively be utilized to reflect the capability of LLMs in task automation. 
The code and datasets of TaskBench are available in the supplementary material.", "keywords": "LLM;Task Automation;Autonomous Agents", "primary_area": "datasets and benchmarks", "supplementary_material": "/attachment/a21af03466ad37a670d3d2faffedd09a5008f3bc.zip", "author": "Yongliang Shen;Kaitao Song;Xu Tan;Wenqi Zhang;Kan Ren;Siyu Yuan;Weiming Lu;Dongsheng Li;Yueting Zhuang", "authorids": "~Yongliang_Shen1;~Kaitao_Song1;~Xu_Tan1;~Wenqi_Zhang2;~Kan_Ren1;~Siyu_Yuan2;~Weiming_Lu1;~Dongsheng_Li2;~Yueting_Zhuang1", "gender": "M;M;M;;M;;;M;M", "homepage": ";;https://tan-xu.github.io/;;https://saying.ren;;;http://recmind.cn;https://person.zju.edu.cn/yzhuang", "dblp": "221/5612-1.html;222/2082;96/10484-3;;28/7458;;;254/0830-2.html;", "google_scholar": "UT3NzFAAAAAJ;https://scholar.google.com.hk/citations?user=LLk9dR8AAAAJ;tob-U1oAAAAJ;;USnQVWgAAAAJ;;;VNg5rA8AAAAJ;1RD7UJAAAAAJ", "orcid": ";;0000-0001-5631-0639;;;;;0000-0003-3103-8442;", "linkedin": ";;;;;;;;", "or_profile": "~Yongliang_Shen1;~Kaitao_Song1;~Xu_Tan1;~Wenqi_Zhang2;~Kan_Ren1;~Siyu_Yuan2;~Weiming_Lu1;~Dongsheng_Li2;~Yueting_Zhuang1", "aff": "Zhejiang University;Microsoft;Microsoft;;ShanghaiTech University;;;Microsoft Research Asia;Zhejiang University", "aff_domain": "zju.edu.cn;microsoft.com;microsoft.com;;shanghaitech.edu.cn;;;microsoft.com;zju.edu.cn", "position": "Assistant Professor;Researcher;Principal Researcher;;Assistant Professor;;;Principal Researcher;Full Professor", "bibtex": "@misc{\nshen2024taskbench,\ntitle={TaskBench: Benchmarking Large Language Models for Task Automation},\nauthor={Yongliang Shen and Kaitao Song and Xu Tan and Wenqi Zhang and Kan Ren and Siyu Yuan and Weiming Lu and Dongsheng Li and Yueting Zhuang},\nyear={2024},\nurl={https://openreview.net/forum?id=70xhiS0AQS}\n}", "github": "", "project": "", "reviewers": "CSek;peHw;LVvj;BEYM", "site": "https://openreview.net/forum?id=70xhiS0AQS", "pdf_size": 840721, "rating": "3;5;5;6", "confidence": "4;4;4;4", "soundness": "1;2;3;3", "contribution": "2;3;3;2", "presentation": "3;3;3;3", "wc_summary": "68;70;204;41", "wc_strengths": "68;82;135;26", "wc_weaknesses": "251;227;115;207", "wc_questions": "49;40;32;13", "wc_review": "436;419;486;287", "wc_reply_reviewers": "171;43;0;23", "wc_reply_authors": "1597;628;368;1037", "reply_reviewers": "2;1;0;1", "reply_authors": "6;3;1;4", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 95.75, 63.53886605849998 ], "wc_strengths_avg": [ 77.75, 38.95109112720721 ], "wc_weaknesses_avg": [ 200.0, 51.487862647424 ], "wc_questions_avg": [ 33.5, 13.275918047351754 ], "wc_review_avg": [ 407.0, 73.5289058806127 ], "wc_reply_reviewers_avg": [ 59.25, 66.28866796067032 ], "wc_reply_authors_avg": [ 907.5, 464.04768073981364 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.5, 1.8027756377319946 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 48, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6584401630894271152&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;1;2;1;0", "aff_unique_norm": "Zhejiang University;Microsoft;ShanghaiTech University", "aff_unique_dep": ";Microsoft Corporation;", "aff_unique_url": "https://www.zju.edu.cn;https://www.microsoft.com;https://www.shanghaitech.edu.cn", "aff_unique_abbr": "ZJU;Microsoft;ShanghaiTech", 
"aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;1;1;0;0;0", "aff_country_unique": "China;United States" }, { "id": "71kocBuhNO", "title": "LogicBench: Towards Systematic Evaluation of Logical Reasoning Ability of Large Language Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recently developed large language models (LLMs) have been shown to perform remarkably well on a wide range of language understanding tasks. But, can they really \"reason\" over the natural language? This question has been receiving significant research attention and a number of reasoning skills such as commonsense, numerical, and qualitative have been studied. However, the crucial skill pertaining to 'logical reasoning' has remained underexplored. Existing work investigating this reasoning ability has focused only on a couple of inference rules (such as modus ponens and modus tollens) of propositional and first-order logic. To enable systematic evaluation of logical reasoning, we introduce LogicBench, a natural language question-answering dataset encompassing 25 different reasoning patterns spanning over propositional, first-order, and non-monotonic logics. Key steps of our dataset construction consist of (1) controlled generation of sentences and their negations containing different ontologies, (2) (context, question, answer) triplets creation using heuristically designed templates, and (3) semantic variations of triplets adding more diversity. We present a comprehensive evaluation with a range of LLMs such as GPT-4, GPT-3, ChatGPT, and FLAN-T5 using chain-of-thought prompting in both zero-shot and few-shot settings. Experimental results show that existing LLMs do not fare well on LogicBench; especially, they struggle on instances requiring complex reasoning steps. 
Furthermore, we also show that LLMs trained using our data exhibit a better understanding of logical reasoning leading to performance improvements on several existing logical reasoning datasets such as LogicNLI, FOLIO, LogiQA, and ReClor.", "keywords": "Logical Reasoning;Large Language Models;Prompting", "primary_area": "datasets and benchmarks", "supplementary_material": "", "author": "Mihir Parmar;Neeraj Varshney;Nisarg Patel;Man Luo;Santosh Mashetty;Arindam Mitra;Chitta Baral", "authorids": "~Mihir_Parmar1;~Neeraj_Varshney1;~Nisarg_Patel1;~Man_Luo2;~Santosh_Mashetty1;~Arindam_Mitra1;~Chitta_Baral1", "gender": "M;M;;;M;M;M", "homepage": ";https://nrjvarshney.github.io/;;;;https://ari9dam.github.io/;http://chitta.orissalinks.com", "dblp": "253/6105;139/3970;304/0895;;;04/2864;b/ChittaBaral", "google_scholar": "2UPwJC4AAAAJ;Ju9nR0IAAAAJ;https://scholar.google.com/citations?hl=en;;eQN-aNAAAAAJ;https://scholar.google.com/;9Yd716IAAAAJ", "orcid": ";;0000-0001-5964-4204;;;;0000-0002-7549-723X", "linkedin": "mihir-parmar-b44003157/;neerajvarshney97/;https://linkedin.com/in/nisarg-p-patel;;santoshmashetty/;;chitta-baral-8a8438b", "or_profile": "~Mihir_Parmar1;~Neeraj_Varshney1;~Nisarg_Patel1;~Man_Luo2;~Santosh_Mashetty1;~Arindam_Mitra1;~Chitta_Baral1", "aff": "Arizona State University;Arizona State University;Arizona State University;;Arizona State University;Microsoft Research;Arizona State University", "aff_domain": "asu.edu;asu.edu;asu.edu;;asu.edu;research.microsoft.com;asu.edu", "position": "PhD student;PhD student;MS student;;PhD student;Researcher;Full Professor", "bibtex": "@misc{\nparmar2024logicbench,\ntitle={LogicBench: Towards Systematic Evaluation of Logical Reasoning Ability of Large Language Models},\nauthor={Mihir Parmar and Neeraj Varshney and Nisarg Patel and Man Luo and Santosh Mashetty and Arindam Mitra and Chitta Baral},\nyear={2024},\nurl={https://openreview.net/forum?id=71kocBuhNO}\n}", "github": "", "project": "", "reviewers": "8r5S;sePi;5vLY;Z2AS;1d9G", "site": "https://openreview.net/forum?id=71kocBuhNO", "pdf_size": 1400306, "rating": "5;5;5;6;6", "confidence": "2;2;4;5;5", "soundness": "3;3;1;4;3", "contribution": "2;3;2;3;3", "presentation": "3;3;3;3;3", "wc_summary": "102;92;112;74;66", "wc_strengths": "52;87;196;94;57", "wc_weaknesses": "74;123;290;86;219", "wc_questions": "58;1;199;538;100", "wc_review": "286;303;797;792;442", "wc_reply_reviewers": "0;32;632;0;0", "wc_reply_authors": "325;389;2371;592;646", "reply_reviewers": "0;1;2;0;0", "reply_authors": "1;1;5;2;1", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 3.6, 1.3564659966250538 ], "soundness_avg": [ 2.8, 0.9797958971132712 ], "contribution_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 89.2, 17.092688495377196 ], "wc_strengths_avg": [ 97.2, 52.02845375369135 ], "wc_weaknesses_avg": [ 158.4, 83.19759611911873 ], "wc_questions_avg": [ 179.2, 190.7075247597745 ], "wc_review_avg": [ 524.0, 227.4036059520605 ], "wc_reply_reviewers_avg": [ 132.8, 249.90750288856879 ], "wc_reply_authors_avg": [ 864.6, 762.7189783924351 ], "reply_reviewers_avg": [ 0.6, 0.7999999999999999 ], "reply_authors_avg": [ 2.0, 1.5491933384829668 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.8427009716003842, "gs_citation": 43, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12451407874556115043&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Arizona State 
University;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "https://www.asu.edu;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "ASU;MSR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "71mqtQdKB9", "title": "Discrete Diffusion Language Modeling by Estimating the Ratios of the Data Distribution", "track": "main", "status": "Reject", "tldr": "", "abstract": "Despite their groundbreaking performance for many generative modeling tasks, diffusion models have fallen short on discrete data domains such as natural language. Crucially, standard diffusion models rely on the well-established theory of score matching, but efforts to generalize this to discrete structures have not yielded the same empirical gains. In this work, we bridge this gap by proposing score entropy, a novel discrete score matching loss that is more stable than existing methods, forms an ELBO for maximum likelihood training, and can be efficiently optimized with a denoising variant. Combined with architectural improvements, we scale to the GPT-2 language modeling experiments, achieving highly competitive performance. When comparing similarly sized-architectures, our score entropy discrete diffusion model attains comparable zero-shot perplexities despite reporting an upper bound (within $15$ percent of and sometimes outperforming GPT-2), can trade off speed for generation quality ($4\\times$ lower generative perplexity when matching function evaluations and $16\\times$ fewer function evaluations when matching generative perplexity compared to standard autoregressive sampling), and enables arbitrary infilling beyond standard autoregressive left to right prompting.", "keywords": "Diffusion Models;Discrete Diffusion Models;Language Modeling;Transformers", "primary_area": "generative models", "supplementary_material": "", "author": "Aaron Lou;Chenlin Meng;Stefano Ermon", "authorids": "~Aaron_Lou1;~Chenlin_Meng1;~Stefano_Ermon1", "gender": "M;F;M", "homepage": "https://aaronlou.com;https://chenlin9.github.io/;http://cs.stanford.edu/~ermon/", "dblp": "232/3858;227/2517;47/8135", "google_scholar": ";nEFU7wIAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Aaron_Lou1;~Chenlin_Meng1;~Stefano_Ermon1", "aff": "Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@misc{\nlou2024discrete,\ntitle={Discrete Diffusion Language Modeling by Estimating the Ratios of the Data Distribution},\nauthor={Aaron Lou and Chenlin Meng and Stefano Ermon},\nyear={2024},\nurl={https://openreview.net/forum?id=71mqtQdKB9}\n}", "github": "", "project": "", "reviewers": "XAVS;DQWg;dGxc;4UNx;LEE7", "site": "https://openreview.net/forum?id=71mqtQdKB9", "pdf_size": 2383382, "rating": "5;6;6;8;8", "confidence": "3;3;4;4;4", "soundness": "3;2;3;3;4", "contribution": "2;3;3;3;4", "presentation": "2;2;3;3;3", "wc_summary": "84;94;71;219;178", "wc_strengths": "78;139;18;155;35", "wc_weaknesses": "136;188;135;895;233", "wc_questions": "108;135;128;325;109", "wc_review": "406;556;352;1594;555", "wc_reply_reviewers": "0;22;361;876;0", "wc_reply_authors": "599;790;1957;1690;222", "reply_reviewers": "0;1;4;2;0", "reply_authors": "2;3;4;4;1", "rating_avg": [ 6.6, 1.2 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "contribution_avg": [ 3.0, 
0.6324555320336759 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 129.2, 58.506068061355826 ], "wc_strengths_avg": [ 85.0, 54.50504563799576 ], "wc_weaknesses_avg": [ 317.4, 291.0894020743455 ], "wc_questions_avg": [ 161.0, 82.67284923117141 ], "wc_review_avg": [ 692.6, 457.87841180820044 ], "wc_reply_reviewers_avg": [ 251.8, 340.9295528404659 ], "wc_reply_authors_avg": [ 1051.6, 661.6375442793433 ], "reply_reviewers_avg": [ 1.4, 1.4966629547095764 ], "reply_authors_avg": [ 2.8, 1.16619037896906 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.748455199183749, "gs_citation": 56, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9661892450312922778&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "71oyMJiUm2", "title": "TransFace: Unit-Based Audio-Visual Speech Synthesizer for Talking Head Translation", "track": "main", "status": "Desk Reject", "tldr": "", "abstract": "Direct speech-to-speech translation achieves high-quality results through the introduction of discrete units obtained from self-supervised learning. This approach circumvents delays and cascading errors associated with model cascading. However, talking head translation, converting audio-visual speech (i.e., talking head video) from one language into another, still confronts several challenges compared to audio speech: (1) Existing methods invariably rely on cascading, synthesizing via both audio and text, resulting in delays and cascading errors. (2) Talking head translation has a limited set of reference frames. If the generated translation exceeds the length of the original speech, the video sequence needs to be supplemented by repeating frames, leading to jarring video transitions. In this work, we propose a model for talking head translation, \\textbf{TransFace}, which can directly translate audio-visual speech into audio-visual speech in other languages. It consists of a speech-to-unit translation model to convert audio speech into discrete units and a unit-based audio-visual speech synthesizer, Unit2Lip, to re-synthesize synchronized audio-visual speech from discrete units in parallel. Furthermore, we introduce a Bounded Duration Predictor, ensuring isometric talking head translation and preventing duplicate reference frames. Experiments demonstrate that our proposed Unit2Lip model significantly improves synchronization (1.601 and 0.982 on LSE-C for the original and generated audio speech, respectively) and boosts inference speed by a factor of $\\times$4.35 on LRS2. 
Additionally, TransFace achieves impressive BLEU scores of 61.93 and 47.55 for Es-En and Fr-En on LRS3-T and 100\\% isochronous translations.", "keywords": "Speech-To-Speech Translation;Talking Head Generation", "primary_area": "generative models", "supplementary_material": "/attachment/7fba415b085f897824957ea167e5279f8b4847b3.pdf", "author": "Xize Cheng;Rongjie Huang;Linjun Li;Tao Jin;Zehan Wang;Aoxiong Yin;Minglei Li;Xinyu Duan;changpeng yang;Zhou Zhao", "authorids": "~Xize_Cheng1;~Rongjie_Huang1;~Linjun_Li2;~Tao_Jin2;~Zehan_Wang2;~Aoxiong_Yin1;~Minglei_Li1;~Xinyu_Duan1;~changpeng_yang1;~Zhou_Zhao3", "gender": "M;M;;M;M;;M;M;M;", "homepage": "https://exgc.github.io/;;;https://hugddygff.github.io/;https://github.com/12zehan17;;;;https://www.researchgate.net/profile/Changpeng-Yang-2;", "dblp": "334/2167;212/8936-1;;88/4850-4.html;126/7826-1;;136/7341-1.html;31/5936;;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;iRHBUsgAAAAJ;;;euXK0lkAAAAJ;;KqU6kVcAAAAJ;Z1XYinwAAAAJ;;", "orcid": "0000-0001-9708-3225;;;0000-0003-3564-1628;0009-0007-7509-7563;;0000-0002-1427-3507;;;", "linkedin": ";;;;;;;;;", "or_profile": "~Xize_Cheng1;~Rongjie_Huang1;~Linjun_Li2;~Tao_Jin2;~Zehan_Wang2;~Aoxiong_Yin1;~Minglei_Li1;~Xinyu_Duan1;~changpeng_yang1;~Zhou_Zhao3", "aff": "Zhejiang University;Zhejiang University;;Zhejiang University;Zhejiang University;;Huawei Cloud Computing Technologies Ltd.;Huawei Technologies Ltd.;Huawei Technologies Ltd.;", "aff_domain": "zju.edu.cn;zju.edu.cn;;zju.edu.cn;zju.edu.cn;;huawei.com;huawei.com;huawei.com;", "position": "PhD student;MS student;;Assistant Professor;PhD student;;Researcher;Researcher;Director;", "bibtex": "@misc{\ncheng2024transface,\ntitle={TransFace: Unit-Based Audio-Visual Speech Synthesizer for Talking Head Translation},\nauthor={Xize Cheng and Rongjie Huang and Linjun Li and Tao Jin and Zehan Wang and Aoxiong Yin and Minglei Li and Xinyu Duan and changpeng yang and Zhou Zhao},\nyear={2024},\nurl={https://openreview.net/forum?id=71oyMJiUm2}\n}", "github": "", "project": "", "reviewers": "hwnr;Qjzu;hZGE;d3Xc", "site": "https://openreview.net/forum?id=71oyMJiUm2", "pdf_size": 2356840, "rating": "5;6;6;8", "confidence": "3;4;3;3", "soundness": "2;3;3;3", "contribution": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "61;68;104;196", "wc_strengths": "45;64;38;124", "wc_weaknesses": "132;96;98;259", "wc_questions": "23;206;57;28", "wc_review": "261;434;297;607", "wc_reply_reviewers": "634;20;23;172", "wc_reply_authors": "3361;1141;885;661", "reply_reviewers": "3;1;1;1", "reply_authors": "10;5;3;2", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 107.25, 53.774413060488165 ], "wc_strengths_avg": [ 67.75, 33.840619084171614 ], "wc_weaknesses_avg": [ 146.25, 66.64973743384141 ], "wc_questions_avg": [ 78.5, 74.74790966976936 ], "wc_review_avg": [ 399.75, 135.95472592006502 ], "wc_reply_reviewers_avg": [ 212.25, 251.13181299867207 ], "wc_reply_authors_avg": [ 1512.0, 1080.9454195286642 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 5.0, 3.082207001484488 ], "replies_avg": [ 33, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.13245323570650439, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17335056964216082744&as_sdt=5,28&sciodt=0,28&hl=en", "gs_version_total": 5, 
"aff_unique_index": "0;0;0;0;1;1;1", "aff_unique_norm": "Zhejiang University;Huawei", "aff_unique_dep": ";Cloud Computing Technologies", "aff_unique_url": "https://www.zju.edu.cn;https://www.huawei.com/en/cloud", "aff_unique_abbr": "ZJU;Huawei Cloud", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "71yRyuNYPu", "title": "LLMSelect: Knowledge-based Feature Selection with Large Language Models", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "How can we leverage the implicit prior knowledge and reasoning capabilities of large language models (LLMs) for standard supervised learning tasks? In this work, we demonstrate that pretrained LLMs can be used to augment traditional machine learning models by selecting high-signal features without looking at the training data. Providing only the candidate feature names and a minimal description of the prediction task, we prompt the LLM to directly output a set of numerical feature importance scores in text and use them for feature selection. In a series of real-world prediction tasks, we show that LLM-based feature selection can lead to strong downstream predictive performance, competitive with that achieved with standard selection methods such as the LASSO and sequential feature selection. We investigate the sensitivity of this approach to various prompt-design and sampling strategies and to the scale of the pretrained LLM, and find that the simple setting of zero-shot prompting with zero-temperature sampling can be sufficient for strong downstream performance, given a large enough LLM. We also demonstrate that the LLM-generated feature importance scores exhibit nontrivial rank correlation with commonly used feature importance measures such as Shapley values, which illustrate the capabilities of LLMs to effectively distill prior knowledge into meaningful numerical scores.", "keywords": "Large Language Models;Feature Selection;Machine Learning with Prior Knowledge", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Daniel P Jeong;Zachary Chase Lipton;Pradeep Kumar Ravikumar", "authorids": "~Daniel_P_Jeong1;~Zachary_Chase_Lipton1;~Pradeep_Kumar_Ravikumar1", "gender": ";Unspecified;M", "homepage": "http://djeong.com;http://zacklipton.com;http://www.cs.cmu.edu/~pradeepr/", "dblp": "145/9964;;94/3594", "google_scholar": ";MN9Kfg8AAAAJ;https://scholar.google.com.tw/citations?user=Q4DTPw4AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Daniel_P_Jeong1;~Zachary_Chase_Lipton1;~Pradeep_Kumar_Ravikumar1", "aff": "Machine Learning Department, Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cs.cmu.edu;cmu.edu;cmu.edu", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@misc{\njeong2024llmselect,\ntitle={{LLMS}elect: Knowledge-based Feature Selection with Large Language Models},\nauthor={Daniel P Jeong and Zachary Chase Lipton and Pradeep Kumar Ravikumar},\nyear={2024},\nurl={https://openreview.net/forum?id=71yRyuNYPu}\n}", "github": "", "project": "", "reviewers": "", "site": "https://openreview.net/forum?id=71yRyuNYPu", "pdf_size": 0, "rating": "", "confidence": "", "soundness": "", "contribution": "", "presentation": "", "wc_summary": "", "wc_strengths": "", "wc_weaknesses": "", "wc_questions": "", "wc_review": "", "wc_reply_reviewers": "", "wc_reply_authors": "", 
"reply_reviewers": "", "reply_authors": "", "rating_avg": [ 0, 0 ], "confidence_avg": [ 0, 0 ], "soundness_avg": [ 0, 0 ], "contribution_avg": [ 0, 0 ], "presentation_avg": [ 0, 0 ], "wc_summary_avg": [ 0, 0 ], "wc_strengths_avg": [ 0, 0 ], "wc_weaknesses_avg": [ 0, 0 ], "wc_questions_avg": [ 0, 0 ], "wc_review_avg": [ 0, 0 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 0, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:19KcixoO1aMJ:scholar.google.com/&scioq=LLMSelect:+Knowledge-based+Feature+Selection+with+Large+Language+Models&hl=en&as_sdt=0,21", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "Machine Learning Department", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "72MSbSZtHv", "title": "RedMotion: Motion Prediction via Redundancy Reduction", "track": "main", "status": "Reject", "tldr": "", "abstract": "Predicting the future motion of traffic agents is vital for self-driving vehicles to ensure their safe operation.\nWe introduce RedMotion, a transformer model for motion prediction that incorporates two types of redundancy reduction.\nThe first type of redundancy reduction is induced by an internal transformer decoder and reduces a variable-sized set of road environment tokens, such as road graphs with agent data, to a fixed-sized embedding.\nThe second type of redundancy reduction is a self-supervised learning objective and applies the redundancy reduction principle to embeddings generated from augmented views of road environments.\nOur experiments reveal that our representation learning approach can outperform PreTraM, Traj-MAE, and GraphDINO in a semi-supervised setting.\nOur RedMotion model achieves results that are competitive with those of Scene Transformer or MTR++.\nWe provide an anonymized open source implementation that is accessible via Colab: https://colab.research.google.com/drive/16pwsmOTYdPpbNWf2nm1olXcx1ZmsXHB8", "keywords": "Motion prediction;self-supervised learning;trajectory forecasting;self-driving", "primary_area": "applications to robotics, autonomy, planning", "supplementary_material": "", "author": "Royden Wagner;Omer Sahin Tas;Marvin Klemp;Carlos Fernandez", "authorids": "~Royden_Wagner1;~Omer_Sahin_Tas1;~Marvin_Klemp1;~Carlos_Fernandez1", "gender": "M;;M;", "homepage": ";https://www.omersahintas.com;https://www.linkedin.com/in/marvin-klemp-371428201/;https://www.mrt.kit.edu", "dblp": "318/1159;122/3330.html;;79/11147.html", "google_scholar": "tvltjqQAAAAJ;https://scholar.google.de/citations?user=opaVrnQAAAAJ;ZOCUUfoAAAAJ;OIF2_EMAAAAJ", "orcid": ";;;0000-0002-0417-6762", "linkedin": "royden-wagner-35843919b;;;", "or_profile": "~Royden_Wagner1;~Omer_Sahin_Tas1;~Marvin_Klemp1;~Carlos_Fernandez1", "aff": "Karlsruhe Institute of Technology;FZI Research Center for Information Technology;Karlsruhe Institute of Technology;Karlsruher Institut f\u00fcr Technologie", "aff_domain": "kit.edu;fzi.de;kit.edu;kit.edu", "position": "PhD student;Research Scientist;PhD student;Postdoc", "bibtex": "@misc{\nwagner2024redmotion,\ntitle={RedMotion: Motion Prediction via Redundancy Reduction},\nauthor={Royden Wagner and Omer Sahin Tas and Marvin Klemp 
and Carlos Fernandez},\nyear={2024},\nurl={https://openreview.net/forum?id=72MSbSZtHv}\n}", "github": "", "project": "", "reviewers": "NoCd;4aEu;1Zub", "site": "https://openreview.net/forum?id=72MSbSZtHv", "pdf_size": 2670915, "rating": "3;5;8", "confidence": "5;2;4", "soundness": "1;2;3", "contribution": "1;2;3", "presentation": "2;3;4", "wc_summary": "24;111;95", "wc_strengths": "16;120;90", "wc_weaknesses": "206;623;155", "wc_questions": "62;111;17", "wc_review": "308;965;357", "wc_reply_reviewers": "136;248;0", "wc_reply_authors": "438;1189;200", "reply_reviewers": "1;1;0", "reply_authors": "2;3;1", "rating_avg": [ 5.333333333333333, 2.0548046676563256 ], "confidence_avg": [ 3.6666666666666665, 1.247219128924647 ], "soundness_avg": [ 2.0, 0.816496580927726 ], "contribution_avg": [ 2.0, 0.816496580927726 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 76.66666666666667, 37.80946383586463 ], "wc_strengths_avg": [ 75.33333333333333, 43.70608907489004 ], "wc_weaknesses_avg": [ 328.0, 209.63301266737545 ], "wc_questions_avg": [ 63.333333333333336, 38.38691906829142 ], "wc_review_avg": [ 543.3333333333334, 298.8336586724386 ], "wc_reply_reviewers_avg": [ 128.0, 101.40348448976823 ], "wc_reply_authors_avg": [ 609.0, 421.4743962172158 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.21677749238103003, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4906014090893876145&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Karlsruhe Institute of Technology;FZI Research Center for Information Technology;Karlsruher Institut f\u00fcr Technologie", "aff_unique_dep": ";;", "aff_unique_url": "https://www.kit.edu;https://www.fzi.de;https://www.kit.edu", "aff_unique_abbr": "KIT;FZI;KIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "id": "73dhbcXxtV", "title": "LOLAMEME: LOGIC, LANGUAGE, MEMORY, MECHANISTIC FRAMEWORK", "track": "main", "status": "Reject", "tldr": "", "abstract": "The performance of Large Language Models has achieved superhuman breadth with unprecedented depth. At the same time, the language models are mostly black box models and the underlying mechanisms for performance have been evaluated using synthetic or mechanistic schemes. We extend current mechanistic schemes to incorporate Logic, memory, and nuances of Language such as latent structure. The proposed framework is called LOLAMEME and we provide two instantiations of LOLAMEME: LoLa and MeMe languages. We then consider two generative language model architectures: transformer-based GPT-2 and convolution-based Hyena. We propose the hybrid architecture THEX and use the LOLAMEME framework to compare the three architectures. THEX outperforms GPT-2 and Hyena on select tasks.", "keywords": "LOGIC;LANGUAGE;MEMORY;MECHANISTIC;FRAMEWORK;LLM;GENERATIVE;AI", "primary_area": "generative models", "supplementary_material": "", "author": "Jay Desai;Xiaobo Guo;Srinivasan H.
Sengamedu", "authorids": "~Jay_Desai1;~Xiaobo_Guo1;~Srinivasan_H._Sengamedu1", "gender": "M;M;", "homepage": ";;", "dblp": ";14/2107;38/2372", "google_scholar": "https://scholar.google.com/citations?hl=en;z9rwAaIAAAAJ;X9fVMRUAAAAJ", "orcid": ";0000-0002-6817-626X;0000-0003-1847-8398", "linkedin": "djaym7/;;srinivasan-h-sengamedu", "or_profile": "~Jay_Desai1;~Xiaobo_Guo1;~Srinivasan_H._Sengamedu1", "aff": "Amazon;Dartmouth College;Amazon", "aff_domain": "amazon.com;dartmouth.edu;amazon.com", "position": "Researcher;PhD student;Applied Science Manager", "bibtex": "@misc{\ndesai2024lolameme,\ntitle={{LOLAMEME}: {LOGIC}, {LANGUAGE}, {MEMORY}, {MECHANISTIC} {FRAMEWORK}},\nauthor={Jay Desai and Xiaobo Guo and Srinivasan H. Sengamedu},\nyear={2024},\nurl={https://openreview.net/forum?id=73dhbcXxtV}\n}", "github": "", "project": "", "reviewers": "JYqN;5Z2e;iHzK", "site": "https://openreview.net/forum?id=73dhbcXxtV", "pdf_size": 283801, "rating": "3;3;3", "confidence": "3;3;3", "soundness": "2;1;2", "contribution": "2;1;2", "presentation": "1;1;2", "wc_summary": "55;36;107", "wc_strengths": "56;1;21", "wc_weaknesses": "120;77;291", "wc_questions": "31;1;4", "wc_review": "262;115;423", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "263;207;654", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 3.0, 0.0 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 1.6666666666666667, 0.4714045207910317 ], "contribution_avg": [ 1.6666666666666667, 0.4714045207910317 ], "presentation_avg": [ 1.3333333333333333, 0.4714045207910317 ], "wc_summary_avg": [ 66.0, 30.011109054259666 ], "wc_strengths_avg": [ 26.0, 22.73030282830976 ], "wc_weaknesses_avg": [ 162.66666666666666, 92.42774956088073 ], "wc_questions_avg": [ 12.0, 13.490737563232042 ], "wc_review_avg": [ 266.6666666666667, 125.78376507147318 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 374.6666666666667, 198.83717515148473 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:o2kYI_rQgecJ:scholar.google.com/&scioq=LOLAMEME:+LOGIC,+LANGUAGE,+MEMORY,+MECHANISTIC+FRAMEWORK&hl=en&as_sdt=0,44", "gs_version_total": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Amazon;Dartmouth College", "aff_unique_dep": "Amazon.com, Inc.;", "aff_unique_url": "https://www.amazon.com;https://www.dartmouth.edu", "aff_unique_abbr": "Amazon;Dartmouth", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "73lu1yw6At", "title": "Complexity of Formal Explainability for Sequential Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "This work contributes to formal explainability in AI (FXAI) for sequential\n models, including\n Recurrent Neural Networks (RNN), Transformers, and automata models from\n formal\nlanguage theory (e.g. finite-state automata). We study two common notions\n of explainability in FXAI: (1) abductive explanations (a.k.a. minimum sufficient \n reasons), and (2) counterfactual (a.k.a. contrastive) explanations.\n To account for various forms of sequential data (e.g. texts, time series,\n and videos), our models take a sequence of rational numbers as\n input. \n We first observe that\n simple RNN and Transformers suffer from NP-hard complexity (or sometimes \nundecidability) for both types of explanations. 
The works on extraction of \nautomata from RNN hinge on the assumption that automata are more interpretable\n than RNN. Interestingly, it turns out that generating abductive explanations\n for DFA is computationally intractable (PSPACE-complete), for features that\n are represented by regular languages. On the positive side, \n we show that deterministic finite automata (DFA) admit polynomial-time \n complexity for counterfactual explanations. \n However, DFA are a highly inexpressive model\n for classifying sequences of numbers. To address this limitation,\nwe provide two expressive extensions of finite automata, while preserving \n PTIME explainability and admitting automata learning algorithms: (1) \n deterministic interval automata, and (2) \n deterministic register automata with a fixed number of registers.", "keywords": "Logic-based explanation;sequential models;Computational Complexity;RNN;Automata;Transformers", "primary_area": "neurosymbolic & hybrid AI systems (physics-informed, logic & formal reasoning, etc.)", "supplementary_material": "", "author": "Anthony Widjaja Lin;Micha Schrader;Marvin K\u00fcnnemann;Pravriti Jaipuriyar", "authorids": "~Anthony_Widjaja_Lin1;~Micha_Schrader1;~Marvin_K\u00fcnnemann1;~Pravriti_Jaipuriyar1", "gender": "M;M;;", "homepage": "https://anthonywlin.github.io/;https://github.com/MichaSchrader;;https://github.com/jaipuriyarp", "dblp": "38/2655;309/6099;;", "google_scholar": "https://scholar.google.co.uk/citations?user=__5nnYUAAAAJ;;;", "orcid": "0000-0003-4715-5096;;;0000-0002-8306-9251", "linkedin": ";;;pravriti-jaipuriyar-08a271102", "or_profile": "~Anthony_Widjaja_Lin1;~Micha_Schrader1;~Marvin_K\u00fcnnemann1;~Pravriti_Jaipuriyar1", "aff": "Universit\u00e4t Kaiserslautern;Universit\u00e4t Kaiserslautern;;Universit\u00e4t Kaiserslautern", "aff_domain": "uni-kl.de;uni-kl.de;;uni-kl.de", "position": "Full Professor;MS student;;MS student", "bibtex": "@misc{\nlin2024complexity,\ntitle={Complexity of Formal Explainability for Sequential Models},\nauthor={Anthony Widjaja Lin and Micha Schrader and Marvin K{\\\"u}nnemann and Pravriti Jaipuriyar},\nyear={2024},\nurl={https://openreview.net/forum?id=73lu1yw6At}\n}", "github": "", "project": "", "reviewers": "XaKE;ynBt;rsdT;hXKh;56Yr", "site": "https://openreview.net/forum?id=73lu1yw6At", "pdf_size": 412897, "rating": "5;6;6;6;6", "confidence": "4;4;3;4;4", "soundness": "3;4;3;4;3", "contribution": "2;2;2;2;2", "presentation": "2;4;3;4;3", "wc_summary": "94;51;140;59;337", "wc_strengths": "71;99;68;4;115", "wc_weaknesses": "248;104;260;217;193", "wc_questions": "113;46;35;31;346", "wc_review": "526;300;503;311;991", "wc_reply_reviewers": "263;86;0;0;238", "wc_reply_authors": "918;691;374;799;992", "reply_reviewers": "1;1;0;0;2", "reply_authors": "2;1;1;1;3", "rating_avg": [ 5.8, 0.39999999999999997 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 136.2, 105.19961977117599 ], "wc_strengths_avg": [ 71.4, 37.992630864418956 ], "wc_weaknesses_avg": [ 204.4, 55.42779086342879 ], "wc_questions_avg": [ 114.2, 119.64848515547533 ], "wc_review_avg": [ 526.2, 250.62114834945592 ], "wc_reply_reviewers_avg": [ 117.4, 113.39770720786201 ], "wc_reply_authors_avg": [ 754.8, 216.30108645127052 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 
-0.25000000000000006, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13966694831767173242&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Kaiserslautern", "aff_unique_dep": "", "aff_unique_url": "https://www.uni-kl.de", "aff_unique_abbr": "Uni KL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "id": "74IIsh2kM6", "title": "SMILE: Audio-Visual Speech Recognition with Siamese Masked Interaction Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Audio-Visual Speech Recognition (AVSR) aims to improve the performance of Automatic Speech Recognition (ASR) by incorporating visual cues in addition to audio information. In this task, the crucial aspect is establishing temporal correspondence while aligning the mutually complementary nature of audio and visual modalities. To this end, we propose the Siamese Masked Interaction LEarning (SMILE) framework, which combines the multimodal early fusion strategy and representation alignment methods between audio and visual modalities. SMILE facilitates global interactions among audio-visual features and enables single-modal and cross-modal local alignment. In addition, we propose an adaptive dynamic multimodal fusion strategy that effectively captures the complementary relationship between the audio and visual modalities. With extensive experiments, our model SMILE, when tested with different model scales, achieves state-of-the-art performance on LRS2 and LRS3 datasets under both low-resource and high-resource settings.", "keywords": "Audio-Visual Speech Recognition;Siamese Masked Interaction Learning", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/2139c645b74e489b635c4b12a2c34acb8a7c9311.zip", "author": "Tianrui Pan;Jie Liu;Jie Tang;Gangshan Wu", "authorids": "~Tianrui_Pan1;~Jie_Liu10;~Jie_Tang4;~Gangshan_Wu1", "gender": "F;M;M;M", "homepage": "https://github.com/pantianrui;https://njulj.github.io/;http://mcg.nju.edu.cn/member/tangj/index.html;http://mcg.nju.edu.cn/member/gswu/en/index.html", "dblp": ";03/2134-40;181/2702-6;78/1123", "google_scholar": ";oab9IRYAAAAJ;sJvNOBwAAAAJ;", "orcid": ";;;0000-0003-1391-1762", "linkedin": ";;;", "or_profile": "~Tianrui_Pan1;~Jie_Liu10;~Jie_Tang4;~Gangshan_Wu1", "aff": "Nanjing University;Nanjing University;Nanjing University;Nanjing University", "aff_domain": "nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn", "position": "PhD student;Researcher;Associate Professor;Full Professor", "bibtex": "@misc{\npan2024smile,\ntitle={{SMILE}: Audio-Visual Speech Recognition with Siamese Masked Interaction Learning},\nauthor={Tianrui Pan and Jie Liu and Jie Tang and Gangshan Wu},\nyear={2024},\nurl={https://openreview.net/forum?id=74IIsh2kM6}\n}", "github": "", "project": "", "reviewers": "reNT;brXn;BvrV;iBsX", "site": "https://openreview.net/forum?id=74IIsh2kM6", "pdf_size": 1556514, "rating": "5;5;5;5", "confidence": "4;4;4;4", "soundness": "2;2;3;3", "contribution": "2;3;3;2", "presentation": "3;3;3;3", "wc_summary": "55;70;81;61", "wc_strengths": "67;46;55;35", "wc_weaknesses": "112;140;91;85", "wc_questions": "310;38;4;141", "wc_review": "544;294;231;322", "wc_reply_reviewers": "83;30;18;0", "wc_reply_authors": "963;381;102;415", "reply_reviewers": "1;1;1;0", "reply_authors": "3;2;2;1", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": 
[ 4.0, 0.0 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 66.75, 9.807522622966516 ], "wc_strengths_avg": [ 50.75, 11.755317945508747 ], "wc_weaknesses_avg": [ 107.0, 21.529050141610984 ], "wc_questions_avg": [ 123.25, 119.03649650422345 ], "wc_review_avg": [ 347.75, 118.0007944888508 ], "wc_reply_reviewers_avg": [ 32.75, 30.914195768287424 ], "wc_reply_authors_avg": [ 465.25, 311.98106913721546 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:9Ew6tnKk17IJ:scholar.google.com/&scioq=SMILE:+Audio-Visual+Speech+Recognition+with+Siamese+Masked+Interaction+Learning&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": "https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "74YdSRFORA", "title": "Out of Sight: A Framework for Egocentric Active Speaker Detection", "track": "main", "status": "Reject", "tldr": "", "abstract": "Current methods for Active Speaker Detection (ASD) have achieved remarkable performance in commercial movies and social media videos. However, the recent release of the Ego4D dataset has shown the limitations of contemporary ASD\nmethods when applied in the egocentric domain. In addition to the inherent challenges of egocentric data, egocentric video brings a novel prediction target to the ASD task, namely the camera wearer\u2019s speech activity. We propose a comprehensive approach to ASD in the egocentric domain that can model all the prediction targets (visible speakers, camera wearer, and global speech activity). Moreover, our proposal is fully instantiated inside a multimodal transformer module, thereby allowing it to operate in an end-to-end fashion over diverse modality encoders. Through extensive experimentation, we show that this flexible attention mechanism allows us to correctly model and estimate the speech activity of all the visible and unseen persons in a scene. 
Our proposal (ASD-Mixer) achieves state-\nof-the-art performance in the challenging Ego4D Dataset, outperforming previous state-of-the-art by at least 4.41%.", "keywords": "Audiovisual;multimodal;active speaker", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Juan C Leon Alcazar;Chen Zhao;Bernard Ghanem", "authorids": "~Juan_C_Leon_Alcazar1;~Chen_Zhao3;~Bernard_Ghanem1", "gender": "M;;M", "homepage": ";;https://ivul.kaust.edu.sa", "dblp": ";;37/2516", "google_scholar": "wHJ_cBIAAAAJ;;rVsGTeEAAAAJ", "orcid": ";;0000-0002-5534-587X", "linkedin": ";;bernardghanem/", "or_profile": "~Juan_C_Leon_Alcazar1;~Chen_Zhao3;~Bernard_Ghanem1", "aff": "King Abdullah University of Science and Technology;;King Abdullah University of Science and Technology", "aff_domain": "kaust.edu.sa;;kaust.edu.sa", "position": "Postdoc;;Full Professor", "bibtex": "@misc{\nalcazar2024out,\ntitle={Out of Sight: A Framework for Egocentric Active Speaker Detection},\nauthor={Juan C Leon Alcazar and Chen Zhao and Bernard Ghanem},\nyear={2024},\nurl={https://openreview.net/forum?id=74YdSRFORA}\n}", "github": "", "project": "", "reviewers": "1nZN;2WYw;dygL;kwFT", "site": "https://openreview.net/forum?id=74YdSRFORA", "pdf_size": 3385847, "rating": "1;3;6;6", "confidence": "4;4;3;4", "soundness": "1;3;3;3", "contribution": "1;2;2;2", "presentation": "2;2;4;4", "wc_summary": "110;114;154;88", "wc_strengths": "34;26;38;42", "wc_weaknesses": "331;288;160;32", "wc_questions": "39;54;75;2", "wc_review": "514;482;427;164", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "37;519;519;100", "reply_reviewers": "0;0;0;0", "reply_authors": "1;6;3;2", "rating_avg": [ 4.0, 2.1213203435596424 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 1.0 ], "wc_summary_avg": [ 116.5, 23.806511714234826 ], "wc_strengths_avg": [ 35.0, 5.916079783099616 ], "wc_weaknesses_avg": [ 202.75, 116.93881947411647 ], "wc_questions_avg": [ 42.5, 26.650515942472857 ], "wc_review_avg": [ 396.75, 137.9336344043758 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 293.75, 226.34859730071224 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 3.0, 1.8708286933869707 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5443310539518174, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:B4p96g2SaAcJ:scholar.google.com/&scioq=Out+of+Sight:+A+Framework+for+Egocentric+Active+Speaker+Detection&hl=en&as_sdt=0,14", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "King Abdullah University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaust.edu.sa", "aff_unique_abbr": "KAUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Saudi Arabia" }, { "id": "760br3YEtY", "title": "($\texttt{PEEP}$) $\textbf{P}$redicting $\textbf{E}$nzym$\textbf{e}$ $\textbf{P}$romiscuity with its Molecule Mate \u2013 an Attentive Metric Learning Solution", "track": "main", "status": "Reject", "tldr": "", "abstract": "Annotating the functions of proteins (e.g., enzymes) is a fundamental challenge, due to their diverse functionalities and rapidly increased number of protein sequences in databases.
Traditional approaches have limited capability and suffer from false positive predictions. Recent machine learning (ML) methods reach satisfactory prediction accuracy but still fail to generalize, especially for less-studied proteins and those with previously uncharacterized functions or promiscuity. To address these pain points, we propose a novel ML algorithm, PEEP, to predict enzyme promiscuity, which integrates biology priors of protein functionality to regularize the model learning. To be specific, at the input level, PEEP fuses the corresponding molecule into protein embeddings to gain their reaction information; at the model level, a tailored self-attention is leveraged to capture important residues which we found are aligned with the active site in protein pocket structure; at the objective level, we embed functionality label hierarchy into metric learning objectives by imposing a larger distance margin between proteins that have less functionality in common. PEEP is extensively validated on three public benchmarks, achieving up to 4.6%, 3.1%, 3.7% improvements on F-1 scores compared to existing methods. Moreover, it demonstrates impressive generalization to unseen protein sequences with unseen functionalities. Codes are included in the supplement.", "keywords": "Protein Engineering; Metric Learning;", "primary_area": "applications to physical sciences (physics, chemistry, biology, etc.)", "supplementary_material": "/attachment/c73442cde6e6b9893134513ccd4a1ebfa1e36ae5.zip", "author": "Xuxi Chen;Daniel Jesus Diaz;Chengyue Gong;Alex Dimakis;Adam Klivans;Zhangyang Wang;Tianlong Chen", "authorids": "~Xuxi_Chen1;~Daniel_Jesus_Diaz1;~Chengyue_Gong1;~Alex_Dimakis1;~Adam_Klivans1;~Zhangyang_Wang1;~Tianlong_Chen1", "gender": "Unspecified;M;M;M;M;M;M", "homepage": ";;;https://people.eecs.berkeley.edu/~alexdimakis/;http://www.cs.utexas.edu/~klivans;https://vita-group.github.io;https://tianlong-chen.github.io", "dblp": "267/9662;;209/4862;19/5000.html;k/AdamRKlivans;119/4026;", "google_scholar": "afsDlKYAAAAJ;lVD0CNEAAAAJ;AscakBgAAAAJ;JSFmVQEAAAAJ;;pxFyKAIAAAAJ;LE3ctn0AAAAJ", "orcid": ";0000-0002-7891-2128;;;;;0000-0001-7774-8197", "linkedin": ";aiproteins/;;alex-dimakis-b1b20320/;;;tianlong-chen-783862167/", "or_profile": "~Xuxi_Chen1;~Daniel_Jesus_Diaz1;~Chengyue_Gong1;~Alex_Dimakis1;~Adam_Klivans1;~Zhangyang_Wang1;~Tianlong_Chen1", "aff": "University of Texas at Austin;University of Texas at Austin;University of Texas at Austin;University of Texas at Austin;University of Texas, Austin;University of Texas at Austin;Harvard University", "aff_domain": "utexas.edu;utexas.edu;cs.utexas.edu;utexas.edu;cs.utexas.edu;utexas.edu;harvard.edu", "position": "PhD student;Postdoc;grad student;Full Professor;Professor;Associate Professor;Postdoc", "bibtex": "@misc{\nchen2024textttpeep,\ntitle={(\\${\\textbackslash}texttt\\{{PEEP}\\}\\$) \\${\\textbackslash}textbf\\{P\\}\\$redicting \\${\\textbackslash}textbf\\{E\\}\\$nzym\\${\\textbackslash}textbf\\{e\\}\\$ \\${\\textbackslash}textbf\\{P\\}\\$romiscuity with its Molecule Mate {\\textendash} an Attentive Metric Learning Solution},\nauthor={Xuxi Chen and Daniel Jesus Diaz and Chengyue Gong and Alex Dimakis and Adam Klivans and Zhangyang Wang and Tianlong Chen},\nyear={2024},\nurl={https://openreview.net/forum?id=760br3YEtY}\n}", "github": "", "project": "", "reviewers": "QFnk;sx29;nNj8;siYE;fk1t", "site": "https://openreview.net/forum?id=760br3YEtY", "pdf_size": 1786350, "rating": "5;5;6;6;6", "confidence": "2;4;3;4;3", "soundness": "2;3;3;3;3",
"contribution": "2;3;2;3;2", "presentation": "3;3;3;4;3", "wc_summary": "24;103;93;89;85", "wc_strengths": "80;73;47;69;31", "wc_weaknesses": "45;50;39;183;88", "wc_questions": "29;5;17;279;71", "wc_review": "178;231;196;620;275", "wc_reply_reviewers": "0;12;0;0;20", "wc_reply_authors": "345;299;277;1001;454", "reply_reviewers": "0;1;0;0;1", "reply_authors": "2;1;1;3;1", "rating_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "contribution_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 78.8, 28.045677028732964 ], "wc_strengths_avg": [ 60.0, 18.2208671582886 ], "wc_weaknesses_avg": [ 81.0, 53.80334562088124 ], "wc_questions_avg": [ 80.2, 101.85951109248464 ], "wc_review_avg": [ 300.0, 163.3927782981855 ], "wc_reply_reviewers_avg": [ 6.4, 8.2365041127896 ], "wc_reply_authors_avg": [ 475.2, 269.88768034128566 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.21821789023599236, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "aff_unique_index": "0;0;0;0;0;0;1", "aff_unique_norm": "University of Texas at Austin;Harvard University", "aff_unique_dep": ";", "aff_unique_url": "https://www.utexas.edu;https://www.harvard.edu", "aff_unique_abbr": "UT Austin;Harvard", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "76gh3RShsM", "title": "Mastering Pixel-Based Reinforcement Learning via Positive Unlabeled Policy-Guided Contrast", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Real-world reinforcement learning has received a significant amount of attention very recently. A fundamental yet challenging problem in this learning paradigm is perceiving real-world environmental information, such that \\textit{pixel-based} reinforcement learning emerges, which aims to learn representation from visual observations for policy optimization. In this article, we profoundly elaborate the frameworks of benchmark methods and demonstrate a long-standing \\textit{paradox} challenging current methods: in different training phases, exploring visual semantic information can improve and prevent the performance of the learned feature representations from improving. In practice, we further disclose that the over-redundancy issue generally halts the rise of sample efficiency among baseline methods. To remedy the uncovered deficiency of existing methods, we introduce a novel plug-and-play method for pixel-based reinforcement learning. Our model involves the \\textit{positive unlabeled policy-guided contrast} to learn jointly anti-redundant and policy-optimization-relevant visual semantic information during training. To sufficiently elucidate the proposed method's innate superiority, we revisit the pixel-based reinforcement learning paradigm from the information theory perspective. The theoretical evidence proves that the proposed model can achieve the tighter lower bound of the mutual information between the policy optimization-related information and the information of the representation derived by the encoder. 
To carry out the evaluation of our model, we conduct extensive benchmark experiments and illustrate the superior performance of our method over existing methods with respect to the pixel observation environments.", "keywords": "Pixel Observation;Reinforcement Learning;Self-Supervised Learning;Contrastive Learning;Visual Control Task", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/72c7b9ced0ccba72249d24204c2a5a1ccca8ca69.zip", "author": "Zehua Zang;Jiangmeng Li;Chuxiong Sun;Jiabao Li;Rui Wang;Lixiang Liu;Fuchun Sun", "authorids": "~Zehua_Zang1;~Jiangmeng_Li1;~Chuxiong_Sun2;~Jiabao_Li2;~Rui_Wang8;~Lixiang_Liu1;~Fuchun_Sun1", "gender": "M;M;;M;F;M;M", "homepage": "https://github.com/ZangZehua;https://jiangmengli.github.io/;;;;https://people.ucas.ac.cn/~liulx;https://www.cs.tsinghua.edu.cn/info/1121/3555.htm", "dblp": "364/6601;293/0997;214/9412;;06/2293;;", "google_scholar": "PWj9Zk8AAAAJ;https://scholar.google.com.sg/citations?user=-kU4VLcAAAAJ;;;;;", "orcid": "0000-0001-8969-3218;0000-0002-3376-1522;0000-0002-4956-6924;0009-0005-4456-3547;0000-0001-5369-9116;;", "linkedin": ";jiangmeng-li-86aaa7125/;;;;;", "or_profile": "~Zehua_Zang1;~Jiangmeng_Li1;~Chuxiong_Sun2;~Jiabao_Li2;~Rui_Wang8;~Lixiang_Liu1;~Fuchun_Sun1", "aff": "Institute of Software Chinese Academy of Sciences;Institute of Software, Chinese Academy of Sciences;Chinese Academy of Sciences, Institute of Software;University of Science and Technology Beijing;;University of Chinese Academy of Sciences;Tsinghua University", "aff_domain": "iscas.ac.cn;iscas.ac.cn;iscas.ac.cn;ustb.edu.cn;;ucas.ac.cn;cs.tsinghua.edu.cn", "position": "PhD student;Assistant Professor;Postdoc;MS student;;Full Professor;Full Professor", "bibtex": "@misc{\nzang2024mastering,\ntitle={Mastering Pixel-Based Reinforcement Learning via Positive Unlabeled Policy-Guided Contrast},\nauthor={Zehua Zang and Jiangmeng Li and Chuxiong Sun and Jiabao Li and Rui Wang and Lixiang Liu and Fuchun Sun},\nyear={2024},\nurl={https://openreview.net/forum?id=76gh3RShsM}\n}", "github": "", "project": "", "reviewers": "LKxU;E7VW;bVmG;kGtJ", "site": "https://openreview.net/forum?id=76gh3RShsM", "pdf_size": 3764375, "rating": "1;5;5;5", "confidence": "4;4;2;3", "soundness": "1;3;3;2", "contribution": "1;2;2;2", "presentation": "1;3;1;2", "wc_summary": "67;86;62;14", "wc_strengths": "8;37;67;14", "wc_weaknesses": "371;267;90;144", "wc_questions": "5;20;35;6", "wc_review": "451;410;254;178", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.0, 1.7320508075688772 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 1.75, 0.82915619758885 ], "wc_summary_avg": [ 57.25, 26.52710877574109 ], "wc_strengths_avg": [ 31.5, 23.178653972998518 ], "wc_weaknesses_avg": [ 218.0, 109.16730279712877 ], "wc_questions_avg": [ 16.5, 12.216791722870616 ], "wc_review_avg": [ 323.25, 111.51092995756066 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:_fwTQk54De0J:scholar.google.com/&scioq=Mastering+Pixel-Based+Reinforcement+Learning+via+Positive+Unlabeled+Policy-Guided+Contrast&hl=en&as_sdt=0,5", 
"gs_version_total": 0, "aff_unique_index": "0;0;0;1;2;3", "aff_unique_norm": "Chinese Academy of Sciences;University of Science and Technology Beijing;University of Chinese Academy of Sciences;Tsinghua University", "aff_unique_dep": "Institute of Software;;;", "aff_unique_url": "http://www.is.cas.cn;http://www.ustb.edu.cn;http://www.ucas.ac.cn;https://www.tsinghua.edu.cn", "aff_unique_abbr": "CAS;USTB;UCAS;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "RetroBridge: Modeling Retrosynthesis with Markov Bridges", "status": "Spotlight", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19375", "id": "770DetV8He", "author_site": "Ilia Igashov, Arne Schneuing, Marwin Segler, Michael Bronstein, Bruno Correia", "tldr": "", "abstract": "Retrosynthesis planning is a fundamental challenge in chemistry which aims at designing multi-step reaction pathways from commercially available starting materials to a target molecule. Each step in multi-step retrosynthesis planning requires accurate prediction of possible precursor molecules given the target molecule and confidence estimates to guide heuristic search algorithms. We model single-step retrosynthesis as a distribution learning problem in a discrete state space. First, we introduce the Markov Bridge Model, a generative framework aimed to approximate the dependency between two intractable discrete distributions accessible via a finite sample of coupled data points. Our framework is based on the concept of a Markov bridge, a Markov process pinned at its endpoints. Unlike diffusion-based methods, our Markov Bridge Model does not need a tractable noise distribution as a sampling proxy and directly operates on the input product molecules as samples from the intractable prior distribution. We then address the retrosynthesis planning problem with our novel framework and introduce RetroBridge, a template-free retrosynthesis modeling approach that achieves state-of-the-art results on standard evaluation benchmarks.", "keywords": "Retrosynthesis;Reactions;Chemistry;Drug Discovery;Markov Bridge", "primary_area": "applications to physical sciences (physics, chemistry, biology, etc.)", "supplementary_material": "/attachment/5425ef5b68639ce9c53f9c8ea9f651d33a1ddbfe.zip", "author": "Ilia Igashov;Arne Schneuing;Marwin Segler;Michael M. Bronstein;Bruno Correia", "authorids": "~Ilia_Igashov1;~Arne_Schneuing1;~Marwin_Segler2;~Michael_M._Bronstein1;~Bruno_Correia1", "gender": "M;Not Specified;M;M;", "homepage": "https://igashov.github.io;https://people.epfl.ch/arne.schneuing?lang=en;http://www.inf.usi.ch/bronstein/;https://people.epfl.ch/bruno.correia/?lang=en;", "dblp": "278/8797;;07/2668;;185/0993", "google_scholar": "ruubko4AAAAJ;lya44IUAAAAJ;UU3N6-UAAAAJ;https://scholar.google.ch/citations?user=Va246xYAAAAJ;imsL94QAAAAJ", "orcid": "0000-0002-6214-2827;0009-0000-9924-6921;;;", "linkedin": ";;mbronstein/;;", "or_profile": "~Ilia_Igashov1;~Arne_Schneuing1;~Michael_M._Bronstein1;~Bruno_Correia1;~Marwin_Segler1", "aff": "EPFL - EPF Lausanne;Microsoft;University of Oxford;;Microsoft", "aff_domain": "epfl.ch;microsoft.com;ox.ac.uk;;microsoft.com", "position": "PhD student;Intern;Full Professor;;Researcher", "bibtex": "@inproceedings{\nigashov2024retrobridge,\ntitle={RetroBridge: Modeling Retrosynthesis with Markov Bridges},\nauthor={Ilia Igashov and Arne Schneuing and Marwin Segler and Michael M. 
Bronstein and Bruno Correia},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=770DetV8He}\n}", "github": "", "project": "", "reviewers": "CFGE;vNHE;g93h;4mon", "pdf_size": 4535773, "rating": "6;6;8;8", "confidence": "3;5;4;4", "soundness": "3;3;3;4", "contribution": "2;3;4;3", "presentation": "4;2;3;3", "wc_summary": "25;44;58;91", "wc_strengths": "39;59;53;113", "wc_weaknesses": "113;90;47;69", "wc_questions": "189;55;2;36", "wc_review": "366;248;160;309", "wc_reply_reviewers": "84;0;32;18", "wc_reply_authors": "1532;845;214;469", "reply_reviewers": "1;0;1;1", "reply_authors": "3;2;1;2", "rating_avg": [ 7.0, 1.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 54.5, 24.109126902482387 ], "wc_strengths_avg": [ 66.0, 28.089143810376278 ], "wc_weaknesses_avg": [ 79.75, 24.488517717493643 ], "wc_questions_avg": [ 70.5, 71.00176054155277 ], "wc_review_avg": [ 270.75, 76.35239027037726 ], "wc_reply_reviewers_avg": [ 33.5, 31.284980421921315 ], "wc_reply_authors_avg": [ 765.0, 496.46399668052464 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8692592874490665610&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=770DetV8He", "pdf": "https://openreview.net/pdf?id=770DetV8He", "email": "epfl.ch;microsoft.com;ox.ac.uk;;microsoft.com", "author_num": 5, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "EPFL;Microsoft;University of Oxford", "aff_unique_dep": ";Microsoft Corporation;", "aff_unique_url": "https://www.epfl.ch;https://www.microsoft.com;https://www.ox.ac.uk", "aff_unique_abbr": "EPFL;Microsoft;Oxford", "aff_campus_unique_index": "0", "aff_campus_unique": "Lausanne;", "aff_country_unique_index": "0;1;2;1", "aff_country_unique": "Switzerland;United States;United Kingdom" }, { "id": "774elYc5tw", "title": "Unlocking Anticipatory Text Generation: A Constrained Approach for Faithful Decoding with Large Language Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "Large Language Models (LLMs) have demonstrated a powerful ability for text generation. However, achieving optimal results with a given prompt or instruction can be challenging, especially for billion-sized models. Additionally, undesired behaviors such as toxicity or hallucinations can manifest. While much larger models (e.g., ChatGPT) may demonstrate strength in mitigating these issues, there is still no guarantee of complete prevention. In this work, we propose formalizing text generation as a future-constrained generation problem to minimize undesirable behaviors and enforce faithfulness to instructions. The estimation of future constraint satisfaction, accomplished using LLMs, guides the text generation process. 
Our extensive experiments demonstrate the effectiveness of the proposed approach across three distinct text generation tasks: keyword-constrained generation (Lin et al., 2020), toxicity reduction (Gehman et al., 2020), and factual correctness in question-answering (Gao et al., 2023).", "keywords": "LLM decoding;keyword-constrained generation;toxicity reduction;factual correctness", "primary_area": "generative models", "supplementary_material": "", "author": "Lifu Tu;Semih Yavuz;Jin Qu;Jiacheng Xu;Rui Meng;Caiming Xiong;Yingbo Zhou", "authorids": "~Lifu_Tu1;~Semih_Yavuz1;~Jin_Qu1;~Jiacheng_Xu2;~Rui_Meng1;~Caiming_Xiong1;~Yingbo_Zhou1", "gender": "M;M;M;M;M;;M", "homepage": "http://ttic.uchicago.edu/~lifu/;;https://jiacheng-xu.github.io/;http://memray.me;http://cmxiong.com/;;", "dblp": "176/5280.html;;188/6025;;80/7282;72/8614;", "google_scholar": ";;yfbcnfUAAAAJ;s6h8L_UAAAAJ;vaSdahkAAAAJ;H_6RQ7oAAAAJ;krh3p8AAAAAJ", "orcid": ";;;0000-0001-5583-4924;;;", "linkedin": ";jin-qu/;;memray/;caiming-xiong-150a1417;yingbozhou/;semih-yavuz-4303518b", "or_profile": "~Lifu_Tu1;~Jin_Qu1;~Jiacheng_Xu2;~Rui_Meng1;~Caiming_Xiong1;~Yingbo_Zhou1;~Semih_Yavuz2", "aff": "Salesforce AI Research;Salesforce AI Research;SalesForce.com;Salesforce Research;Salesforce Research;Salesforce Research;SalesForce.com", "aff_domain": "salesforce.com;salesforce.com;salesforce.com;salesforce.com;salesforce.com;salesforce.com;salesforce.com", "position": "research scientist;Researcher;Researcher;Researcher;Research Scientist;Research Scientist;Research Scientist", "bibtex": "@misc{\ntu2024unlocking,\ntitle={Unlocking Anticipatory Text Generation: A Constrained Approach for Faithful Decoding with Large Language Models},\nauthor={Lifu Tu and Semih Yavuz and Jin Qu and Jiacheng Xu and Rui Meng and Caiming Xiong and Yingbo Zhou},\nyear={2024},\nurl={https://openreview.net/forum?id=774elYc5tw}\n}", "github": "", "project": "", "reviewers": "Ht7A;eM49;h9Ue;cGVr", "site": "https://openreview.net/forum?id=774elYc5tw", "pdf_size": 702047, "rating": "3;3;5;6", "confidence": "4;3;4;3", "soundness": "2;2;3;3", "contribution": "2;4;3;2", "presentation": "1;1;3;3", "wc_summary": "111;100;34;94", "wc_strengths": "82;40;88;12", "wc_weaknesses": "258;271;90;25", "wc_questions": "1;12;1;45", "wc_review": "452;423;213;176", "wc_reply_reviewers": "0;49;10;6", "wc_reply_authors": "514;276;327;332", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.0, 1.0 ], "wc_summary_avg": [ 84.75, 29.92803869283786 ], "wc_strengths_avg": [ 55.5, 31.188940347501386 ], "wc_weaknesses_avg": [ 161.0, 106.12021485089446 ], "wc_questions_avg": [ 14.75, 18.0329559418305 ], "wc_review_avg": [ 316.0, 122.63156200587188 ], "wc_reply_reviewers_avg": [ 16.25, 19.240257274787155 ], "wc_reply_authors_avg": [ 362.25, 90.31161331744661 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.19245008972987526, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2262832977763853226&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Salesforce", "aff_unique_dep": "Salesforce AI Research", "aff_unique_url": "https://www.salesforce.com", "aff_unique_abbr": "Salesforce AI", "aff_campus_unique_index": 
"", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Exploring the Common Appearance-Boundary Adaptation for Nighttime Optical Flow", "status": "Spotlight", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19374", "id": "776lhoaulC", "author_site": "Hanyu Zhou, Yi Chang, Haoyue Liu, YAN WENDING, Yuxing Duan, Zhiwei Shi, Luxin Yan", "tldr": "", "abstract": "We investigate a challenging task of nighttime optical flow, which suffers from weakened texture and amplified noise. These degradations weaken discriminative visual features, thus causing invalid motion feature matching. Typically, existing methods employ domain adaptation to transfer knowledge from auxiliary domain to nighttime domain in either input visual space or output motion space. However, this direct adaptation is ineffective, since there exists a large domain gap due to the intrinsic heterogeneous nature of the feature representations between auxiliary and nighttime domains. To overcome this issue, we explore a common-latent space as the intermediate bridge to reinforce the feature alignment between auxiliary and nighttime domains. In this work, we exploit two auxiliary daytime and event domains, and propose a novel common appearance-boundary adaptation framework for nighttime optical flow. In appearance adaptation, we employ the intrinsic image decomposition to embed the auxiliary daytime image and the nighttime image into a reflectance-aligned common space. We discover that motion distributions of the two reflectance maps are very similar, benefiting us to consistently transfer motion appearance knowledge from daytime to nighttime domain. In boundary adaptation, we theoretically derive the motion correlation formula between nighttime image and accumulated events within a spatiotemporal gradient-aligned common space. We figure out that the correlation of the two spatiotemporal gradient maps shares significant discrepancy, benefitting us to contrastively transfer boundary knowledge from event to nighttime domain. Moreover, appearance adaptation and boundary adaptation are complementary to each other, since they could jointly transfer global motion and local boundary knowledge to the nighttime domain. 
Extensive experiments have been performed to verify the superiority of the proposed method.", "keywords": "nighttime optical flow;event camera;domain adaptation;common space", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "/attachment/135190ac7fe30de3e806610e290c0c8d4bd76b47.zip", "author": "Hanyu Zhou;Yi Chang;Haoyue Liu;YAN WENDING;Yuxing Duan;Zhiwei Shi;Luxin Yan", "authorids": "~Hanyu_Zhou1;~Yi_Chang2;~Haoyue_Liu1;~YAN_WENDING1;~Yuxing_Duan1;~Zhiwei_Shi2;~Luxin_Yan2", "gender": "M;M;M;M;M;M;M", "homepage": "https://hyzhouboy.github.io/;https://github.com/Liu-haoyue;;;https://alex-code-hust.github.io/Alex.github.io/;http://faculty.hust.edu.cn/yanluxin/en/index.htm;https://owuchangyuo.github.io/", "dblp": "262/5105;200/2303;262/3827;335/6783;;81/9161;02/5438-2", "google_scholar": "bRXguCgAAAAJ;;;Hn5oJJsAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=I1nZ67YAAAAJ", "orcid": "0009-0007-1986-4975;;0000-0001-5993-8405;;;;0000-0001-8542-5937", "linkedin": "hanyu-zhou-0b67b3274/;;;;louis-alex-2530a4287/;;", "or_profile": "~Hanyu_Zhou1;~Haoyue_Liu1;~YAN_WENDING1;~Yuxing_Duan1;~Zhiwei_Shi2;~Luxin_Yan2;~Yi_Chang3", "aff": "Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huawei Technologies Ltd.;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology", "aff_domain": "hust.edu.cn;hust.edu.cn;huawei.com;hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn", "position": "PhD student;PhD student;Researcher;PhD student;MS student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nzhou2024exploring,\ntitle={Exploring the Common Appearance-Boundary Adaptation for Nighttime Optical Flow},\nauthor={Hanyu Zhou and Yi Chang and Haoyue Liu and YAN WENDING and Yuxing Duan and Zhiwei Shi and Luxin Yan},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=776lhoaulC}\n}", "github": "", "project": "", "reviewers": "koka;hzUU;R9ad", "pdf_size": 20187515, "rating": "6;8;10", "confidence": "4;2;3", "soundness": "3;3;3", "contribution": "3;3;4", "presentation": "3;4;4", "wc_summary": "97;63;87", "wc_strengths": "50;43;77", "wc_weaknesses": "57;23;43", "wc_questions": "74;13;6", "wc_review": "278;142;213", "wc_reply_reviewers": "0;0;19", "wc_reply_authors": "1335;313;321", "reply_reviewers": "0;0;1", "reply_authors": "4;3;4", "rating_avg": [ 8.0, 1.632993161855452 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 82.33333333333333, 14.2672897060218 ], "wc_strengths_avg": [ 56.666666666666664, 14.65908895153068 ], "wc_weaknesses_avg": [ 41.0, 13.9522996909709 ], "wc_questions_avg": [ 31.0, 30.539591789456953 ], "wc_review_avg": [ 211.0, 55.53977553669682 ], "wc_reply_reviewers_avg": [ 6.333333333333333, 8.956685895029603 ], "wc_reply_authors_avg": [ 656.3333333333334, 479.9009156991565 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 3, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=8075682768590297256&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=776lhoaulC", "pdf": "https://openreview.net/pdf?id=776lhoaulC", "email": "hust.edu.cn;hust.edu.cn;huawei.com;hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn", "author_num": 7, "aff_unique_index": "0;0;1;0;0;0;0", "aff_unique_norm": "Huazhong University of Science and Technology;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "http://www.hust.edu.cn;https://www.huawei.com", "aff_unique_abbr": "HUST;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "77N93tc3o5", "title": "Deep Independent Vector Analysis", "track": "main", "status": "Reject", "tldr": "", "abstract": "We introduce a deep multivariate latent variable model, Deep Independent Vector Analysis (DeepIVA), for learning linked and identifiable disentangled representations across multiple data modalities by unifying multidataset independent subspace analysis (MISA) and identifiable variational autoencoders (iVAE). DeepIVA aims to leverage hidden linkage information via the MISA loss to attain latent cross-modal alignment while leveraging the identifiability properties of the iVAE to ensure proper unimodal disentanglement. We propose a more strict set of performance measures, and demonstrate that DeepIVA can successfully recover nonlinearly mixed multimodal sources on multiple linked synthetic datasets compared with iVAE and MISA. We then apply DeepIVA on a large multimodal neuroimaging dataset, and show that DeepIVA can reveal linked nonlinear imaging sources associated with phenotype measures including age and sex.", "keywords": "multimodal fusion;nonlinear IVA;MISA;iVAE", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/c0e99dfac4f06706f29dd4a4b13a869df51ec49d.pdf", "author": "Xinhui Li;Rogers F Silva;Vince Calhoun", "authorids": "~Xinhui_Li2;~Rogers_F_Silva1;~Vince_Calhoun1", "gender": "F;M;", "homepage": "https://xinhuili.github.io/;https://trendscenter.org/rogers-silva/;", "dblp": ";154/4305;48/3821.html", "google_scholar": "YKtWorEAAAAJ;cMtwwG8AAAAJ;WNOoGKIAAAAJ", "orcid": "0000-0001-5604-9493;0000-0002-7271-1288;", "linkedin": "xinhui-li/;rogersfsilva;", "or_profile": "~Xinhui_Li2;~Rogers_F_Silva1;~Vince_Calhoun1", "aff": "Georgia Institute of Technology;TReNDS Center (Georgia State University, Georgia Institute of Technology, Emory University);Emory University", "aff_domain": "gatech.edu;gsu.edu;emory.edu", "position": "PhD student;Researcher;Full Professor", "bibtex": "@misc{\nli2024deep,\ntitle={Deep Independent Vector Analysis},\nauthor={Xinhui Li and Rogers F Silva and Vince Calhoun},\nyear={2024},\nurl={https://openreview.net/forum?id=77N93tc3o5}\n}", "github": "", "project": "", "reviewers": "Uwfa;FDhN;queK;dLxh", "site": "https://openreview.net/forum?id=77N93tc3o5", "pdf_size": 17338729, "rating": "3;5;5;5", "confidence": "4;2;3;4", "soundness": "2;2;3;1", "contribution": "2;2;2;1", "presentation": "2;3;3;2", "wc_summary": "55;89;70;44", "wc_strengths": "5;70;138;24", "wc_weaknesses": "130;66;160;207", "wc_questions": "37;100;169;556", "wc_review": "227;325;537;831", "wc_reply_reviewers": "46;19;66;784", "wc_reply_authors": "608;312;1133;1506", "reply_reviewers": "1;1;1;2", "reply_authors": "2;1;3;4", "rating_avg": [ 4.5, 0.8660254037844386 ], 
"confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.0, 0.7071067811865476 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 64.5, 16.889345754054535 ], "wc_strengths_avg": [ 59.25, 51.24146270355678 ], "wc_weaknesses_avg": [ 140.75, 51.143792389692806 ], "wc_questions_avg": [ 215.5, 202.055066751616 ], "wc_review_avg": [ 480.0, 231.56208670678367 ], "wc_reply_reviewers_avg": [ 228.75, 321.00730131883296 ], "wc_reply_authors_avg": [ 889.75, 461.5497670890973 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "aff_unique_index": "0;1;2", "aff_unique_norm": "Georgia Institute of Technology;Georgia State University;Emory University", "aff_unique_dep": ";TReNDS Center;", "aff_unique_url": "https://www.gatech.edu;https://www.gsu.edu;https://www.emory.edu", "aff_unique_abbr": "Georgia Tech;GSU;Emory", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "78Fp8ac3Hi", "title": "Violence Detection and Localization in Video Through Subgroup Analysis", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "In an era of rapid technological advancements, computer systems play a crucial role in early Violence Detection (VD) and localization, which is critical for timely human intervention. However, existing VD methods often fall short, lacking applicability to surveillance data, and failing to address the localization and social dimension of violent events.\nTo address these shortcomings, we propose a novel approach to integrate social subgroups into VD. Our method recognizes and tracks subgroups across frames, providing an additional layer of information in VD. This enables the system to not only detect violence at video-level, but also to identify the groups involved. This adaptable add-on module can enhance the applicability of existing models and algorithms.\nThrough extensive experiments on the SCFD and RWF-2000 surveillance datasets, we find that our approach improves social awareness in VD by localizing the people involved in an act of violence. The system offers a small performance boost on the SCFD dataset and maintains performance on RWF-2000, reaching 91.3% and 87.2% accuracy respectively, demonstrating its practical utility while performing close to state-of-the-art methods. 
Furthermore, our method generalizes well to unseen datasets, marking a promising advance in early VD.", "keywords": "violence detection;violence localization;subgroup analysis;subgroup tracking", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Emmeke A Veltmeijer;Morris E Franken;Charlotte Gerritsen", "authorids": "~Emmeke_A_Veltmeijer1;~Morris_E_Franken1;~Charlotte_Gerritsen1", "gender": "F;;F", "homepage": ";;https://charlottegerritsen.com/", "dblp": "252/8837.html;;", "google_scholar": "Gg3kiW0AAAAJ;;", "orcid": "0000-0002-0749-4520;;", "linkedin": "emmeke-veltmeijer-70730214b/;;", "or_profile": "~Emmeke_A_Veltmeijer1;~Morris_E_Franken1;~Charlotte_Gerritsen1", "aff": "Vrije Universiteit Amsterdam;;Vrije Universiteit Amsterdam", "aff_domain": "vu.nl;;vu.nl", "position": "PhD student;;Associate Professor", "bibtex": "@misc{\nveltmeijer2024violence,\ntitle={Violence Detection and Localization in Video Through Subgroup Analysis},\nauthor={Emmeke A Veltmeijer and Morris E Franken and Charlotte Gerritsen},\nyear={2024},\nurl={https://openreview.net/forum?id=78Fp8ac3Hi}\n}", "github": "", "project": "", "reviewers": "WPpq;n6jP;u3A5;bXAa", "site": "https://openreview.net/forum?id=78Fp8ac3Hi", "pdf_size": 6067759, "rating": "3;3;5;5", "confidence": "4;4;3;4", "soundness": "2;2;2;3", "contribution": "3;2;2;2", "presentation": "2;3;2;3", "wc_summary": "182;68;39;48", "wc_strengths": "37;19;23;34", "wc_weaknesses": "108;180;183;86", "wc_questions": "43;69;105;9", "wc_review": "370;336;350;177", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 84.25, 57.40372374680932 ], "wc_strengths_avg": [ 28.25, 7.46240577829965 ], "wc_weaknesses_avg": [ 139.25, 42.97310205233036 ], "wc_questions_avg": [ 56.5, 35.16745654721137 ], "wc_review_avg": [ 308.25, 76.73452612742194 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:AWz-xtCHgQcJ:scholar.google.com/&scioq=Violence+Detection+and+Localization+in+Video+Through+Subgroup+Analysis&hl=en&as_sdt=0,14", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Vrije Universiteit Amsterdam", "aff_unique_dep": "", "aff_unique_url": "https://www.vu.nl", "aff_unique_abbr": "VU Amsterdam", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Netherlands" }, { "title": "Mirage: Model-agnostic Graph Distillation for Graph Classification", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19373", "id": "78iGZdqxYY", "author_site": "Mridul Gupta, Sahil Manchanda, HARIPRASAD KODAMANA, Sayan Ranu", "tldr": "", "abstract": "GNNs, like other deep learning models, are data and computation hungry. There is a pressing need to scale training of GNNs on large datasets to enable their usage in low-resource environments.
Graph distillation is an effort in that direction with the aim to construct a smaller synthetic training set from the original training data without significantly compromising model performance. While initial efforts are promising, this work is motivated by two key observations: (1) Existing graph distillation algorithms themselves rely on training with the full dataset, which undermines the very premise of graph distillation. (2) The distillation process is specific to the target GNN architecture and hyper-parameters and thus not robust to changes in the modeling pipeline. We circumvent these limitations by designing a distillation algorithm called MIRAGE for graph classification. MIRAGE is built on the insight that a message-passing GNN decomposes the input graph into a multiset of computation trees. Furthermore, the frequency distribution of computation trees is often skewed in nature, enabling us to condense this data into a concise distilled summary. By compressing the computation data itself, as opposed to emulating gradient flows on the original training set\u2014a prevalent approach to date\u2014MIRAGE transforms into an unsupervised and architecture-agnostic distillation algorithm. Extensive benchmarking on real-world datasets underscores MIRAGE\u2019s superiority, showcasing enhanced generalization accuracy, data compression, and distillation efficiency when compared to state-of-the-art baselines.", "keywords": "graph distillation;graph classification;frequent pattern mining", "primary_area": "learning on graphs and other geometries & topologies", "supplementary_material": "", "author": "Mridul Gupta;Sahil Manchanda;HARIPRASAD KODAMANA;Sayan Ranu", "authorids": "~Mridul_Gupta2;~Sahil_Manchanda1;~HARIPRASAD_KODAMANA1;~Sayan_Ranu2", "gender": "M;M;M;M", "homepage": "https://web.iitd.ac.in/~aiz218322;https://www.cse.iitd.ac.in/~sahilm;https://web.iitd.ac.in/~kodamana/;https://www.cse.iitd.ac.in/~sayan/index.html", "dblp": ";200/8052;;38/768", "google_scholar": "g_cTs3YAAAAJ;OPyjQHwAAAAJ;https://scholar.google.co.in/citations?user=YBcs36wAAAAJ;K4w5qYUAAAAJ", "orcid": "0009-0003-4343-4263;0000-0001-7437-9891;;0000-0003-4147-9372", "linkedin": "mridul1618/;;;", "or_profile": "~Mridul_Gupta2;~Sahil_Manchanda1;~HARIPRASAD_KODAMANA1;~Sayan_Ranu2", "aff": "Indian Institute of Technology, Delhi;Indian Institute of Technology Delhi;Indian Institute of Technology, Delhi;Indian Institute of Technology Delhi", "aff_domain": "iitd.ac.in;iitd.ac.in;iitd.ac.in;iitd.ac.in", "position": "PhD student;PhD student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\ngupta2024mirage,\ntitle={Mirage: Model-agnostic Graph Distillation for Graph Classification},\nauthor={Mridul Gupta and Sahil Manchanda and HARIPRASAD KODAMANA and Sayan Ranu},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=78iGZdqxYY}\n}", "github": "", "project": "", "reviewers": "XL6A;yzvf;pdUR;2KBE", "pdf_size": 2107733, "rating": "6;6;6;6", "confidence": "3;4;4;3", "soundness": "3;3;3;3", "contribution": "2;3;4;2", "presentation": "3;3;3;3", "wc_summary": "90;132;63;87", "wc_strengths": "54;73;72;45", "wc_weaknesses": "10;132;164;428", "wc_questions": "67;44;23;5", "wc_review": "221;381;322;565", "wc_reply_reviewers": "0;10;15;11", "wc_reply_authors": "702;1251;1406;2160", "reply_reviewers": "0;1;1;1", "reply_authors": "4;2;3;7", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], 
"contribution_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 93.0, 24.829418035870273 ], "wc_strengths_avg": [ 61.0, 11.937336386313323 ], "wc_weaknesses_avg": [ 183.5, 152.40980939558975 ], "wc_questions_avg": [ 34.75, 23.177305710543667 ], "wc_review_avg": [ 372.25, 125.13068168918444 ], "wc_reply_reviewers_avg": [ 9.0, 5.522680508593631 ], "wc_reply_authors_avg": [ 1379.75, 520.9128405981177 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 4.0, 1.8708286933869707 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14231408030950146569&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=78iGZdqxYY", "pdf": "https://openreview.net/pdf?id=78iGZdqxYY", "email": "iitd.ac.in;iitd.ac.in;iitd.ac.in;iitd.ac.in", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Indian Institute of Technology Delhi", "aff_unique_dep": "", "aff_unique_url": "https://www.iitdelhi.ac.in", "aff_unique_abbr": "IIT Delhi", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Delhi", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "India" }, { "title": "A Characterization Theorem for Equivariant Networks with Point-wise Activations", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19372", "id": "79FVDdfoSR", "author_site": "Marco Pacini, Xiaowen Dong, Bruno Lepri, Gabriele Santin", "tldr": "", "abstract": "Equivariant neural networks have shown improved performance, expressiveness and sample complexity on symmetrical domains. \nBut for some specific symmetries, representations, and choice of coordinates, the most common point-wise activations, such as ReLU, are not equivariant, hence they cannot be employed in the design of equivariant neural networks. \nThe theorem we present in this paper describes all possibile combinations of representations, choice of coordinates and point-wise activations to obtain an equivariant layer, generalizing and strengthening existing characterizations.\nNotable cases of practical relevance are discussed as corollaries. Indeed, we prove that rotation-equivariant networks can only be invariant, as it happens for any network which is equivariant with respect to connected compact groups. Then, we discuss implications of our findings when applied to important instances of equivariant networks. First, we completely characterize permutation equivariant networks such as Invariant Graph Networks with point-wise nonlinearities and their geometric counterparts, highlighting a plethora of models whose expressive power and performance are still unknown. 
\nSecond, we show that feature spaces of disentangled steerable convolutional neural networks are trivial representations.", "keywords": "Geometric Deep Learning;Equivariant Neural Networks;Characterization Theorem;Point-wise Activations", "primary_area": "learning on graphs and other geometries & topologies", "supplementary_material": "", "author": "Marco Pacini;Xiaowen Dong;Bruno Lepri;Gabriele Santin", "authorids": "~Marco_Pacini1;~Xiaowen_Dong1;~Bruno_Lepri1;gabriele.santin@unive.it", "gender": "M;;M;", "homepage": "https://marco-pacini.github.io/;https://web.media.mit.edu/~xdong/;;", "dblp": "213/0755;91/9827-1;99/6489;", "google_scholar": "https://scholar.google.com/citations?hl=en;_8tUq8kAAAAJ;JfcopG0AAAAJ;", "orcid": ";;0000-0003-1275-2333;", "linkedin": ";;brunolepri/?originalSubdomain=it;", "or_profile": "~Marco_Pacini1;~Xiaowen_Dong1;~Bruno_Lepri1;gabriele.santin@unive.it", "aff": "Fondazione Bruno Kessler;Massachusetts Institute of Technology;Fondazione Bruno Kessler;", "aff_domain": "fbk.eu;mit.edu;fbk.eu;", "position": "PhD student;Research Affiliate;Principal Researcher;", "bibtex": "@inproceedings{\npacini2024a,\ntitle={A Characterization Theorem for Equivariant Networks with Point-wise Activations},\nauthor={Marco Pacini and Xiaowen Dong and Bruno Lepri and Gabriele Santin},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=79FVDdfoSR}\n}", "github": "", "project": "", "reviewers": "3Kou;uXzC;merJ;sg92", "pdf_size": 342950, "rating": "6;6;8;8", "confidence": "3;2;3;3", "soundness": "3;3;3;4", "contribution": "2;2;4;3", "presentation": "2;1;3;2", "wc_summary": "57;68;85;59", "wc_strengths": "54;97;123;68", "wc_weaknesses": "296;135;135;594", "wc_questions": "45;98;73;233", "wc_review": "452;398;416;954", "wc_reply_reviewers": "16;27;17;250", "wc_reply_authors": "738;536;517;1417", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;2", "rating_avg": [ 7.0, 1.0 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 67.25, 11.053845484717073 ], "wc_strengths_avg": [ 85.5, 26.63174797117155 ], "wc_weaknesses_avg": [ 290.0, 187.41798206148738 ], "wc_questions_avg": [ 112.25, 72.19201825686825 ], "wc_review_avg": [ 555.0, 231.18174668429165 ], "wc_reply_reviewers_avg": [ 77.5, 99.68575625434157 ], "wc_reply_authors_avg": [ 802.0, 365.47982160442183 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10859373733821825022&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "openreview": "https://openreview.net/forum?id=79FVDdfoSR", "pdf": "https://openreview.net/pdf?id=79FVDdfoSR", "email": "fbk.eu;mit.edu;fbk.eu;", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Fondazione Bruno Kessler;Massachusetts Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.fbk.eu;https://web.mit.edu", "aff_unique_abbr": "FBK;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Italy;United States" }, { "title": "Designing Skill-Compatible AI: Methodologies and Frameworks in Chess", "status": "Poster", "track": "main", "site": 
"https://iclr.cc/virtual/2024/poster/19371", "id": "79rfgv3jw4", "author_site": "KARIM HAMADE, Reid McIlroy-Young, Siddhartha Sen, Jon Kleinberg, Ashton Anderson", "tldr": "", "abstract": "Powerful artificial intelligence systems are often used in settings where they must interact with agents that are computationally much weaker, for example when they work alongside humans or operate in complex environments where some tasks are handled by algorithms, heuristics, or other entities of varying computational power. For AI agents to successfully interact in these settings, however, achieving superhuman performance alone is not sufficient; they also need to account for suboptimal actions or idiosyncratic style from their less-skilled counterparts. We propose a formal evaluation framework for assessing the compatibility of near-optimal AI with interaction partners who may have much lower levels of skill; we use popular collaborative chess variants as model systems to study and develop AI agents that can successfully interact with lower-skill entities. Traditional chess engines designed to output near-optimal moves prove to be inadequate partners when paired with engines of various lower skill levels in this domain, as they are not designed to consider the presence of other agents. We contribute three methodologies to explicitly create skill-compatible AI agents in complex decision-making settings, and two chess game frameworks designed to foster collaboration between powerful AI agents and less-skilled partners. On these frameworks, our agents outperform state-of-the-art chess AI (based on AlphaZero) despite being weaker in conventional chess, demonstrating that skill-compatibility is a tangible trait that is qualitatively and measurably distinct from raw performance. 
Our evaluations further explore and clarify the mechanisms by which our agents achieve skill-compatibility.", "keywords": "Skill-AI compatibility;Agent Systems;Decision-making;Chess;Deep RL", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/d067ff4cd68eb84dc82fae7b1d69cea1c70699ea.zip", "author": "Karim Hamade;Reid McIlroy-Young;Siddhartha Sen;Jon Kleinberg;Ashton Anderson", "authorids": "~Karim_Hamade1;~Reid_McIlroy-Young1;~Siddhartha_Sen1;~Jon_Kleinberg1;~Ashton_Anderson1", "gender": "M;M;;;M", "homepage": ";https://reidmcy.com/;http://sidsen.org;http://www.cs.toronto.edu/~ashton/;http://www.cs.cornell.edu/home/kleinber/", "dblp": ";196/4704;;21/8524;https://dblp.uni-trier.de/pid/k/JonMKleinberg.html", "google_scholar": ";https://scholar.google.ca/citations?user=7Tclf3kAAAAJ;;https://scholar.google.co.uk/citations?user=FMSltawAAAAJ;VX7d5EQAAAAJ", "orcid": ";0000-0001-9104-4145;;;0000-0002-1929-2512", "linkedin": "karim-h-400143200/;;;;", "or_profile": "~Karim_Hamade1;~Reid_McIlroy-Young1;~Siddhartha_Sen1;~Ashton_Anderson1;~Jon_Kleinberg3", "aff": "Department of Computer Science, University of Toronto;Harvard University;Microsoft Research;Department of Computer Science, University of Toronto;", "aff_domain": "cs.toronto.edu;harvard.edu;research.microsoft.com;cs.toronto.edu;", "position": "MS student;Postdoc;Principal Researcher;Assistant Professor;", "bibtex": "@inproceedings{\nhamade2024designing,\ntitle={Designing Skill-Compatible {AI}: Methodologies and Frameworks in Chess},\nauthor={Karim Hamade and Reid McIlroy-Young and Siddhartha Sen and Jon Kleinberg and Ashton Anderson},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=79rfgv3jw4}\n}", "github": "", "project": "", "reviewers": "9rEp;SKaW;XZq8;D1Jx", "pdf_size": 966147, "rating": "5;6;8;8", "confidence": "3;4;4;4", "soundness": "3;3;3;3", "contribution": "3;2;3;4", "presentation": "3;3;3;3", "wc_summary": "104;73;253;139", "wc_strengths": "79;74;217;84", "wc_weaknesses": "82;282;366;65", "wc_questions": "95;16;704;92", "wc_review": "360;445;1540;380", "wc_reply_reviewers": "39;47;379;52", "wc_reply_authors": "825;420;1521;401", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;3;2", "rating_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 142.25, 68.0711943482704 ], "wc_strengths_avg": [ 113.5, 59.860253925288355 ], "wc_weaknesses_avg": [ 198.75, 128.8630571575888 ], "wc_questions_avg": [ 226.75, 277.35300160625627 ], "wc_review_avg": [ 681.25, 496.7944117036745 ], "wc_reply_reviewers_avg": [ 129.25, 144.2677632043971 ], "wc_reply_authors_avg": [ 791.75, 453.8156977232057 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.7777777777777777, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18109915714699930021&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=79rfgv3jw4", "pdf": "https://openreview.net/pdf?id=79rfgv3jw4", "email": "cs.toronto.edu;harvard.edu;research.microsoft.com;cs.toronto.edu;", "author_num": 5, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Toronto;Harvard University;Microsoft", 
"aff_unique_dep": "Department of Computer Science;;Microsoft Research", "aff_unique_url": "https://www.utoronto.ca;https://www.harvard.edu;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "U of T;Harvard;MSR", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Toronto;", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "Canada;United States" }, { "id": "79tJB1eTmb", "title": "Meta-CoT: Generalizable Chain-of-Thought Prompting in Mixed-task Scenarios with Large Language Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "Large language models (LLMs) have unveiled remarkable reasoning capabilities by exploiting chain-of-thought (CoT) prompting, which generates intermediate reasoning chains to serve as the rationale for deriving the answer. However, current CoT methods either simply employ general prompts such as Let\u2019s think step by step, or heavily rely on handcrafted task-specific demonstrations to attain preferable performances, thereby engendering an inescapable gap between performance and generalization. To bridge this gap, we propose Meta-CoT, a generalizable CoT prompting method in mixed-task scenarios where the type of input questions is unknown. Meta-CoT firstly categorizes the scenario based on the input question and subsequently constructs diverse demonstrations from the corresponding data pool in an automatic pattern. Meta-CoT simultaneously enjoys remarkable performances on ten public benchmark reasoning tasks and superior generalization capabilities. Notably, Meta-CoT achieves the state-of-the-art result on SVAMP (93.7%) without any additional program-aided methods. Our further experiments on five out-of-distribution datasets verify the stability and generality of Meta-CoT.", "keywords": "Chain of Thought Prompting;Large Language Models;In-context Learning;Few-shot Learning;Arithmetic Reasoning;Commonsense Reasoning;Symbolic Reasoning", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Anni Zou;Zhuosheng Zhang;hai zhao;Xiangru Tang", "authorids": "~Anni_Zou1;~Zhuosheng_Zhang1;~hai_zhao1;~Xiangru_Tang2", "gender": "F;M;M;M", "homepage": ";https://bcmi.sjtu.edu.cn/~zhangzs/;http://bcmi.sjtu.edu.cn/~zhaohai/;https://xiangrutang.github.io/", "dblp": ";06/9708;25/1145-1.html;246/8064", "google_scholar": ";https://scholar.google.co.jp/citations?user=63LTQhgAAAAJ;https://scholar.google.com.tw/citations?user=4dU5KS0AAAAJ;", "orcid": "0000-0001-6378-6475;0000-0002-4183-3645;;", "linkedin": ";;;", "or_profile": "~Anni_Zou1;~Zhuosheng_Zhang1;~hai_zhao1;~Xiangru_Tang2", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Yale University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;yale.edu", "position": "MS student;Assistant Professor;Full Professor;PhD student", "bibtex": "@misc{\nzou2024metacot,\ntitle={Meta-CoT: Generalizable Chain-of-Thought Prompting in Mixed-task Scenarios with Large Language Models},\nauthor={Anni Zou and Zhuosheng Zhang and hai zhao and Xiangru Tang},\nyear={2024},\nurl={https://openreview.net/forum?id=79tJB1eTmb}\n}", "github": "", "project": "", "reviewers": "JvEc;FQzo;uTXx", "site": "https://openreview.net/forum?id=79tJB1eTmb", "pdf_size": 1538016, "rating": "3;3;3", "confidence": "5;4;4", "soundness": "2;1;2", "contribution": "1;2;2", "presentation": "3;1;2", "wc_summary": "101;76;115", "wc_strengths": "16;20;15", "wc_weaknesses": "56;109;192", "wc_questions": 
"81;57;63", "wc_review": "254;262;385", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 3.0, 0.0 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 1.6666666666666667, 0.4714045207910317 ], "contribution_avg": [ 1.6666666666666667, 0.4714045207910317 ], "presentation_avg": [ 2.0, 0.816496580927726 ], "wc_summary_avg": [ 97.33333333333333, 16.131404843417148 ], "wc_strengths_avg": [ 17.0, 2.160246899469287 ], "wc_weaknesses_avg": [ 119.0, 55.97023018236272 ], "wc_questions_avg": [ 67.0, 10.198039027185569 ], "wc_review_avg": [ 300.3333333333333, 59.95739227892495 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6122894346217833235&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Shanghai Jiao Tong University;Yale University", "aff_unique_dep": ";", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.yale.edu", "aff_unique_abbr": "SJTU;Yale", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "China;United States" }, { "id": "7AB077M4TY", "title": "Dynamic Training Guided by Training Dynamics", "track": "main", "status": "Reject", "tldr": "", "abstract": "This paper centers around a novel concept proposed recently by researchers from the control community where the training process of a deep neural network can be considered a nonlinear dynamical system acting upon the high-dimensional weight space. Koopman operator theory, a data-driven dynamical system analysis framework, can then be deployed to discover the otherwise non-intuitive training dynamics. Different from existing approaches that mainly take advantage of the prediction capability of this framework, we take a deep dive into understanding the underlying relationship between the low-dimensional Koopman modes that describe the training dynamics and the weight evolution itself, and develop two novel strategies for speeding up model convergence in an online fashion, including 1) a gradient acceleration strategy that improves training efficiency by pushing the slowly decaying Koopman modes to decay faster, and 2) a masking strategy that drastically reduces the computational complexity of gradient acceleration by analyzing the contribution of the corresponding Koopman modes in weight reconstruction. 
These strategies offer promising insights into pursuing faster and more efficient training methodologies and improve our understanding of training dynamics to further control and inform the training process.", "keywords": "training dynamics;Deep Neural Networks;Koopman Operator", "primary_area": "optimization", "supplementary_material": "", "author": "Fanqi Wang;Landon Harris;Weisheng Tang;Hairong Qi;Dan Wilson;Igor Mezic", "authorids": "~Fanqi_Wang1;~Landon_Harris1;~Weisheng_Tang1;~Hairong_Qi1;~Dan_Wilson1;~Igor_Mezic1", "gender": "F;;M;F;M;M", "homepage": ";;;http://www.eecs.utk.edu/people/faculty/hqi/;http://volweb.utk.edu/~dwilso81;https://mgroup.me.ucsb.edu/", "dblp": ";;;00/6984-1.html;;", "google_scholar": ";;CKxchGcAAAAJ;https://scholar.google.com.tw/citations?user=GqnNG-kAAAAJ;;5d9ngqsAAAAJ", "orcid": ";;0000-0001-7307-7410;;;", "linkedin": ";;;hairong-qi-6a67602/;;", "or_profile": "~Fanqi_Wang1;~Landon_Harris1;~Weisheng_Tang1;~Hairong_Qi1;~Dan_Wilson1;~Igor_Mezic1", "aff": "University of Tennessee, Knoxville;;University of Tennessee, Knoxville;University of Tennessee, Knoxville;University of Tennessee, Knoxville;University of California, Santa Barbara", "aff_domain": "utk.edu;;utk.edu;vols.utk.edu;utk.edu;ucsb.edu", "position": "PhD student;;Postdoc;Full Professor;Assistant Professor;Full Professor", "bibtex": "@misc{\nwang2024dynamic,\ntitle={Dynamic Training Guided by Training Dynamics},\nauthor={Fanqi Wang and Landon Harris and Weisheng Tang and Hairong Qi and Dan Wilson and Igor Mezic},\nyear={2024},\nurl={https://openreview.net/forum?id=7AB077M4TY}\n}", "github": "", "project": "", "reviewers": "ptUk;xUqE;aC1Y;APZy", "site": "https://openreview.net/forum?id=7AB077M4TY", "pdf_size": 7102041, "rating": "3;3;3;5", "confidence": "3;5;3;2", "soundness": "3;2;1;3", "contribution": "1;2;2;3", "presentation": "1;2;2;3", "wc_summary": "577;94;112;77", "wc_strengths": "2;160;25;9", "wc_weaknesses": "2;526;156;145", "wc_questions": "2;13;239;37", "wc_review": "583;793;532;268", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1195;645;1203;604", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;2;1", "rating_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 1.0897247358851685 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 215.0, 209.36690282850344 ], "wc_strengths_avg": [ 49.0, 64.62584622269948 ], "wc_weaknesses_avg": [ 207.25, 193.798058555807 ], "wc_questions_avg": [ 72.75, 96.81522349300238 ], "wc_review_avg": [ 544.0, 186.97727134601146 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 911.75, 287.6294273887844 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.6622661785325219, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Tz7f2UO3ynEJ:scholar.google.com/&scioq=Dynamic+Training+Guided+by+Training+Dynamics&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "University of Tennessee;University of California, Santa Barbara", "aff_unique_dep": ";", "aff_unique_url": "https://www.utk.edu;https://www.ucsb.edu", "aff_unique_abbr": "UT;UCSB", "aff_campus_unique_index": "0;0;0;0;1", "aff_campus_unique": "Knoxville;Santa Barbara", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "7AS7vaVU8d", "title": "Learning 
Personalized Story Evaluation", "track": "main", "status": "Reject", "tldr": "", "abstract": "While large language models (LLMs) have shown impressive results for more objective tasks such as QA and retrieval, it remains nontrivial to evaluate their performance on open-ended text generation for reasons including (1) data contamination; (2) multi-dimensional evaluation criteria; and (3) subjectiveness stemming from reviewers\u2019 personal preferences. To address such issues, we propose to model personalization in an uncontaminated open-ended generation assessment. We create two new datasets Per-MPST and Per-DOC for personalized story evaluation, by re-purposing existing datasets with proper anonymization and new personalized labels. We further develop a personalized story evaluation model PERSE to infer reviewer preferences and provide a personalized evaluation. Specifically, given a few exemplary reviews from a particular reviewer, PERSE predicts either a detailed review or fine-grained comparison in several aspects (such as interestingness and surprise) for that reviewer on a new text input. Experimental results show that PERSE outperforms GPT-4 by 15.8% on Kendall correlation of story ratings, and by 13.7% on pairwise preference prediction accuracy. Both datasets and code will be released.", "keywords": "Open-ended Text Evaluation;Personalization;Automatic Story Evaluation", "primary_area": "generative models", "supplementary_material": "/attachment/4f28ad392c850a96d2f67fdb3dc50bf94641d766.zip", "author": "Danqing Wang;Kevin Yang;Hanlin Zhu;Xiaomeng Yang;Andrew Cohen;Lei Li;Yuandong Tian", "authorids": "~Danqing_Wang1;~Kevin_Yang2;~Hanlin_Zhu2;~Xiaomeng_Yang1;~Andrew_Cohen4;~Lei_Li11;~Yuandong_Tian1", "gender": "F;M;M;M;M;M;M", "homepage": ";https://hanlinzhu.com/;;;https://www.cs.cmu.edu/~leili;http://yuandong-tian.com;https://people.eecs.berkeley.edu/~yangk/", "dblp": "226/6524.html;;;;13/7007-5.html;t/YuandongTian;13/10565", "google_scholar": "https://scholar.google.com/citations?hl=en-US;yDVn5LEAAAAJ;t8v3JXsAAAAJ;;BYXqAlwAAAAJ;0mgEF28AAAAJ;sRpY9TIAAAAJ", "orcid": ";;0009-0007-3917-6811;;0000-0003-3095-9776;0000-0003-4202-4847;", "linkedin": ";;xiaomeng-yang-356a976b;andrew-cohen-17a7aa15b;;yuandongtian;", "or_profile": "~Danqing_Wang1;~Hanlin_Zhu2;~Xiaomeng_Yang1;~Andrew_Cohen4;~Lei_Li11;~Yuandong_Tian1;~Kevin_Yang1", "aff": "Carnegie Mellon University;Electrical Engineering & Computer Science Department, University of California Berkeley;Moonshot AI;Meta Platforms;School of Computer Science, Carnegie Mellon University;Meta AI (FAIR);Scaled Cognition", "aff_domain": "andrew.cmu.edu;eecs.berkeley.edu;msh.team;meta.com;cs.cmu.edu;meta.com;scaledcognition.com", "position": "PhD student;PhD student;Member of Technical Staff;Researcher;Assistant Professor;Research Scientist;Researcher", "bibtex": "@misc{\nwang2024learning,\ntitle={Learning Personalized Story Evaluation},\nauthor={Danqing Wang and Kevin Yang and Hanlin Zhu and Xiaomeng Yang and Andrew Cohen and Lei Li and Yuandong Tian},\nyear={2024},\nurl={https://openreview.net/forum?id=7AS7vaVU8d}\n}", "github": "", "project": "", "reviewers": "hjpK;7x1E;jhUM;9dNi", "site": "https://openreview.net/forum?id=7AS7vaVU8d", "pdf_size": 1974189, "rating": "5;5;5;8", "confidence": "4;3;4;5", "soundness": "2;2;2;3", "contribution": "2;2;3;4", "presentation": "1;1;3;3", "wc_summary": "47;55;127;117", "wc_strengths": "12;50;86;41", "wc_weaknesses": "61;230;47;63", "wc_questions": "2;171;22;397", "wc_review": "122;506;282;618", 
"wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "287;451;261;1219", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;2", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.0, 1.0 ], "wc_summary_avg": [ 86.5, 35.787567673704785 ], "wc_strengths_avg": [ 47.25, 26.413774815425377 ], "wc_weaknesses_avg": [ 100.25, 75.16440314404153 ], "wc_questions_avg": [ 148.0, 157.89395175243413 ], "wc_review_avg": [ 382.0, 192.79004123657424 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 554.5, 390.5032010112081 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15846584602882506993&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1;2;3;0;3;4", "aff_unique_norm": "Carnegie Mellon University;University of California, Berkeley;Moonshot AI;Meta;Scaled Cognition", "aff_unique_dep": ";Electrical Engineering & Computer Science Department;;Meta Platforms, Inc.;", "aff_unique_url": "https://www.cmu.edu;https://www.berkeley.edu;https://moonshot.ai;https://www.meta.com;", "aff_unique_abbr": "CMU;UC Berkeley;Moonshot AI;Meta;", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Berkeley;Pittsburgh", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "7AiPfnM73h", "title": "Projected Off-Policy Q-Learning (POP-QL) for Stabilizing Offline Reinforcement Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "A key problem in off-policy Reinforcement Learning (RL) is the mismatch, or distribution shift, between the dataset and the distribution over states and actions visited by the learned policy. This problem is exacerbated in the fully offline setting. The main approach to correct this shift has been through importance sampling, which leads to high-variance gradients. Other approaches, such as conservatism or behavior-regularization, regularize the policy at the cost of performance. In this paper, we propose a new approach for stable off-policy Q-Learning that builds on a theoretical result by Kolter (2011). Our method, Projected Off-Policy Q-Learning (POP-QL), is a novel actor-critic algorithm that simultaneously reweights off-policy samples and constrains the policy to prevent divergence and reduce value-approximation error. 
In our experiments, POP-QL not only shows competitive performance on standard benchmarks, but also out-performs competing methods in tasks where the data-collection policy is significantly sub-optimal.", "keywords": "TD learning;Off-policy RL;offline RL", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/2851abdae8899cd5ecd6e5e79858f1ab4a01d3ca.zip", "author": "Melrose Roderick;Gaurav Manek;Felix Berkenkamp;J Zico Kolter", "authorids": "~Melrose_Roderick1;~Gaurav_Manek1;~Felix_Berkenkamp1;~J_Zico_Kolter1", "gender": "M;;M;M", "homepage": "https://melroderick.github.io/;https://www.gauravmanek.com/;https://berkenkamp.me;http://www.zicokolter.com", "dblp": "181/3909;200/8866;168/8558;67/2526", "google_scholar": "PYrd2GMAAAAJ;C8Mdr2UAAAAJ;https://scholar.google.ch/citations?user=N_tCEl8AAAAJ;UXh1I6UAAAAJ", "orcid": ";;;", "linkedin": "https://linkedin.com/in/melrose-roderick-4b74b199;https://sg.linkedin.com/in/gauravmanek;berkenkamp/;", "or_profile": "~Melrose_Roderick1;~Gaurav_Manek1;~Felix_Berkenkamp1;~Zico_Kolter1", "aff": "Mila, University of Montreal;Carnegie Mellon University;Bosch;Carnegie Mellon University", "aff_domain": "mila.umontreal.ca;cmu.edu;bosch.com;cmu.edu", "position": "Postdoc;PhD student;Research Scientist;Full Professor", "bibtex": "@misc{\nroderick2024projected,\ntitle={Projected Off-Policy Q-Learning ({POP}-{QL}) for Stabilizing Offline Reinforcement Learning},\nauthor={Melrose Roderick and Gaurav Manek and Felix Berkenkamp and J Zico Kolter},\nyear={2024},\nurl={https://openreview.net/forum?id=7AiPfnM73h}\n}", "github": "", "project": "", "reviewers": "Mmg3;5Wu7;JhFx;Urdi", "site": "https://openreview.net/forum?id=7AiPfnM73h", "pdf_size": 1100123, "rating": "3;3;5;5", "confidence": "4;4;3;3", "soundness": "2;2;3;2", "contribution": "2;2;2;2", "presentation": "2;3;3;3", "wc_summary": "74;96;139;96", "wc_strengths": "25;36;38;38", "wc_weaknesses": "87;409;143;136", "wc_questions": "239;1;126;46", "wc_review": "425;542;446;316", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "413;200;268;70", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 101.25, 23.573024837725004 ], "wc_strengths_avg": [ 34.25, 5.402545696243577 ], "wc_weaknesses_avg": [ 193.75, 126.13360971604673 ], "wc_questions_avg": [ 103.0, 90.38528641322104 ], "wc_review_avg": [ 432.25, 80.31305933657364 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 237.75, 123.68584195452607 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:IiVP5kxbhnUJ:scholar.google.com/&scioq=Projected+Off-Policy+Q-Learning+(POP-QL)+for+Stabilizing+Offline+Reinforcement+Learning&hl=en&as_sdt=0,5", "gs_version_total": 3, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "University of Montreal;Carnegie Mellon University;Robert Bosch GmbH", "aff_unique_dep": "Mila;;", "aff_unique_url": "https://www.mila.quebec;https://www.cmu.edu;https://www.bosch.com", "aff_unique_abbr": "Mila;CMU;Bosch", "aff_campus_unique_index": "0", "aff_campus_unique": "Montreal;", "aff_country_unique_index": "0;1;2;1", "aff_country_unique": "Canada;United States;Germany" }, { "id": "7ArYyAmDGQ", 
"title": "Prediction Risk and Estimation Risk of the Ridgeless Least Squares Estimator under General Assumptions on Regression Errors", "track": "main", "status": "Reject", "tldr": "", "abstract": "In recent years, there has been a significant growth in research focusing on minimum $\\ell_2$ norm (ridgeless) interpolation least squares estimators. However, the majority of these analyses have been limited to a simple regression error structure, assuming independent and identically distributed errors with zero mean and common variance. In this paper, we explore prediction risk as well as estimation risk under more general regression error assumptions, highlighting the benefits of overparameterization in a \\emph{finite} sample. We find that including a large number of \\emph{unimportant} parameters relative to the sample size can effectively reduce both risks. Notably, we establish that the estimation difficulties associated with the variance components of both risks \ncan be summarized through the trace of the variance-covariance matrix of the regression errors.", "keywords": "prediction risk;estimation risk;generalization;statistical learning;overparameterization;interpolation;ridgeless regression;benign overfitting;double descent;nonspherical errors", "primary_area": "learning theory", "supplementary_material": "/attachment/97aff42c3175e3ee375dc9ad0230bf08fb7b77ae.zip", "author": "Sungyoon Lee;Sokbae Lee", "authorids": "~Sungyoon_Lee1;~Sokbae_Lee1", "gender": "M;M", "homepage": "https://sites.google.com/view/sungyoon-lee/home;https://sites.google.com/site/sokbae/", "dblp": ";270/3314", "google_scholar": "https://scholar.google.co.kr/citations?user=PAoFkGEAAAAJ;nlNC3hQAAAAJ", "orcid": ";0000-0003-4080-7733", "linkedin": ";", "or_profile": "~Sungyoon_Lee1;~Sokbae_Lee1", "aff": "Hanyang University;Columbia University", "aff_domain": "hanyang.ac.kr;columbia.edu", "position": "Assistant Professor;Professor", "bibtex": "@misc{\nlee2024prediction,\ntitle={Prediction Risk and Estimation Risk of the Ridgeless Least Squares Estimator under General Assumptions on Regression Errors},\nauthor={Sungyoon Lee and Sokbae Lee},\nyear={2024},\nurl={https://openreview.net/forum?id=7ArYyAmDGQ}\n}", "github": "", "project": "", "reviewers": "zjX2;jNnY;xuqE", "site": "https://openreview.net/forum?id=7ArYyAmDGQ", "pdf_size": 387900, "rating": "5;5;6", "confidence": "3;3;3", "soundness": "3;3;3", "contribution": "2;3;2", "presentation": "3;2;3", "wc_summary": "80;162;101", "wc_strengths": "34;7;51", "wc_weaknesses": "121;36;148", "wc_questions": "164;2;2", "wc_review": "399;207;302", "wc_reply_reviewers": "27;0;153", "wc_reply_authors": "448;361;626", "reply_reviewers": "1;0;2", "reply_authors": "1;1;3", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 114.33333333333333, 34.77866523539332 ], "wc_strengths_avg": [ 30.666666666666668, 18.116904322268255 ], "wc_weaknesses_avg": [ 101.66666666666667, 47.72374205314956 ], "wc_questions_avg": [ 56.0, 76.36753236814714 ], "wc_review_avg": [ 302.6666666666667, 78.38508928503062 ], "wc_reply_reviewers_avg": [ 60.0, 66.6783323126786 ], "wc_reply_authors_avg": [ 478.3333333333333, 110.2915328673159 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 
2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:DoXQ8vjLUiAJ:scholar.google.com/&scioq=Prediction+Risk+and+Estimation+Risk+of+the+Ridgeless+Least+Squares+Estimator+under+General+Assumptions+on+Regression+Errors&hl=en&as_sdt=0,5", "gs_version_total": 6, "aff_unique_index": "0;1", "aff_unique_norm": "Hanyang University;Columbia University", "aff_unique_dep": ";", "aff_unique_url": "https://www.hanyang.ac.kr;https://www.columbia.edu", "aff_unique_abbr": "HYU;Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "South Korea;United States" }, { "id": "7B5Korw050", "title": "Lung Nodule Segmentation Network with Self-Supervised Learning and Attention Mechanisms", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Pulmonary nodule detection is one of the most important tasks for early lung cancer diagnosis. Especially, end-to-end methods for multi-tasking, including pulmonary nodule detection, false positive detection, and segmentation have been widely used based on supervised learning, leading to significant improvement in performance when detecting pulmonary nodules. However, those methods with confined environments were not able to exploit the representative features comprehensively. Therefore, some self-supervised methods have been proposed to handle the raw dataset. However, they were merely applied to each task, missing rich features of the end-to-end framework. In this paper, we propose a novel adaptation of self-supervised learning to a multi-tasking framework. Additionally, we employed other attention methods, such as Convolutional Block Attention Module(CBAM), and Quartet Attention Mechanism(QAM) to further enhance the performance without significantly in- creasing the number of parameters to learn.", "keywords": "Pulmonary Nodule Detection and Segmentation;3D Segmentation", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/9beadd02dd7401a3d837cc988ab2988cfe93c3b7.zip", "author": "Eunah Jung;Seungmin Chou;Changwon Lim", "authorids": "~Eunah_Jung1;~Seungmin_Chou1;clim@cau.ac.kr", "gender": "F;M;", "homepage": "https://github.com/JUNGEUNAH;;", "dblp": ";;", "google_scholar": ";;", "orcid": ";;", "linkedin": ";seugnmin-chu-000685216/;", "or_profile": "~Eunah_Jung1;~Seungmin_Chou1;clim@cau.ac.kr", "aff": "Chung-Ang University;Chung-Ang University;", "aff_domain": "cau.ac.kr;cau.ac.kr;", "position": "MS student;Undergrad student;", "bibtex": "@misc{\njung2024lung,\ntitle={Lung Nodule Segmentation Network with Self-Supervised Learning and Attention Mechanisms},\nauthor={Eunah Jung and Seungmin Chou and Changwon Lim},\nyear={2024},\nurl={https://openreview.net/forum?id=7B5Korw050}\n}", "github": "", "project": "", "reviewers": "", "site": "https://openreview.net/forum?id=7B5Korw050", "pdf_size": 319035, "rating": "", "confidence": "", "soundness": "", "contribution": "", "presentation": "", "wc_summary": "", "wc_strengths": "", "wc_weaknesses": "", "wc_questions": "", "wc_review": "", "wc_reply_reviewers": "", "wc_reply_authors": "", "reply_reviewers": "", "reply_authors": "", "rating_avg": [ 0, 0 ], "confidence_avg": [ 0, 0 ], "soundness_avg": [ 0, 0 ], "contribution_avg": [ 0, 0 ], "presentation_avg": [ 0, 0 ], "wc_summary_avg": [ 0, 0 ], "wc_strengths_avg": [ 0, 0 ], "wc_weaknesses_avg": [ 0, 0 ], "wc_questions_avg": [ 0, 0 ], 
"wc_review_avg": [ 0, 0 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 0, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:7fWtsvgK5YEJ:scholar.google.com/&scioq=Lung+Nodule+Segmentation+Network+with+Self-Supervised+Learning+and+Attention+Mechanisms&hl=en&as_sdt=0,23", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Chung-Ang University", "aff_unique_dep": "", "aff_unique_url": "http://www.cau.ac.kr", "aff_unique_abbr": "CAU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "id": "7CLvyZ6Xn7", "title": "Cross-domain Adaptation for Few-shot 3D Shape Generation", "track": "main", "status": "Desk Reject", "tldr": "", "abstract": "Realistic and diverse 3D shape generation is helpful for a wide variety of applications such as virtual reality, gaming, and animation. Modern generative models learn from large-scale datasets and generate new samples following similar distributions. However, when training data is limited, deep neural generative networks overfit and tend to replicate training samples. Prior works focus on few-shot image generation to produce high-quality and diverse results using a few target images. Unfortunately, abundant 3D shape data is typically hard to obtain as well. In this work, we make the first attempt to realize few-shot 3D shape generation by adapting generative models pre-trained on large source domains to target domains. To relieve overfitting and keep considerable diversity, we propose to maintain the probability distributions of the pairwise relative distances between adapted samples at feature-level and shape-level during domain adaptation. Our approach only needs the silhouettes of few-shot target samples as training data to learn target geometry distributions and achieve generated shapes with diverse topology and textures. Moreover, we introduce several metrics to evaluate generation quality and diversity. 
The effectiveness of our approach is demonstrated qualitatively and quantitatively under a series of few-shot 3D shape adaptation setups.", "keywords": "Few-shot;3D shape generation;Domain adaptation", "primary_area": "generative models", "supplementary_material": "/attachment/508ef1649b323ebae12785e156b6cce1e9d8d954.zip", "author": "JingYuan Zhu;Huimin Ma;Jiansheng Chen;Jian Yuan", "authorids": "~JingYuan_Zhu1;~Huimin_Ma1;~Jiansheng_Chen3;~Jian_Yuan1", "gender": "M;F;M;", "homepage": ";http://server.3dimagelab.cn:5000;http://scce.ustb.edu.cn/shiziduiwu/jiaoshixinxi/2021-11-15/210.html;http://bdktzweb.tsinghua.edu.cn/yuanjian/zh_CN/index.htm", "dblp": "302/9530;69/7694-1;;64/4192", "google_scholar": "a3ErJwkAAAAJ;32hwVLEAAAAJ;A1gA9XIAAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~JingYuan_Zhu1;~Huimin_Ma1;~Jiansheng_Chen3;~Jian_Yuan1", "aff": "Electronic Engineering, Tsinghua University, Tsinghua University;University of Science and Technology Beijing;University of Science and Technology Beijing;Tsinghua University", "aff_domain": "mails.tsinghua.edu.cn;ustb.edu.cn;ustb.edu.cn;tsinghua.edu.cn", "position": "PhD student;Full Professor;Full Professor;Professor", "bibtex": "@misc{\nzhu2024crossdomain,\ntitle={Cross-domain Adaptation for Few-shot 3D Shape Generation},\nauthor={JingYuan Zhu and Huimin Ma and Jiansheng Chen and Jian Yuan},\nyear={2024},\nurl={https://openreview.net/forum?id=7CLvyZ6Xn7}\n}", "github": "", "project": "", "reviewers": "4te8;KzHF;DPcy;xeiS", "site": "https://openreview.net/forum?id=7CLvyZ6Xn7", "pdf_size": 48874117, "rating": "3;5;6;6", "confidence": "5;3;3;4", "soundness": "2;3;2;3", "contribution": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "50;100;86;92", "wc_strengths": "11;30;119;85", "wc_weaknesses": "61;298;223;88", "wc_questions": "16;2;69;62", "wc_review": "138;430;497;327", "wc_reply_reviewers": "0;264;185;0", "wc_reply_authors": "685;1480;872;345", "reply_reviewers": "0;1;1;0", "reply_authors": "2;3;3;2", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 82.0, 19.131126469708992 ], "wc_strengths_avg": [ 61.25, 43.01380592321493 ], "wc_weaknesses_avg": [ 167.5, 97.1763860204731 ], "wc_questions_avg": [ 37.25, 28.78693279944913 ], "wc_review_avg": [ 348.0, 135.52306076826926 ], "wc_reply_reviewers_avg": [ 112.25, 115.67276040624257 ], "wc_reply_authors_avg": [ 845.5, 412.1750235033656 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7385489458759963, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:PCzSa-ZAer8J:scholar.google.com/&scioq=Cross-domain+Adaptation+for+Few-shot+3D+Shape+Generation&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Tsinghua University;University of Science and Technology Beijing", "aff_unique_dep": "Electronic Engineering;", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.ustb.edu.cn", "aff_unique_abbr": "THU;USTB", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Elastic Feature Consolidation For Cold Start Exemplar-Free Incremental Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19370", "id": "7D9X2cFnt1", "author_site": "Simone 
Magistri, Tomaso Trinci, Albin Soutif--Cormerais, Joost van de Weijer, Andrew Bagdanov", "tldr": "", "abstract": "Exemplar-Free Class Incremental Learning (EFCIL) aims to learn from a sequence of tasks without having access to previous task data. In this paper, we consider the challenging Cold Start scenario in which insufficient data is available in the first task to learn a high-quality backbone. This is especially challenging for EFCIL since it requires high plasticity, which results in feature drift which is difficult to compensate for in the exemplar-free setting. To address this problem, we propose a simple and effective approach that consolidates feature representations by regularizing drift in directions highly relevant to previous tasks and employs prototypes to reduce task-recency bias. Our method, called Elastic Feature Consolidation (EFC), exploits a tractable second-order approximation of feature drift based on an Empirical Feature Matrix (EFM). The EFM induces a pseudo-metric in feature space which we use to regularize feature drift in important directions and to update Gaussian prototypes used in a novel asymmetric cross entropy loss which effectively balances prototype rehearsal with data from new tasks. Experimental results on CIFAR-100, Tiny-ImageNet, ImageNet-Subset and ImageNet-1K demonstrate that Elastic Feature Consolidation is better able to learn new tasks by maintaining model plasticity and significantly outperform the state-of-the-art.", "keywords": "Computer vision;continual learning;class-incremental learning;exemplar free;lifelong learning", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "/attachment/75279702f6513c9c1950c5b24c1637c0100a3626.zip", "author": "Simone Magistri;Tomaso Trinci;Albin Soutif;Joost van de Weijer;Andrew D. Bagdanov", "authorids": "~Simone_Magistri1;~Tomaso_Trinci1;~Albin_Soutif1;~Joost_van_de_Weijer5;~Andrew_D._Bagdanov2", "gender": "M;M;M;M;M", "homepage": "https://webgol.dinfo.unifi.it/simone-magistri/;https://webgol.dinfo.unifi.it/tomaso-trinci/;;http://www.micc.unifi.it/bagdanov;http://lamp.cvc.uab.es/", "dblp": "283/0920;364/8212;295/9611;64/3935;67/3379", "google_scholar": "fAS993EAAAAJ;kfN-d6IAAAAJ;BtPOFGoAAAAJ;_Fk4YUcAAAAJ;https://scholar.google.es/citations?user=Gsw2iUEAAAAJ", "orcid": "0000-0002-0520-8463;0000-0002-4052-1930;;;0000-0002-9656-9706", "linkedin": ";;albin-soutif-b2252210b/;;", "or_profile": "~Simone_Magistri1;~Tomaso_Trinci1;~Albin_Soutif1;~Andrew_D._Bagdanov2;~Joost_van_de_Weijer1", "aff": "University of Florence;University of Florence;Computer Vision Center, Universitat Aut\u00f2noma de Barcelona;Universit\u00e0 degli Studi di Firenze;Computer Vision Center, Universitat Aut\u00f3noma de Barcelona", "aff_domain": "unifi.it;unifi.it;cvc.uab.es;unifi.it;cvc.uab.es", "position": "PhD student;PhD student;PhD student;Associate Professor;Researcher", "bibtex": "@inproceedings{\nmagistri2024elastic,\ntitle={Elastic Feature Consolidation For Cold Start Exemplar-Free Incremental Learning},\nauthor={Simone Magistri and Tomaso Trinci and Albin Soutif and Joost van de Weijer and Andrew D. 
Bagdanov},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=7D9X2cFnt1}\n}", "github": "", "project": "", "reviewers": "RQMN;4Tis;Hr2j;sY5k", "pdf_size": 1200954, "rating": "6;6;8;8", "confidence": "5;5;4;5", "soundness": "3;3;3;4", "contribution": "3;3;3;3", "presentation": "3;3;3;4", "wc_summary": "198;133;56;197", "wc_strengths": "82;106;75;159", "wc_weaknesses": "462;99;6;339", "wc_questions": "9;9;3;65", "wc_review": "751;347;140;760", "wc_reply_reviewers": "38;0;35;88", "wc_reply_authors": "1401;763;258;723", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 7.0, 1.0 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 146.0, 58.25375524376089 ], "wc_strengths_avg": [ 105.5, 32.95830699535399 ], "wc_weaknesses_avg": [ 226.5, 182.3410266506142 ], "wc_questions_avg": [ 21.5, 25.233905761891084 ], "wc_review_avg": [ 499.5, 266.27476410655214 ], "wc_reply_reviewers_avg": [ 40.25, 31.355820831226854 ], "wc_reply_authors_avg": [ 786.25, 406.6653261589928 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9154825138737762782&as_sdt=1005&sciodt=0,4&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=7D9X2cFnt1", "pdf": "https://openreview.net/pdf?id=7D9X2cFnt1", "email": "unifi.it;unifi.it;cvc.uab.es;unifi.it;cvc.uab.es", "author_num": 5, "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "University of Florence;Universitat Aut\u00f2noma de Barcelona;Universitat Aut\u00f3noma de Barcelona", "aff_unique_dep": ";Computer Vision Center;Computer Vision Center", "aff_unique_url": "https://www.unifi.it;https://www.uab.cat;https://www.uab.cat", "aff_unique_abbr": "UNIFI;UAB;UAB", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;1", "aff_country_unique": "Italy;Spain" }, { "title": "Real3D-Portrait: One-shot Realistic 3D Talking Portrait Synthesis", "status": "Spotlight", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19369", "id": "7ERQPyR2eb", "author_site": "Zhenhui Ye, Tianyun Zhong, Yi Ren, Jiaqi Yang, Weichuang Li, Jiawei Huang, Ziyue Jiang, Jinzheng He, Rongjie Huang, Jinglin Liu, Chen Zhang, Xiang Yin, Zejun MA, Zhou Zhao", "tldr": "", "abstract": "One-shot 3D talking portrait generation aims to reconstruct a 3D avatar from an unseen image, and then animate it with a reference video or audio to generate a talking portrait video. The existing methods fail to simultaneously achieve the goals of accurate 3D avatar reconstruction and stable talking face animation. Besides, while the existing works mainly focus on synthesizing the head part, it is also vital to generate natural torso and background segments to obtain a realistic talking portrait video. 
To address these limitations, we present Real3D-Portrait, a framework that (1) improves the one-shot 3D reconstruction power with a large image-to-plane model that distills 3D prior knowledge from a 3D face generative model; (2) facilitates accurate motion-conditioned animation with an efficient motion adapter; (3) synthesizes realistic video with natural torso movement and switchable background using a head-torso-background super-resolution model; and (4) supports one-shot audio-driven talking face generation with a generalizable audio-to-motion model. Extensive experiments show that Real3D-Portrait generalizes well to unseen identities and generates more realistic talking portrait videos compared to previous methods. Video samples are available at https://real3dportrait.github.io.", "keywords": "One-shot Talking Face Generation;Neural Radiance Field", "primary_area": "generative models", "supplementary_material": "/attachment/d5e2a3d21b399033bef1d6ec656016a274654cde.zip", "author": "Zhenhui Ye;Tianyun Zhong;Yi Ren;Jiaqi Yang;Weichuang Li;Jiawei Huang;Ziyue Jiang;Jinzheng He;Rongjie Huang;Jinglin Liu;Chen Zhang;Xiang Yin;Zejun MA;Zhou Zhao", "authorids": "~Zhenhui_Ye1;~Tianyun_Zhong3;~Yi_Ren2;~Jiaqi_Yang8;~Weichuang_Li1;~Jiawei_Huang5;~Ziyue_Jiang1;~Jinzheng_He1;~Rongjie_Huang1;~Jinglin_Liu1;~Chen_Zhang3;~Xiang_Yin2;~Zejun_MA1;~Zhou_Zhao3", "gender": "M;M;M;M;M;M;M;;M;M;F;M;M;M", "homepage": "https://yerfor.github.io;;https://rayeren.github.io/;https://omnihuman-lab.github.io/;https://www.waytron.net/;;;;;;https://actuy.github.io/;;;https://dblp.uni-trier.de/pid/75/7785.html?", "dblp": "265/6375;;75/6568-6;;318/0593;13/4208-8;258/6865;272/8857;212/8936-1;;94/4084-20;18/1022-6.html;;75/7785", "google_scholar": ";https://scholar.google.co.jp/scholar?hl=zh-CN;4FA6C0AAAAAJ;https://scholar.google.com/citations?hl=zh-CN;742-_K0AAAAJ;https://scholar.google.com/citations?hl=zh-CN;wDgSBssAAAAJ;https://scholar.google.com/citations?hl=zh-CN;iRHBUsgAAAAJ;Ri8x0jEAAAAJ;eBBFeVcAAAAJ;e6_J-lEAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=IIoFY90AAAAJ", "orcid": ";;;0009-0006-6034-4069;;;;;;;;;;0000-0001-6121-0384", "linkedin": ";;;;;;;;;;;;zejun-ma-58614365/;", "or_profile": "~Zhenhui_Ye1;~Tianyun_Zhong3;~Yi_Ren2;~Jiaqi_Yang8;~Weichuang_Li1;~Jiawei_Huang5;~Ziyue_Jiang1;~Jinzheng_He1;~Rongjie_Huang1;~Jinglin_Liu1;~Chen_Zhang3;~Xiang_Yin2;~Zejun_MA1;~Zhou_Zhao2", "aff": "Zhejiang University;Zhejiang University;ByteDance;Tsinghua University;Hongkong University of Science and Technology(Guangzhou);Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;ByteDance;Bytedance;ByteDance Inc.;ByteDance Inc.;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;bytedance.com;tsinghua.edu.cn;hkust-gz.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;bytedance.com;bytedance.com;bytedance.com;bytedance.com;zju.edu.cn", "position": "PhD student;MS student;Researcher;MS student;PhD student;MS student;PhD student;MS student;MS student;Research Scientist;Research Scientist;Researcher;Principal Researcher;Associate Professor", "bibtex": "@inproceedings{\nye2024realdportrait,\ntitle={Real3D-Portrait: One-shot Realistic 3D Talking Portrait Synthesis},\nauthor={Zhenhui Ye and Tianyun Zhong and Yi Ren and Jiaqi Yang and Weichuang Li and Jiawei Huang and Ziyue Jiang and Jinzheng He and Rongjie Huang and Jinglin Liu and Chen Zhang and Xiang Yin and Zejun MA and Zhou Zhao},\nbooktitle={The Twelfth International Conference on Learning 
Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=7ERQPyR2eb}\n}", "github": "", "project": "", "reviewers": "Kntd;8dWt;yTCz;MF4S", "pdf_size": 4764610, "rating": "8;8;8;10", "confidence": "4;3;5;4", "soundness": "3;2;3;4", "contribution": "3;1;2;3", "presentation": "3;3;4;3", "wc_summary": "228;85;116;122", "wc_strengths": "35;43;189;158", "wc_weaknesses": "10;64;381;278", "wc_questions": "341;30;22;91", "wc_review": "614;222;708;649", "wc_reply_reviewers": "221;99;88;0", "wc_reply_authors": "1880;1002;1576;945", "reply_reviewers": "4;2;4;0", "reply_authors": "5;4;6;4", "rating_avg": [ 8.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 137.75, 53.9646875280493 ], "wc_strengths_avg": [ 106.25, 68.19594929319483 ], "wc_weaknesses_avg": [ 183.25, 151.92000362032644 ], "wc_questions_avg": [ 121.0, 129.79021534769097 ], "wc_review_avg": [ 548.25, 191.3326618745477 ], "wc_reply_reviewers_avg": [ 102.0, 78.69243928103894 ], "wc_reply_authors_avg": [ 1350.75, 392.77943874393424 ], "reply_reviewers_avg": [ 2.5, 1.6583123951777 ], "reply_authors_avg": [ 4.75, 0.82915619758885 ], "replies_avg": [ 38, 0 ], "authors#_avg": [ 14, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 47, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4610920972123504276&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=7ERQPyR2eb", "pdf": "https://openreview.net/pdf?id=7ERQPyR2eb", "email": "zju.edu.cn;zju.edu.cn;bytedance.com;tsinghua.edu.cn;hkust-gz.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;bytedance.com;bytedance.com;bytedance.com;bytedance.com;zju.edu.cn", "author_num": 14, "aff_unique_index": "0;0;1;2;3;0;0;0;0;1;1;1;1;0", "aff_unique_norm": "Zhejiang University;ByteDance;Tsinghua University;Hong Kong University of Science and Technology", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.zju.edu.cn;https://www.bytedance.com;https://www.tsinghua.edu.cn;https://www.ust.hk", "aff_unique_abbr": "ZJU;ByteDance;THU;HKUST", "aff_campus_unique_index": "1", "aff_campus_unique": ";Guangzhou", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "7ErllmwXym", "title": "Interpreting and improving diffusion models using the Euclidean distance function", "track": "main", "status": "Reject", "tldr": "", "abstract": "Denoising is intuitively related to projection. Indeed, under the manifold hypothesis, adding random noise is approximately equivalent to orthogonal perturbation. Hence, learning to denoise is approximately learning to project. In this paper, we use this observation to reinterpret denoising diffusion models as approximate gradient descent applied to the Euclidean distance function. We then provide straight-forward convergence analysis of the DDIM sampler under simple assumptions on the projection-error of the denoiser. Finally, we propose a new sampler based on two simple modifications to DDIM using insights from our theoretical results. 
In as few as 5-10 function evaluations, our sampler achieves state-of-the-art FID scores on pretrained CIFAR-10 and CelebA models and can generate high quality samples on latent diffusion models.", "keywords": "Diffusion models;distance functions;projection;training-free sampler", "primary_area": "generative models", "supplementary_material": "/attachment/3c87d9fda1783f5522157a8aa1542c4aca19463e.zip", "author": "Frank Permenter;Chenyang Yuan", "authorids": "~Frank_Permenter1;~Chenyang_Yuan1", "gender": ";", "homepage": "https://www.mit.edu/~fperment;", "dblp": "90/9943;", "google_scholar": "BQ_S4vMAAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Frank_Permenter1;~Chenyang_Yuan1", "aff": "Toyota Research Institute;", "aff_domain": "tri.global;", "position": "Principal Researcher;", "bibtex": "@misc{\npermenter2024interpreting,\ntitle={Interpreting and improving diffusion models using the Euclidean distance function},\nauthor={Frank Permenter and Chenyang Yuan},\nyear={2024},\nurl={https://openreview.net/forum?id=7ErllmwXym}\n}", "github": "", "project": "", "reviewers": "LpGF;dfzc;gUw5", "site": "https://openreview.net/forum?id=7ErllmwXym", "pdf_size": 7309770, "rating": "3;6;8", "confidence": "4;3;3", "soundness": "2;3;3", "contribution": "1;3;3", "presentation": "2;4;2", "wc_summary": "99;152;57", "wc_strengths": "20;75;47", "wc_weaknesses": "513;83;1", "wc_questions": "6;47;73", "wc_review": "638;357;178", "wc_reply_reviewers": "0;88;0", "wc_reply_authors": "160;586;73", "reply_reviewers": "0;1;0", "reply_authors": "1;2;1", "rating_avg": [ 5.666666666666667, 2.0548046676563256 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.9428090415820634 ], "presentation_avg": [ 2.6666666666666665, 0.9428090415820634 ], "wc_summary_avg": [ 102.66666666666667, 38.87015421745698 ], "wc_strengths_avg": [ 47.333333333333336, 22.45489305746572 ], "wc_weaknesses_avg": [ 199.0, 224.54101332867157 ], "wc_questions_avg": [ 42.0, 27.58018612458347 ], "wc_review_avg": [ 391.0, 189.32687782421877 ], "wc_reply_reviewers_avg": [ 29.333333333333332, 41.48359782961079 ], "wc_reply_authors_avg": [ 273.0, 224.15619554230483 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.9176629354822472, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14071772171599282298&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Toyota Research Institute", "aff_unique_dep": "", "aff_unique_url": "https://www.tri.global", "aff_unique_abbr": "TRI", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "7F4ioiKQFT", "title": "ColCLIP: Enhancing Fine-Grained Image Retrieval with Pre-trained Embeddings", "track": "main", "status": "Reject", "tldr": "", "abstract": "In the realm of image retrieval systems, efficiently searching for images based on any visual element described in the query is critical for user experience. However, current embedding models like CLIP primarily focus on aligning text with the most salient aspects of images, which may not always correspond to the elements users seek. In this paper, we propose ColCLIP, a fine-grained image retrieval system that leverages pre-trained embeddings and enhances them for our use case. 
We fine-tune CLIP on the Visual Genome Dataset and incorporate the MaxSim operator for image-text interaction. Our evaluations show that ColCLIP consistently outperforms standard CLIP in handling fine-grained retrieval tasks. ColCLIP improves image retrieval systems by enabling more relevant searches for users while maintaining efficiency and ease of development. We release our code at https://anonymous.4open.science/r/image-is-context-32B6.", "keywords": "Multimodal Learning;Image;Language;Retrieval", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Conghao Shen;Yixin Liu;Wanyue Zhai", "authorids": "~Conghao_Shen1;~Yixin_Liu6;~Wanyue_Zhai1", "gender": "M;F;F", "homepage": "https://tomshen.io;;", "dblp": ";;", "google_scholar": ";;", "orcid": ";;", "linkedin": "conghao-shen/;yixin-liu-261090201/;wanyue-zhai-74a074182/", "or_profile": "~Conghao_Shen1;~Yixin_Liu6;~Wanyue_Zhai1", "aff": "Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu", "position": "MS student;MS student;MS student", "bibtex": "@misc{\nshen2024colclip,\ntitle={Col{CLIP}: Enhancing Fine-Grained Image Retrieval with Pre-trained Embeddings},\nauthor={Conghao Shen and Yixin Liu and Wanyue Zhai},\nyear={2024},\nurl={https://openreview.net/forum?id=7F4ioiKQFT}\n}", "github": "", "project": "", "reviewers": "hnuQ;coku;yk6P;Qpf8", "site": "https://openreview.net/forum?id=7F4ioiKQFT", "pdf_size": 3760218, "rating": "3;3;5;5", "confidence": "4;4;5;4", "soundness": "2;2;2;3", "contribution": "2;1;2;2", "presentation": "2;3;1;2", "wc_summary": "53;61;115;69", "wc_strengths": "25;19;106;45", "wc_weaknesses": "206;55;262;194", "wc_questions": "39;18;4;18", "wc_review": "323;153;487;326", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 74.5, 24.057223447438815 ], "wc_strengths_avg": [ 48.75, 34.4265522525855 ], "wc_weaknesses_avg": [ 179.25, 76.18849978835388 ], "wc_questions_avg": [ 19.75, 12.497499749949988 ], "wc_review_avg": [ 322.25, 118.11302849389648 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ZMvOkV_SPHkJ:scholar.google.com/&scioq=ColCLIP:+Enhancing+Fine-Grained+Image+Retrieval+with+Pre-trained+Embeddings&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "7FHrZuKogW", "title": "Contractive Systems Improve Graph Neural Networks Against Adversarial Attacks", "track": "main", "status": "Reject", "tldr": "", "abstract": "Graph Neural Networks (GNNs) have established themselves as a key component in addressing diverse graph-based tasks. 
Despite their notable successes, GNNs remain susceptible to input perturbations in the form of adversarial attacks. This paper introduces an innovative approach to fortify GNNs against adversarial perturbations through the lens of contractive dynamical systems. Our method introduces graph neural layers based on differential equations with contractive properties, which, as we show, improve the robustness of GNNs. A distinctive feature of the proposed approach is the simultaneous learned evolution of both the node features and the adjacency matrix, yielding an intrinsic enhancement of model robustness to perturbations in the input features and the connectivity of the graph. We mathematically derive the underpinnings of our novel architecture and provide theoretical insights to reason about its expected behavior. We demonstrate the efficacy of our method through numerous real-world benchmarks, reaching on par or improved performance compared to existing methods.", "keywords": "Graph Neural Networks;Adversarial Defense;Contractive Systems;Dynamical Systems Inspired Neural Networks", "primary_area": "learning on graphs and other geometries & topologies", "supplementary_material": "", "author": "Moshe Eliasof;Davide Murari;Ferdia Sherry;Carola-Bibiane Sch\u00f6nlieb", "authorids": "~Moshe_Eliasof1;~Davide_Murari1;~Ferdia_Sherry1;~Carola-Bibiane_Sch\u00f6nlieb1", "gender": "M;M;;F", "homepage": ";http://davidemurari.com;https://ferdiasherry.com;http://www.damtp.cam.ac.uk/research/cia/", "dblp": "239/6004;286/5121;;07/8184", "google_scholar": "44LKqBsAAAAJ;P8A76uwAAAAJ;8V2NqB8AAAAJ;nPeOXjwAAAAJ", "orcid": ";0000-0002-1095-6685;;", "linkedin": ";;;", "or_profile": "~Moshe_Eliasof1;~Davide_Murari1;~Ferdia_Sherry1;~Carola-Bibiane_Sch\u00f6nlieb1", "aff": "University of Cambridge;Norwegian University of Science and Technology;University of Cambridge;University of Cambridge", "aff_domain": "cam.ac.uk;ntnu.no;cam.ac.uk;cam.ac.uk", "position": "Postdoc;PhD student;Postdoc;Full Professor", "bibtex": "@misc{\neliasof2024contractive,\ntitle={Contractive Systems Improve Graph Neural Networks Against Adversarial Attacks},\nauthor={Moshe Eliasof and Davide Murari and Ferdia Sherry and Carola-Bibiane Sch{\\\"o}nlieb},\nyear={2024},\nurl={https://openreview.net/forum?id=7FHrZuKogW}\n}", "github": "", "project": "", "reviewers": "mRqu;SPJa;fwuD;QT3W", "site": "https://openreview.net/forum?id=7FHrZuKogW", "pdf_size": 531618, "rating": "5;5;6;6", "confidence": "4;4;1;2", "soundness": "3;3;3;3", "contribution": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "32;22;80;51", "wc_strengths": "33;62;107;13", "wc_weaknesses": "78;570;6;36", "wc_questions": "139;1;10;9", "wc_review": "282;655;203;109", "wc_reply_reviewers": "205;20;9;9", "wc_reply_authors": "1925;2577;188;265", "reply_reviewers": "2;1;1;1", "reply_authors": "6;6;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 2.75, 1.299038105676658 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 46.25, 22.094965489902897 ], "wc_strengths_avg": [ 53.75, 35.336772631353874 ], "wc_weaknesses_avg": [ 172.5, 230.91719295020022 ], "wc_questions_avg": [ 39.75, 57.408078699778834 ], "wc_review_avg": [ 312.25, 207.14653629737572 ], "wc_reply_reviewers_avg": [ 60.75, 83.40376190556395 ], "wc_reply_authors_avg": [ 1238.75, 1038.5225984541694 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 4.0, 2.0 ], "replies_avg": [ 28, 0 ], 
"authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9622504486493763, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14697871274676579373&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Cambridge;Norwegian University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.cam.ac.uk;https://www.ntnu.no", "aff_unique_abbr": "Cambridge;NTNU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United Kingdom;Norway" }, { "title": "SLiMe: Segment Like Me", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19368", "id": "7FeIRqCedv", "author_site": "Aliasghar Khani, Saeid Asgari, Aditya Sanghi, Ali Mahdavi Amiri, Ghassan Hamarneh", "tldr": "", "abstract": "Significant strides have been made using large vision-language models, like Stable Diffusion (SD), for a variety of downstream tasks, including image generation, image editing, and 3D shape generation. Inspired by these advancements, we explore leveraging these vision-language models for segmenting images at any desired granularity using as few as one annotated sample. We propose SLiMe, which frames this problem as an optimization task. Specifically, given a single image and its segmentation mask, we first extract our novel \u201cweighted accumulated self-attention map\u201d along with cross-attention map from the SD prior. Then, using these extracted maps, the text embeddings of SD are optimized to highlight the segmented region in these attention maps, which in turn can be used to derive new segmentation results. Moreover, leveraging additional training data when available, i.e. few-shot, improves the performance of SLiMe. 
We performed comprehensive experiments examining various design factors and showed that SLiMe outperforms other existing one-shot and few-shot segmentation methods.", "keywords": "one-shot segmentation;computer vision;text-to-image models;stable diffusion;cross attention", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/9c59450fa65d10e741d665936f860f90609cced7.zip", "author": "Aliasghar Khani;Saeid Asgari;Aditya Sanghi;Ali Mahdavi Amiri;Ghassan Hamarneh", "authorids": "~Aliasghar_Khani1;~Saeid_Asgari1;~Aditya_Sanghi1;~Ali_Mahdavi_Amiri1;~Ghassan_Hamarneh1", "gender": "M;;M;M;M", "homepage": "http://aliasgharkhani.github.io/;https://asgsaeid.github.io/;https://github.com/sanghiad;https://www.sfu.ca/~amahdavi;http://www.medicalimageanalysis.com", "dblp": ";201/4374.html;;33/10499.html;h/GhassanHamarneh", "google_scholar": "yr7Y5EcAAAAJ;SuePM1sAAAAJ;q0-11e25FxIC;https://scholar.google.ca/citations?user=M9eTADwAAAAJ;https://scholar.google.ca/citations?user=61DdlkAAAAAJ", "orcid": ";;;;0000-0001-5040-7448", "linkedin": "aliasghar-khani-08157b16b/;;;;ghassanhamarneh/", "or_profile": "~Aliasghar_Khani1;~Saeid_Asgari1;~Aditya_Sanghi1;~Ali_Mahdavi_Amiri1;~Ghassan_Hamarneh1", "aff": "Computing Science, Simon Fraser University;Autodesk;Autodesk;Simon Fraser University;Simon Fraser University", "aff_domain": "cs.sfu.ca;autodesk.com;autodesk.com;sfu.ca;sfu.ca", "position": "MS student;Research Scientist;Researcher;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nkhani2024slime,\ntitle={{SL}iMe: Segment Like Me},\nauthor={Aliasghar Khani and Saeid Asgari and Aditya Sanghi and Ali Mahdavi Amiri and Ghassan Hamarneh},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=7FeIRqCedv}\n}", "github": "", "project": "", "reviewers": "Fjuo;kAQy;16rP;VRf7", "pdf_size": 22137145, "rating": "6;6;8;8", "confidence": "2;3;3;4", "soundness": "3;3;3;3", "contribution": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "105;79;129;75", "wc_strengths": "124;64;48;66", "wc_weaknesses": "168;160;140;95", "wc_questions": "47;5;12;15", "wc_review": "444;308;329;251", "wc_reply_reviewers": "0;42;21;13", "wc_reply_authors": "667;315;244;616", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 97.0, 21.77154105707724 ], "wc_strengths_avg": [ 75.5, 28.85740806101615 ], "wc_weaknesses_avg": [ 140.75, 28.314086600135983 ], "wc_questions_avg": [ 19.75, 16.145819892467525 ], "wc_review_avg": [ 333.0, 70.15340333868343 ], "wc_reply_reviewers_avg": [ 19.0, 15.247950681976906 ], "wc_reply_authors_avg": [ 460.5, 183.61985186792847 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.7071067811865475, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1984554634286821078&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=7FeIRqCedv", "pdf": "https://openreview.net/pdf?id=7FeIRqCedv", "email": "cs.sfu.ca;autodesk.com;autodesk.com;sfu.ca;sfu.ca", "author_num": 5, "aff_unique_index": "0;1;1;0;0", "aff_unique_norm": "Simon Fraser 
University;Autodesk", "aff_unique_dep": "Computing Science;", "aff_unique_url": "https://www.sfu.ca;https://www.autodesk.com", "aff_unique_abbr": "SFU;Autodesk", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;0", "aff_country_unique": "Canada;United States" }, { "id": "7GCRhebJEr", "title": "Robustness via learned Bregman divergence", "track": "main", "status": "Reject", "tldr": "", "abstract": "We exploit the Bregman divergence to generate functions that are trained to measure the semantic similarity between images under corruptions and use these functions as alternatives to the $L^p$ norms to define robustness threat models. Then we replace the projected gradient descent (PGD) by semantic attacks, which are instantiations of the mirror descent, the optimization framework associated with the Bregman divergence. Adversarial training under these settings yield classification models that are more robust to common image corruptions. Particularly, for the contrast corruption that was found problematic in prior work we achieve an accuracy that exceeds the $L^p$- and the LPIPS-based adversarially trained neural networks by a margin of 29\\% on the CIFAR-10-C corruption dataset.", "keywords": "Bregman divergence;Mirror descent;Corruption robustness;Adversarial training;Self-supervised learning.", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Mohamed-Hicham LEGHETTAS;Markus P\u00fcschel", "authorids": "~Mohamed-Hicham_LEGHETTAS1;~Markus_P\u00fcschel1", "gender": "M;M", "homepage": "https://acl.inf.ethz.ch/people/hichaml/;https://acl.inf.ethz.ch/", "dblp": ";37/6355", "google_scholar": ";az9ZryAAAAAJ", "orcid": ";0000-0001-8834-8551", "linkedin": ";", "or_profile": "~Mohamed-Hicham_LEGHETTAS1;~Markus_P\u00fcschel1", "aff": "Department of Computer Science, ETHZ - ETH Zurich;Department of Computer Science, ETHZ - ETH Zurich", "aff_domain": "inf.ethz.ch;inf.ethz.ch", "position": "PhD student;Full Professor", "bibtex": "@misc{\nleghettas2024robustness,\ntitle={Robustness via learned Bregman divergence},\nauthor={Mohamed-Hicham LEGHETTAS and Markus P{\\\"u}schel},\nyear={2024},\nurl={https://openreview.net/forum?id=7GCRhebJEr}\n}", "github": "", "project": "", "reviewers": "qLEt;brGx;zXKR;GMTd", "site": "https://openreview.net/forum?id=7GCRhebJEr", "pdf_size": 1783808, "rating": "3;3;6;8", "confidence": "4;4;3;4", "soundness": "2;2;2;3", "contribution": "2;2;3;3", "presentation": "3;3;3;4", "wc_summary": "21;94;67;80", "wc_strengths": "11;43;57;119", "wc_weaknesses": "53;271;65;410", "wc_questions": "6;2;82;237", "wc_review": "91;410;271;846", "wc_reply_reviewers": "0;119;36;72", "wc_reply_authors": "6;1349;255;540", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 5.0, 2.1213203435596424 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 65.5, 27.408940147331492 ], "wc_strengths_avg": [ 57.5, 39.22690403281911 ], "wc_weaknesses_avg": [ 199.75, 149.14317785269296 ], "wc_questions_avg": [ 81.75, 95.132473425219 ], "wc_review_avg": [ 404.5, 278.86242127615543 ], "wc_reply_reviewers_avg": [ 56.75, 44.04188347471075 ], "wc_reply_authors_avg": [ 537.5, 505.1823928048166 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], 
"corr_rating_confidence": -0.2721655269759087, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:orqCiqyFMJ4J:scholar.google.com/&scioq=Robustness+via+learned+Bregman+divergence&hl=en&as_sdt=0,11", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "ETH Zurich", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.ethz.ch", "aff_unique_abbr": "ETHZ", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Zurich", "aff_country_unique_index": "0;0", "aff_country_unique": "Switzerland" }, { "id": "7GkdjhupsV", "title": "InfoAug: Mutual Information Informed Augmentation for Representation Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Representation learning methods utilizing the InfoNCE loss have demonstrated considerable capacity in reducing human annotation effort by training invariant neural feature extractors. Although different variants of the training objective adhere to the information maximization principle between the data and learned features, data selection and augmentation still rely on human hypotheses or engineering, which may be suboptimal. For instance, data augmentation in contrastive learning primarily focuses on color jittering, aiming to emulate real-world illumination changes. In this work, we investigate the potential of selecting training data based on their mutual information computed from real-world distributions, which, in principle, should endow the learned features with better generalization when applied in open environments. Specifically, we consider patches attached to scenes that exhibit high mutual information under natural perturbations, such as color changes and motion, as positive samples for learning with contrastive loss. We evaluate the proposed mutual-information-informed data augmentation method on several benchmarks across multiple state-of-the-art representation learning frameworks, demonstrating its effectiveness and establishing it as a promising direction for future research. 
The data and code will be available for further investigation.", "keywords": "representation learning;mutual information;data augmentation", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Hanyang Chen;Qingyuan Zheng;YANG ZONGRU;Yanchao Yang", "authorids": "~Hanyang_Chen2;~Qingyuan_Zheng1;~YANG_ZONGRU2;~Yanchao_Yang1", "gender": "M;M;M;M", "homepage": "https://jeremycccc.github.io/;;https://github.com/yangzongru-jpg;https://yanchaoyang.github.io/", "dblp": ";;;84/8637-1", "google_scholar": ";;;r2tKnV4AAAAJ", "orcid": ";;;", "linkedin": ";https://www.linkedin.cn/incareer/in/ACoAAD7uNpgBlyknH4xCq6-6dwLecU4xaHDNqBc;;", "or_profile": "~Hanyang_Chen2;~Qingyuan_Zheng1;~YANG_ZONGRU2;~Yanchao_Yang1", "aff": "University of Hong Kong;University of Hong Kong;University of Hong Kong;University of Hong Kong", "aff_domain": "hku.hk;hku.hk;hku.hk;hku.hk", "position": "Undergrad student;PhD student;MS student;Assistant Professor", "bibtex": "@misc{\nchen2024infoaug,\ntitle={InfoAug: Mutual Information Informed Augmentation for Representation Learning},\nauthor={Hanyang Chen and Qingyuan Zheng and YANG ZONGRU and Yanchao Yang},\nyear={2024},\nurl={https://openreview.net/forum?id=7GkdjhupsV}\n}", "github": "", "project": "", "reviewers": "xZ59;m3jm;LYQ4;A3xm", "site": "https://openreview.net/forum?id=7GkdjhupsV", "pdf_size": 1388488, "rating": "3;3;3;6", "confidence": "5;3;4;3", "soundness": "2;2;1;3", "contribution": "2;1;1;3", "presentation": "2;2;2;3", "wc_summary": "39;94;71;128", "wc_strengths": "34;27;42;97", "wc_weaknesses": "134;190;199;258", "wc_questions": "73;2;2;37", "wc_review": "280;313;314;520", "wc_reply_reviewers": "0;61;0;18", "wc_reply_authors": "495;467;559;457", "reply_reviewers": "0;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.0, 0.7071067811865476 ], "contribution_avg": [ 1.75, 0.82915619758885 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 83.0, 32.50384592629001 ], "wc_strengths_avg": [ 50.0, 27.649593125396983 ], "wc_weaknesses_avg": [ 195.25, 43.96234183935155 ], "wc_questions_avg": [ 28.5, 29.3981291921782 ], "wc_review_avg": [ 356.75, 95.24015697173121 ], "wc_reply_reviewers_avg": [ 19.75, 24.923633362734254 ], "wc_reply_authors_avg": [ 494.5, 39.75864685826217 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:5X_KkOYcPoIJ:scholar.google.com/&scioq=InfoAug:+Mutual+Information+Informed+Augmentation+for+Representation+Learning&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Hong Kong", "aff_unique_dep": "", "aff_unique_url": "https://www.hku.hk", "aff_unique_abbr": "HKU", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "7Gza2TkLPJ", "title": "BiTGNN: prediction of drug-target interactions based on bidirectional transformer and graph neural network on heterogeneous graph", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Drug-target interaction (DTI) is a widely explored topic in the field of bioinformatics and plays a pivotal role in drug discovery. 
However, the traditional bio-experimental process of drug-target interaction identification requires a large investment of time and labor. To address this challenge, graph neural network (GNN) approaches in deep learning are becoming a prominent trend in the field of DTI research, which is characterized by multimodal processing of data, feature learning and interpretability in DTI. Nevertheless, some methods are still limited by homogeneous graphs and single features. To address these problems, we mechanistically analyze graph convolutional neural networks (GCN) and graph attentional neural networks (GAT) in order to propose a new model for drug-target interaction prediction based on graph neural networks named BiTGNN (bidirectional transformer and graph neural network). The method first establishes drug-target pairs through the pseudo position-specific scoring matrix (PsePSSM) and drug fingerprint data, and constructs a heterogeneous network by utilizing the relationship between the drug and the target. Then, the computational extraction of drug and target attributes is performed using GCN and GAT for the purpose of model information flow extension and graph information enhancement. We collect interaction data using the proposed Bi-directional transformer (Bi-transformer) architecture, in which we design a bi-directional cross-attention mechanism for calculating the effects of drug-target interactions for realistic biological interaction simulations. Finally, a feed-forward neural network is used to obtain the feature matrices of the drug and the target, and DTI prediction is performed by fusing the two feature matrices. The Enzyme, Ion Channel (IC), G Protein-coupled Receptor (GPCR), and Nuclear Receptor (NR) datasets are used in the experiments, and compared with several existing mainstream models, our model outperforms the others in Area Under the Curve (AUC), Area Under the Precision-Recall Curve (AUPR), Accuracy and Specificity metrics.", "keywords": "DTI prediction;bidirectional transformer;graph attention network;graph convolutional neural network", "primary_area": "applications to physical sciences (physics, chemistry, biology, etc.)", "supplementary_material": "", "author": "Qingqian Zhang;Changxiang He;Xiaofei Qin;Peisheng Yang;Junyang Kong", "authorids": "~Qingqian_Zhang2;~Changxiang_He2;~Xiaofei_Qin1;~Peisheng_Yang1;~Junyang_Kong1", "gender": "F;M;M;;", "homepage": ";;;;", "dblp": ";;;;", "google_scholar": ";;;;", "orcid": ";0000-0002-0258-5423;0009-0002-1134-5517;0009-0003-1645-4058;", "linkedin": ";;;;", "or_profile": "~Qingqian_Zhang2;~Xiaofei_Qin1;~Peisheng_Yang1;~Junyang_Kong1;~changxiang_he1", "aff": "University of Shanghai for Science and Technology;Shanghai University of Science and Technology;University of Shanghai for Science and Technology;Shanghai University of Science and Technology;", "aff_domain": "usst.edu.cn;usst.edu.cn;usst.edu.cn;usst.edu.cn;", "position": "MS student;Associate Professor;MS student;MS student;", "bibtex": "@misc{\nzhang2024bitgnn,\ntitle={Bi{TGNN}: prediction of drug-target interactions based on bidirectional transformer and graph neural network on heterogeneous graph},\nauthor={Qingqian Zhang and Changxiang He and Xiaofei Qin and Peisheng Yang and Junyang Kong},\nyear={2024},\nurl={https://openreview.net/forum?id=7Gza2TkLPJ}\n}", "github": "", "project": "", "reviewers": "rts4;eUmF;UEEt;Z8k5", "site": "https://openreview.net/forum?id=7Gza2TkLPJ", "pdf_size": 472893, "rating": "1;1;3;3", "confidence": "5;4;4;5", "soundness": "1;1;2;3", 
"contribution": "1;1;2;2", "presentation": "1;1;2;1", "wc_summary": "93;75;58;111", "wc_strengths": "5;7;24;109", "wc_weaknesses": "67;192;250;317", "wc_questions": "4;2;35;55", "wc_review": "169;276;367;592", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 2.0, 1.0 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 1.75, 0.82915619758885 ], "contribution_avg": [ 1.5, 0.5 ], "presentation_avg": [ 1.25, 0.4330127018922193 ], "wc_summary_avg": [ 84.25, 19.79109648301478 ], "wc_strengths_avg": [ 36.25, 42.64607250380743 ], "wc_weaknesses_avg": [ 206.5, 91.88715905935932 ], "wc_questions_avg": [ 24.0, 22.169799277395363 ], "wc_review_avg": [ 351.0, 155.79313206942084 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6952811768654855146&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "University of Shanghai for Science and Technology;Shanghai University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.usst.edu.cn;https://www.sustech.edu.cn", "aff_unique_abbr": "USST;SUSTech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "7HdtLgsvys", "title": "Tube Loss: A Novel Approach for High Quality Prediction Interval Estimation", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "This paper proposes a continuous loss function termed 'tube loss' for Prediction Interval (PI) estimation. The minimizer of the proposed tube loss is a pair of functions $\\mu_1(x)$ and $\\mu_2(x)$ such that the interval $[\\mu_1(x),\\mu_2(x)]$ contains $t$ fraction of $y_i$ values. The tube loss function also facilitates an upward or downward movement of the PI tube so that the estimated PI may cover the densest regions of response values, thus allowing the sharpening of the width of PI, especially when the distribution of the response is skewed. The tube loss function-based machine learning models also have the privilege of trading off the calibration error and the width of PI by solving a single optimization problem. We have illustrated the use of tube loss functions in kernel machines, neural networks, and sequential deep learning models. 
Our numerical experiments show that the tube loss function is effective in yielding narrow and more accurate PIs compared to the existing methods.", "keywords": "Prediction Interval Estimation;Neural Network;Loss Function;Kernel Machine", "primary_area": "probabilistic methods (Bayesian methods, variational inference, sampling, UQ, etc.)", "supplementary_material": "/attachment/f03da49e3d74031967c62860619ea67ae3dee76c.zip", "author": "Pritam Anand;Tathagata Bandyopadhyay;Harshkumar Mukeshbhai Savaliya;Suresh Chandra", "authorids": "~Pritam_Anand1;~Tathagata_Bandyopadhyay1;~Harshkumar_Mukeshbhai_Savaliya2;~Suresh_Chandra2", "gender": "M;M;;M", "homepage": "https://scholar.google.com/citations?user=ATYzQhoAAAAJ&hl=en;;;", "dblp": ";;;", "google_scholar": ";https://scholar.google.co.in/citations?user=NZjB-lUAAAAJ;;https://scholar.google.co.in/citations?user=X8dtzjAAAAAJ", "orcid": ";;;", "linkedin": ";;harsh-savaliya-8b81b117a/;", "or_profile": "~Pritam_Anand1;~Tathagata_Bandyopadhyay1;~Harshkumar_Mukeshbhai_Savaliya2;~Suresh_Chandra2", "aff": "DA-IICT, Gandhinagar;DA-IICT;;", "aff_domain": "daiict.ac.in;daiict.ac.in;;", "position": "Assistant Professor;Full Professor;;", "bibtex": "@misc{\nanonymous2024tube,\ntitle={Tube Loss: A Novel Approach for High Quality Prediction Interval Estimation},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=7HdtLgsvys}\n}", "github": "", "project": "", "reviewers": "tfsf;PHGF;9f1c;RfXE", "site": "https://openreview.net/forum?id=7HdtLgsvys", "pdf_size": 944500, "rating": "1;3;3;3", "confidence": "5;3;2;3", "soundness": "1;3;2;2", "contribution": "1;3;1;2", "presentation": "1;2;1;2", "wc_summary": "37;62;26;74", "wc_strengths": "18;94;15;51", "wc_weaknesses": "44;68;79;137", "wc_questions": "340;137;15;4", "wc_review": "439;361;135;266", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 2.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 1.0897247358851685 ], "soundness_avg": [ 2.0, 0.7071067811865476 ], "contribution_avg": [ 1.75, 0.82915619758885 ], "presentation_avg": [ 1.5, 0.5 ], "wc_summary_avg": [ 49.75, 19.13602623325961 ], "wc_strengths_avg": [ 44.5, 31.87867625859016 ], "wc_weaknesses_avg": [ 82.0, 34.18332927027442 ], "wc_questions_avg": [ 124.0, 135.1906061825303 ], "wc_review_avg": [ 300.25, 113.38292419936964 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9271726499455306, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:v68TjBMqysEJ:scholar.google.com/&scioq=Tube+Loss:+A+Novel+Approach+for+High+Quality+Prediction+Interval+Estimation&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Dhirubhai Ambani Institute of Information and Communication Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.daiict.ac.in", "aff_unique_abbr": "DA-IICT", "aff_campus_unique_index": "0", "aff_campus_unique": "Gandhinagar;", "aff_country_unique_index": "0;0", "aff_country_unique": "India" }, { "id": "7Hf4Wtc8uW", "title": "Variational Bayes Classifier", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Classifiers have traditionally been designed as fully-observed models. These classifiers are generally deterministic, so they are able to obtain a single output per input. 
The problem with this is that in this scenario it is not usually possible to capture the model uncertainty. On the other hand, Bayesian models offer the ability to capture this uncertainty, but usually have a higher computational cost. In this paper we propose to build a classifier as a latent variable model. This latent variable corresponds to what is usually called embedding and with our proposal we can model its distribution, which has two fundamental advantages. The first is that by knowing the distribution of the embeddings, the uncertainty of the predictions can be estimated. In addition, certain conditions can be imposed on the distribution of the embeddings to favor aspects such as interclass separation. We also propose an evidence lower bound to optimize the parameters of this classifier which can be maximized using stochastic gradient methods. Finally, we give two alternatives to implement these models using neural networks and demonstrate empirically the theoretical advantages of our proposal using different architectures and datasets.", "keywords": "embeddings organization;calibrated classification;variational classifier", "primary_area": "probabilistic methods (Bayesian methods, variational inference, sampling, UQ, etc.)", "supplementary_material": "/attachment/76a3d790c197c181ceb24e0c74ecd4265d93b142.zip", "author": "Antonio Almud\u00e9var;Alfonso Ortega;Antonio Miguel;Luis Vicente;Eduardo Lleida", "authorids": "~Antonio_Almud\u00e9var1;~Alfonso_Ortega2;~Antonio_Miguel1;~Luis_Vicente1;~Eduardo_Lleida1", "gender": "M;M;;M;", "homepage": "https://vivolab.i3a.es/antonio-almudevar/;http://alfonso.vivolab.es;https://vivolab.i3a.es/antonio-miguel/;;https://vivolab.i3a.es/eduardo-lleida/", "dblp": "333/6950;121/1854-1.html;;;14/4997", "google_scholar": "https://scholar.google.com/citations?hl=es;https://scholar.google.es/citations?hl=es;https://scholar.google.com/citations?hl=es;https://scholar.google.es/citations?user=pOEAiugAAAAJ;https://scholar.google.es/citations?hl=es", "orcid": ";0000-0002-3886-7748;0000-0001-5803-4316;0000-0003-4391-5203;0000-0001-9137-4013", "linkedin": ";;antonio-miguel-78792410/;luisvicenteborruel/;https://linkedin.com/in/eduardolleida", "or_profile": "~Antonio_Almud\u00e9var1;~Alfonso_Ortega2;~Antonio_Miguel1;~Luis_Vicente1;~Eduardo_Lleida1", "aff": "Universidad de Zaragoza;Universidad de Zaragoza;Universidad de Zaragoza;Universidad de Zaragoza;Universidad de Zaragoza", "aff_domain": "unizar.es;unizar.es;unizar.es;unizar.es;unizar.es", "position": "PhD student;Associate Professor;Associate Professor;Associate Professor;Full Professor", "bibtex": "@misc{\nalmud{\\'e}var2024variational,\ntitle={Variational Bayes Classifier},\nauthor={Antonio Almud{\\'e}var and Alfonso Ortega and Antonio Miguel and Luis Vicente and Eduardo Lleida},\nyear={2024},\nurl={https://openreview.net/forum?id=7Hf4Wtc8uW}\n}", "github": "", "project": "", "reviewers": "fLRd;pf4E;QwG7;KP2i", "site": "https://openreview.net/forum?id=7Hf4Wtc8uW", "pdf_size": 617254, "rating": "3;5;5;5", "confidence": "3;4;3;4", "soundness": "3;3;2;3", "contribution": "2;2;2;2", "presentation": "3;3;2;2", "wc_summary": "72;100;74;39", "wc_strengths": "57;85;73;69", "wc_weaknesses": "159;468;101;193", "wc_questions": "147;1;450;4", "wc_review": "435;654;698;305", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], 
"contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 71.25, 21.649191670822262 ], "wc_strengths_avg": [ 71.0, 10.0 ], "wc_weaknesses_avg": [ 230.25, 141.15129294483987 ], "wc_questions_avg": [ 150.5, 182.70536390593463 ], "wc_review_avg": [ 523.0, 160.51012429127329 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:HfhhSe8DOPsJ:scholar.google.com/&scioq=Variational+Bayes+Classifier&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Universidad de Zaragoza", "aff_unique_dep": "", "aff_unique_url": "https://www.unizar.es", "aff_unique_abbr": "UNIZAR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Spain" }, { "id": "7HfliVAtCG", "title": "Detect Every Thing with Few Examples", "track": "main", "status": "Reject", "tldr": "", "abstract": "Open-set object detection aims at detecting arbitrary categories beyond those seen during training. Most recent advancements have adopted the open-vocabulary paradigm, utilizing vision-language backbones to represent categories with language. In this paper, we introduce DE-ViT, an open-set object detector that employs vision-only DINOv2 backbones and learns new categories through example images instead of language. To improve general detection ability, we transform multi-classification tasks into binary classification tasks while bypassing per-class inference, and propose a novel region propagation technique for localization. We evaluate DE-ViT on open-vocabulary, few-shot, and one-shot object detection benchmark with COCO and LVIS. For COCO, DE-ViT outperforms the open-vocabulary SoTA by 6.9 AP50 and achieves 50 AP50 in novel classes. DE-ViT surpasses the few-shot SoTA by 15 mAP on 10-shot and 7.2 mAP on 30-shot and one-shot SoTA by 2.8 AP50. 
For LVIS, DE-ViT outperforms the open-vocabulary SoTA by 2.2 mask AP and reaches 34.3 mask APr.", "keywords": "Few-shot;Object detection;Open-vocabulary", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Xinyu Zhang;Yuting Wang;Abdeslam Boularias", "authorids": "~Xinyu_Zhang7;~Yuting_Wang2;~Abdeslam_Boularias1", "gender": "M;;M", "homepage": "https://mlzxy.github.io/;;http://rl.cs.rutgers.edu/", "dblp": ";09/8269-4;57/2269", "google_scholar": "M7hnG9oAAAAJ;o9V5WAYAAAAJ;https://scholar.google.com.tw/citations?user=8AF3RCsAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Xinyu_Zhang7;~Yuting_Wang2;~Abdeslam_Boularias1", "aff": "Rutgers University;Amazon;, Rutgers University", "aff_domain": "rutgers.edu;amazon.com;cs.rutgers.edu", "position": "PhD student;Researcher;Associate Professor", "bibtex": "@misc{\nzhang2024detect,\ntitle={Detect Every Thing with Few Examples},\nauthor={Xinyu Zhang and Yuting Wang and Abdeslam Boularias},\nyear={2024},\nurl={https://openreview.net/forum?id=7HfliVAtCG}\n}", "github": "", "project": "", "reviewers": "Vm41;f8VP;bhpM;Jjnu", "site": "https://openreview.net/forum?id=7HfliVAtCG", "pdf_size": 9368714, "rating": "5;5;6;6", "confidence": "5;4;4;4", "soundness": "2;2;3;3", "contribution": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "63;46;105;79", "wc_strengths": "39;14;56;37", "wc_weaknesses": "204;83;18;284", "wc_questions": "11;4;197;4", "wc_review": "317;147;376;404", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1944;2224;1480;997", "reply_reviewers": "0;0;0;0", "reply_authors": "3;5;3;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 73.25, 21.72987574745884 ], "wc_strengths_avg": [ 36.5, 14.941552797483935 ], "wc_weaknesses_avg": [ 147.25, 103.38610883479463 ], "wc_questions_avg": [ 54.0, 82.61053201620238 ], "wc_review_avg": [ 311.0, 99.7572052535555 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1661.25, 466.5604864323596 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 3.25, 1.0897247358851685 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2956108232415386259&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;0", "aff_unique_norm": "Rutgers University;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.rutgers.edu;https://www.amazon.com", "aff_unique_abbr": "Rutgers;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "7J0NsFXnFd", "title": "Optimal Action Abstraction for Imperfect Information Extensive-Form Games", "track": "main", "status": "Reject", "tldr": "", "abstract": "Action abstraction is critical for solving imperfect information extensive-form games (IIEFGs) with large action spaces. However, due to the large number of states and high computational complexity in IIEFGs, existing methods often focus on using a fixed abstraction, which can result in sub-optimal performance. To tackle this issue, we propose a novel Markov Decision Process (MDP) formulation for finding the optimal (and possibly state-dependent) action abstraction. 
Specifically, the state of the MDP is defined as the public information of the game, each action is a feature vector representing a particular action abstraction, and the reward is defined as the expected value difference between the selected action abstraction and a default fixed action abstraction. Based on this MDP, we build a game tree with the action abstraction selected by reinforcement learning (RL), and solve for the optimal strategy based on counterfactual regret minimization (CFR). This two-phase framework, named RL-CFR, effectively trades off computational complexity (due to CFR) and performance improvement (due to RL) for IIEFGs, and offers a novel RL-guided action abstraction selection in CFR. To demonstrate the effectiveness of RL-CFR, we apply the method to solve Heads-up No-limit (HUNL) Texas Hold'em, a popular representative benchmark for IIEFGs. Our results show that RL-CFR defeats ReBeL, one of the best fixed action abstraction-based HUNL algorithms, and a strong HUNL agent Slumbot by significant win-rate margins $64\\pm 11$ and $84\\pm 17$ mbb/hand, respectively.", "keywords": "Game Theory;Imperfect Information Games;Extensive-Form Games;Regret Minimization;Reinforcement Learning;Texas Hold'em", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/98566200990017f081b3b26726c2443862776056.pdf", "author": "Boning Li;Zhixuan Fang;Longbo Huang", "authorids": "~Boning_Li3;~Zhixuan_Fang1;~Longbo_Huang2", "gender": "M;M;M", "homepage": "https://lbn187.github.io/;https://people.iiis.tsinghua.edu.cn/~fang/;http://people.iiis.tsinghua.edu.cn/~huang/", "dblp": ";179/2243;79/7077", "google_scholar": ";0N4s3CAAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Boning_Li3;~Zhixuan_Fang1;~Longbo_Huang2", "aff": "Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "mail.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@misc{\nli2024optimal,\ntitle={Optimal Action Abstraction for Imperfect Information Extensive-Form Games},\nauthor={Boning Li and Zhixuan Fang and Longbo Huang},\nyear={2024},\nurl={https://openreview.net/forum?id=7J0NsFXnFd}\n}", "github": "", "project": "", "reviewers": "YRS4;Y7tW;BWf4;3LiM", "site": "https://openreview.net/forum?id=7J0NsFXnFd", "pdf_size": 644861, "rating": "3;6;6;6", "confidence": "4;4;3;4", "soundness": "2;3;2;2", "contribution": "2;3;2;3", "presentation": "1;3;2;2", "wc_summary": "112;75;64;247", "wc_strengths": "49;88;110;119", "wc_weaknesses": "412;221;240;1434", "wc_questions": "187;68;124;1080", "wc_review": "760;452;538;2880", "wc_reply_reviewers": "238;14;0;426", "wc_reply_authors": "1272;680;912;4144", "reply_reviewers": "1;1;0;1", "reply_authors": "4;2;2;8", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 124.5, 72.92633269265636 ], "wc_strengths_avg": [ 91.5, 27.004629232781554 ], "wc_weaknesses_avg": [ 576.75, 500.49444302609396 ], "wc_questions_avg": [ 364.75, 415.08997518610346 ], "wc_review_avg": [ 1157.5, 1000.8150428525743 ], "wc_reply_reviewers_avg": [ 169.5, 175.63812228556762 ], "wc_reply_authors_avg": [ 1752.0, 1397.036864223704 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 4.0, 2.449489742783178 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 
-0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:GQzfbIjeHbUJ:scholar.google.com/&scioq=Optimal+Action+Abstraction+for+Imperfect+Information+Extensive-Form+Games&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "7JRbs3i9Ei", "title": "Machine Learning for PROTAC Engineering", "track": "main", "status": "Reject", "tldr": "", "abstract": "PROTACs are a promising therapeutic technology that harnesses the cell's built-in degradation processes to degrade specific proteins. Despite their potential, developing new PROTAC molecules is challenging and requires significant expertise, time, and cost. Meanwhile, machine learning has transformed various scientific fields, including drug development. In this work, we present a strategy for curating open-source PROTAC data and propose an open-source toolkit for predicting the degradation effectiveness, i.e., activity, of novel PROTAC molecules. We organized the curated data into 16 different datasets ready to be processed by machine learning models. The datasets incorporate important features such as $pDC_{50}$, $D_{max}$, E3 ligase type, POI amino acid sequence, and experimental cell type. Our toolkit includes a configurable PyTorch dataset class tailored to process PROTAC features, a customizable machine learning model for processing various PROTAC features, and a hyperparameter optimization mechanism powered by Optuna. To evaluate the system, three surrogate models were developed utilizing different PROTAC representations. Using our automatically-curated public datasets, the best models achieved a 71.4% validation accuracy and a 0.73 ROC-AUC validation score. 
This is not only comparable to state-of-the-art models for protein degradation prediction, but also open-source, easily-reproducible, and less computationally complex than existing approaches.", "keywords": "Deep learning;Chemoinformatics;PROTAC;Drug design.", "primary_area": "applications to physical sciences (physics, chemistry, biology, etc.)", "supplementary_material": "", "author": "Stefano Ribes;Eva Nittinger;Christian Tyrchan;Roc\u00edo Mercado", "authorids": "~Stefano_Ribes1;eva.nittinger@astrazeneca.com;christian.tyrchan@astrazeneca.com;~Roc\u00edo_Mercado1", "gender": "M;;;F", "homepage": ";;;https://rociomer.github.io/", "dblp": ";;;", "google_scholar": "https://scholar.google.com/citations?hl=en;;;v2P0-IoAAAAJ", "orcid": ";;;0000-0002-6170-6088", "linkedin": "stefano-ribes-1379b1107/;;;rociomer/", "or_profile": "~Stefano_Ribes1;eva.nittinger@astrazeneca.com;christian.tyrchan@astrazeneca.com;~Roc\u00edo_Mercado1", "aff": ";;;Chalmers University of Technology", "aff_domain": ";;;chalmers.se", "position": ";;;Assistant Professor", "bibtex": "@misc{\nribes2024machine,\ntitle={Machine Learning for {PROTAC} Engineering},\nauthor={Stefano Ribes and Eva Nittinger and Christian Tyrchan and Roc{\\'\\i}o Mercado},\nyear={2024},\nurl={https://openreview.net/forum?id=7JRbs3i9Ei}\n}", "github": "", "project": "", "reviewers": "A2WQ;qQeP;BNfR", "site": "https://openreview.net/forum?id=7JRbs3i9Ei", "pdf_size": 798945, "rating": "3;5;5", "confidence": "4;3;4", "soundness": "1;2;3", "contribution": "1;2;2", "presentation": "2;2;2", "wc_summary": "193;33;99", "wc_strengths": "210;43;213", "wc_weaknesses": "222;19;110", "wc_questions": "48;369;66", "wc_review": "673;464;488", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.0, 0.816496580927726 ], "contribution_avg": [ 1.6666666666666667, 0.4714045207910317 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 108.33333333333333, 65.65228268858762 ], "wc_strengths_avg": [ 155.33333333333334, 79.44110327084391 ], "wc_weaknesses_avg": [ 117.0, 83.02208541506691 ], "wc_questions_avg": [ 161.0, 147.26167186338745 ], "wc_review_avg": [ 541.6666666666666, 93.3821301011185 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.49999999999999983, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:MDX2luF9r44J:scholar.google.com/&scioq=Machine+Learning+for+PROTAC+Engineering&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Chalmers University of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.chalmers.se", "aff_unique_abbr": "Chalmers", "aff_country_unique_index": "0", "aff_country_unique": "Sweden" }, { "id": "7JU8TwFXGC", "title": "LLM Performance Predictors are good initializers for Architecture Search", "track": "main", "status": "Reject", "tldr": "", "abstract": "Large language models (LLMs) have become an integral component in solving a wide range of NLP tasks. In this work, we explore a novel use case of using LLMs to build performance predictors (PP): models that, given a specific deep neural network architecture, predict its performance on a downstream task. 
We design PP prompts for LLMs consisting of: (i) role: description of the role assigned to the LLM, (ii) instructions: set of instructions to be followed by the LLM to carry out performance prediction, (iii) hyperparameters: a definition of each architecture-specific hyperparameter and (iv) demonstrations: sample architectures along with their efficiency metrics and 'training from scratch' performance. For machine translation (MT) tasks, we discover that GPT-4 with our PP prompts (LLM-PP) can predict the performance of architecture with a mean absolute error matching the SOTA and a marginal degradation in rank correlation coefficient compared to SOTA performance predictors. Further, we show that the predictions from LLM-PP can be distilled to a small regression model (LLM-Distill-PP). LLM-Distill-PP models surprisingly retain the performance of LLM-PP largely and can be a cost-effective alternative for heavy use cases of performance estimation. Specifically, for neural architecture search (NAS), we propose a Hybrid-Search algorithm for NAS (HS-NAS), which uses LLM-Distill-PP for the initial part of search, resorting to the baseline predictor for rest of the search. We show that HS-NAS performs very similar to SOTA NAS across benchmarks, reduces search hours by \u223c50%, and in some cases, improves latency, GFLOPs, and model size.", "keywords": "Large language models;Neural architecture search;Performance Predictor;Machine Translation", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Ganesh Jawahar;Muhammad Abdul-Mageed;Laks V. S. Lakshmanan;Dujian Ding", "authorids": "~Ganesh_Jawahar1;~Muhammad_Abdul-Mageed2;~Laks_V._S._Lakshmanan1;~Dujian_Ding1", "gender": "M;;;", "homepage": "https://ganeshjawahar.github.io/;;https://www.cs.ubc.ca/~laks;", "dblp": "203/9710;;l/LVSLakshmanan;244/8792", "google_scholar": "https://scholar.google.co.in/citations?user=X7SMP1EAAAAJ;;https://scholar.google.ca/citations?user=_RCsaOsAAAAJ;https://scholar.google.ca/citations?user=1-FsZPQAAAAJ", "orcid": ";;0000-0002-9775-4241;", "linkedin": "https://in.linkedin.com/in/ganesh-jawahar-ab928435;;laksvslakshmanan/;dujian-ding-250123133/", "or_profile": "~Ganesh_Jawahar1;~Muhammad_Abdul-Mageed2;~Laks_V._S._Lakshmanan1;~Dujian_Ding1", "aff": "University of British Columbia;;University of British Columbia;Computing Science, University of British Columbia", "aff_domain": "ubc.ca;;ubc.ca;cs.ubc.ca", "position": "PhD student;;Professor;PhD student", "bibtex": "@misc{\njawahar2024llm,\ntitle={{LLM} Performance Predictors are good initializers for Architecture Search},\nauthor={Ganesh Jawahar and Muhammad Abdul-Mageed and Laks V. S. 
Lakshmanan and Dujian Ding},\nyear={2024},\nurl={https://openreview.net/forum?id=7JU8TwFXGC}\n}", "github": "", "project": "", "reviewers": "mogH;oJTh;ABPv;GzhX", "site": "https://openreview.net/forum?id=7JU8TwFXGC", "pdf_size": 536989, "rating": "3;5;6;6", "confidence": "4;4;4;3", "soundness": "1;2;3;3", "contribution": "1;3;3;3", "presentation": "3;2;3;1", "wc_summary": "43;84;58;68", "wc_strengths": "28;83;185;41", "wc_weaknesses": "206;326;102;95", "wc_questions": "4;28;85;50", "wc_review": "281;521;430;254", "wc_reply_reviewers": "165;240;44;0", "wc_reply_authors": "1285;1546;801;549", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;1;1", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "contribution_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 63.25, 14.922717580923388 ], "wc_strengths_avg": [ 84.25, 61.617266248998746 ], "wc_weaknesses_avg": [ 182.25, 93.91585329431874 ], "wc_questions_avg": [ 41.75, 29.80247472945829 ], "wc_review_avg": [ 371.5, 109.28060212132802 ], "wc_reply_reviewers_avg": [ 112.25, 95.34247479481535 ], "wc_reply_authors_avg": [ 1045.25, 391.83949201171646 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.4714045207910316, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=712574979734589959&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of British Columbia", "aff_unique_dep": "", "aff_unique_url": "https://www.ubc.ca", "aff_unique_abbr": "UBC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Vancouver", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "id": "7Jer2DQt9V", "title": "The Unreasonable Effectiveness of Pretraining in Graph OOD", "track": "main", "status": "Reject", "tldr": "", "abstract": "Graph neural networks have shown significant progress in various tasks, yet their ability to generalize in out-of-distribution (OOD) scenarios remains an open question. In this study, we conduct a comprehensive benchmarking of the efficacy of graph pre-trained models in the context of OOD challenges, named PODGenGraph. We conduct extensive experiments across diverse datasets, spanning general and molecular graph domains and encompassing different graph sizes. Our benchmark is framed around distinct distribution shifts, including both concept and covariate shifts, whilst also varying the degree of shift. Our findings are striking: even basic pre-trained models exhibit performance that is not only comparable to, but often surpasses, that of models specifically designed to handle distribution shift. We further investigate the results, examining the influence of the key factors (e.g., sample size, learning rates, in-distribution performance, etc.) of pre-trained models for OOD generalization. In general, our work shows that pre-training could be a flexible and simple approach to OOD generalization in graph learning. Leveraging pre-trained models together for graph OOD generalization in real-world applications stands as a promising avenue for future research.", "keywords": "Graph pre-training;Graph out of distribution", "primary_area": "datasets and benchmarks", "supplementary_material": "/attachment/f6031b9f3f6b1d563bf73f57f72d57225c61d6b9.pdf", "author": "Qi Liu;Rosa H. M. 
Chan;Rose Yu", "authorids": "~Qi_Liu8;~Rosa_H._M._Chan1;~Rose_Yu1", "gender": ";F;F", "homepage": "https://www.qi-liu.com/;https://www.ee.cityu.edu.hk/~rosachan;http://roseyu.com", "dblp": ";86/7576;164/7314", "google_scholar": ";n71R2h8AAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Qi_Liu8;~Rosa_H._M._Chan1;~Rose_Yu1", "aff": "City University of Hong Kong;City University of Hong Kong;University of California, San Diego", "aff_domain": "cityu.edu.hk;cityu.edu.hk;ucsd.edu", "position": "PhD student;Associate Professor;Assistant Professor", "bibtex": "@misc{\nliu2024the,\ntitle={The Unreasonable Effectiveness of Pretraining in Graph {OOD}},\nauthor={Qi Liu and Rosa H. M. Chan and Rose Yu},\nyear={2024},\nurl={https://openreview.net/forum?id=7Jer2DQt9V}\n}", "github": "", "project": "", "reviewers": "RvDk;xDkS;qBm3;tP6E", "site": "https://openreview.net/forum?id=7Jer2DQt9V", "pdf_size": 1404820, "rating": "3;5;5;5", "confidence": "4;4;4;4", "soundness": "2;3;2;3", "contribution": "2;2;2;3", "presentation": "3;2;3;2", "wc_summary": "58;73;80;84", "wc_strengths": "49;27;56;134", "wc_weaknesses": "156;244;295;220", "wc_questions": "12;2;43;70", "wc_review": "275;346;474;508", "wc_reply_reviewers": "73;48;0;0", "wc_reply_authors": "718;504;924;576", "reply_reviewers": "1;1;0;0", "reply_authors": "1;1;2;1", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 73.75, 9.908960591303208 ], "wc_strengths_avg": [ 66.5, 40.413487847499624 ], "wc_weaknesses_avg": [ 228.75, 49.97686964986903 ], "wc_questions_avg": [ 31.75, 26.76167969317322 ], "wc_review_avg": [ 400.75, 94.44409722158395 ], "wc_reply_reviewers_avg": [ 30.25, 31.514877439076294 ], "wc_reply_authors_avg": [ 680.5, 160.28958169513078 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:WzMw4lJiXFIJ:scholar.google.com/&scioq=The+Unreasonable+Effectiveness+of+Pretraining+in+Graph+OOD&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;1", "aff_unique_norm": "City University of Hong Kong;University of California, San Diego", "aff_unique_dep": ";", "aff_unique_url": "https://www.cityu.edu.hk;https://www.ucsd.edu", "aff_unique_abbr": "CityU;UCSD", "aff_campus_unique_index": "0;0;1", "aff_campus_unique": "Hong Kong SAR;San Diego", "aff_country_unique_index": "0;0;1", "aff_country_unique": "China;United States" }, { "title": "STREAM: Spatio-TempoRal Evaluation and Analysis Metric for Video Generative Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19367", "id": "7JfKCZQPxJ", "author_site": "Pum Jun Kim, Seojun Kim, Jaejun Yoo", "tldr": "", "abstract": "Image generative models have made significant progress in generating realistic and diverse images, supported by comprehensive guidance from various evaluation metrics. However, current video generative models struggle to generate even\nshort video clips, with limited tools that provide insights for improvements. Current video evaluation metrics are simple adaptations of image metrics by switching the embeddings with video embedding networks, which may underestimate the unique characteristics of video. 
Our analysis reveals that the widely used Frechet Video Distance (FVD) has a stronger emphasis on the spatial aspect than the temporal naturalness of video and is inherently constrained by the input size of the embedding networks used, limiting it to 16 frames. Additionally, it demonstrates considerable instability and diverges from human evaluations. To address the limitations, we propose STREAM, a new video evaluation metric uniquely designed to independently evaluate spatial and temporal aspects. This feature allows comprehensive analysis and evaluation of video generative models from various perspectives, unconstrained by video length. We provide analytical and experimental evidence demonstrating that STREAM provides an effective evaluation tool for both visual and temporal quality of videos, offering insights into area of improvement for video generative models. To the best of our knowledge, STREAM is the first evaluation metric that can separately assess the temporal and spatial aspects of videos. Our code is available at https://github.com/pro2nit/STREAM.", "keywords": "Generative Models;Video Generative Models;Evaluation;Fidelity;Diversity;Assessment", "primary_area": "generative models", "supplementary_material": "", "author": "Pum Jun Kim;Seojun Kim;Jaejun Yoo", "authorids": "~Pum_Jun_Kim1;~Seojun_Kim1;~Jaejun_Yoo1", "gender": "M;M;M", "homepage": ";;", "dblp": "349/4625;;141/8878-1", "google_scholar": "WGJgXskAAAAJ;;https://scholar.google.co.kr/citations?user=7NBlQw4AAAAJ", "orcid": "0000-0001-8220-0951;;0000-0001-5252-9668", "linkedin": ";\uc11c\uc900-\uae40-6b74a7292/;jaejunyoo/", "or_profile": "~Pum_Jun_Kim1;~Seojun_Kim1;~Jaejun_Yoo1", "aff": "Ulsan National Institute of Science and Technology;Ulsan National Institute of Science and Technology;Ulsan National Institute of Science and Technology", "aff_domain": "unist.ac.kr;unist.ac.kr;unist.ac.kr", "position": "PhD student;MS student;Assistant Professor", "bibtex": "@inproceedings{\nkim2024stream,\ntitle={{STREAM}: Spatio-TempoRal Evaluation and Analysis Metric for Video Generative Models},\nauthor={Pum Jun Kim and Seojun Kim and Jaejun Yoo},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=7JfKCZQPxJ}\n}", "github": "", "project": "", "reviewers": "rxEA;srFK;1pjF;QwpP", "pdf_size": 5353516, "rating": "3;6;6;6", "confidence": "5;4;4;4", "soundness": "2;3;3;3", "contribution": "2;2;3;3", "presentation": "3;3;2;2", "wc_summary": "41;94;94;48", "wc_strengths": "32;270;63;61", "wc_weaknesses": "43;351;158;75", "wc_questions": "185;95;2;41", "wc_review": "301;810;317;225", "wc_reply_reviewers": "0;129;41;0", "wc_reply_authors": "1191;659;528;648", "reply_reviewers": "0;1;1;0", "reply_authors": "3;1;1;1", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 69.25, 24.873429598670143 ], "wc_strengths_avg": [ 106.5, 95.19059827524985 ], "wc_weaknesses_avg": [ 156.75, 119.74634649959054 ], "wc_questions_avg": [ 80.75, 68.65265836076561 ], "wc_review_avg": [ 413.25, 231.68553580230252 ], "wc_reply_reviewers_avg": [ 42.5, 52.671149598238316 ], "wc_reply_authors_avg": [ 756.5, 256.06688579353636 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 5, 
"gs_cited_by_link": "https://scholar.google.com/scholar?cites=17515982151271475644&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=7JfKCZQPxJ", "pdf": "https://openreview.net/pdf?id=7JfKCZQPxJ", "email": "unist.ac.kr;unist.ac.kr;unist.ac.kr", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Ulsan National Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.unist.ac.kr", "aff_unique_abbr": "UNIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "id": "7JigPd5Pm5", "title": "Informed weight initialization of Graph Neural Networks and its effect on Oversmoothing", "track": "main", "status": "Reject", "tldr": "", "abstract": "In this work, we generalize the ideas of Kaiming initialization to Graph Neural Networks (GNNs) and propose a new initialization scheme that addresses the problem of oversmoothing. GNNs are typically initialized using methods, that have been designed for other types of Neural Networks, such as Xavier or Kaiming initialization. Such methods ignore the underlying topology of the graph. In this work, propose a new initialization method, called G-Init, which takes into account (a) the variance of signals flowing forward, (b) the gradients flowing backward\nin the network, and (c) the effect of graph convolution, which tends to smooth node representations and lead to the problem of oversmoothing. Oversmoothing is an inherent problem of GNNs, which appears when their depth increases, making node representations indistinguishable. We show that in deep GNNs, G-Init reduces oversmoothing and enables deep architectures. We also verify the theoretical results experimentally.", "keywords": "Graph Neural Networks;Weight initialization;Oversmoothing", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Dimitrios Kelesis;Dimitris Fotakis;Georgios Paliouras", "authorids": "~Dimitrios_Kelesis1;~Dimitris_Fotakis1;~Georgios_Paliouras1", "gender": ";M;M", "homepage": ";http://www.softlab.ntua.gr/~fotakis/;https://users.iit.demokritos.gr/~paliourg", "dblp": "309/5763;95/4731;55/2039", "google_scholar": ";zFDLf0UAAAAJ;-pec7wIAAAAJ", "orcid": ";0000-0001-6864-8960;0000-0001-9629-2367", "linkedin": "dimitrios-kelesis-b614451b5/;;georgios-paliouras-a203a79/", "or_profile": "~Dimitrios_Kelesis1;~Dimitris_Fotakis1;~Georgios_Paliouras1", "aff": "National Centre For Scientific Research Demokritos;National Technical University of Athens;NCSR \u201cDemokritos\u201d", "aff_domain": "iit.demokritos.gr;ntua.gr;demokritos.gr", "position": "Researcher;Full Professor;Researcher", "bibtex": "@misc{\nkelesis2024informed,\ntitle={Informed weight initialization of Graph Neural Networks and its effect on Oversmoothing},\nauthor={Dimitrios Kelesis and Dimitris Fotakis and Georgios Paliouras},\nyear={2024},\nurl={https://openreview.net/forum?id=7JigPd5Pm5}\n}", "github": "", "project": "", "reviewers": "8Vyh;fihQ;kvnA;FoM5", "site": "https://openreview.net/forum?id=7JigPd5Pm5", "pdf_size": 240711, "rating": "1;3;3;3", "confidence": "4;4;3;4", "soundness": "1;1;2;3", "contribution": "2;1;2;1", "presentation": "1;2;2;1", "wc_summary": "54;356;86;51", "wc_strengths": "20;27;24;24", "wc_weaknesses": "255;574;4;108", "wc_questions": "44;3;326;1", "wc_review": "373;960;440;184", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": 
"0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 2.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 1.75, 0.82915619758885 ], "contribution_avg": [ 1.5, 0.5 ], "presentation_avg": [ 1.5, 0.5 ], "wc_summary_avg": [ 136.75, 127.32512517174291 ], "wc_strengths_avg": [ 23.75, 2.48746859276655 ], "wc_weaknesses_avg": [ 235.25, 214.94810420192127 ], "wc_questions_avg": [ 93.5, 135.32645713237304 ], "wc_review_avg": [ 489.25, 287.54249685915994 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:KdTdcnEWWEMJ:scholar.google.com/&scioq=Informed+weight+initialization+of+Graph+Neural+Networks+and+its+effect+on+Oversmoothing&hl=en&as_sdt=0,48", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "National Centre for Scientific Research 'Demokritos';National Technical University of Athens;National Centre for Scientific Research \u201cDemokritos\u201d", "aff_unique_dep": ";;", "aff_unique_url": "https://www.demokritos.gr;https://www.ntua.gr;https://www.demokritos.gr", "aff_unique_abbr": "NCSR Demokritos;NTUA;NCSR Demokritos", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Greece" }, { "title": "AutoDAN: Generating Stealthy Jailbreak Prompts on Aligned Large Language Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19366", "id": "7Jwpw4qKkb", "author_site": "Xiaogeng Liu, Nan Xu, Muhao Chen, Chaowei Xiao", "tldr": "", "abstract": "The aligned Large Language Models (LLMs) are powerful language understanding and decision-making tools that are created through extensive alignment with human feedback. However, these large models remain susceptible to jailbreak attacks, where adversaries manipulate prompts to elicit malicious outputs that should not be given by aligned LLMs. Investigating jailbreak prompts can lead us to delve into the limitations of LLMs and further guide us to secure them. Unfortunately, existing jailbreak techniques suffer from either (1) scalability issues, where attacks heavily rely on manual crafting of prompts, or (2) stealthiness problems, as attacks depend on token-based algorithms to generate prompts that are often semantically meaningless, making them susceptible to detection through basic perplexity testing. In light of these challenges, we intend to answer this question: Can we develop an approach that can automatically generate stealthy jailbreak prompts? In this paper, we introduce AutoDAN, a novel jailbreak attack against aligned LLMs. AutoDAN can automatically generate stealthy jailbreak prompts by the carefully designed hierarchical genetic algorithm. Extensive evaluations demonstrate that AutoDAN not only automates the process while preserving semantic meaningfulness, but also demonstrates superior attack strength in cross-model transferability, and cross-sample universality compared with the baseline. Moreover, we also compare AutoDAN with perplexity-based defense methods and show that AutoDAN can bypass them effectively. 
Code is available at https://github.com/SheltonLiu-N/AutoDAN.", "keywords": "Large Language Models;Jailbreak Attack;Adversarial Attack", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Xiaogeng Liu;Nan Xu;Muhao Chen;Chaowei Xiao", "authorids": "~Xiaogeng_Liu1;~Nan_Xu2;~Muhao_Chen1;~Chaowei_Xiao2", "gender": "M;F;M;M", "homepage": ";https://sites.google.com/site/xunannancy;https://muhaochen.github.io/;https://xiaocw11.github.io/", "dblp": "304/1538;;173/2608;150/3317", "google_scholar": "Gvs5nz8AAAAJ;https://scholar.google.co.uk/citations?hl=en;k79yEZkAAAAJ;Juoqtj8AAAAJ", "orcid": ";;0000-0003-0118-3147;0000-0002-7043-4926", "linkedin": ";https://linkedin.com/in/nan-xu-b52777125;;", "or_profile": "~Xiaogeng_Liu1;~Nan_Xu2;~Muhao_Chen1;~chaowei_xiao1", "aff": "University of Wisconsin - Madison;University of Southern California;University of Southern California;NVIDIA", "aff_domain": "wisc.edu;usc.edu;usc.edu;nvidia.com", "position": "PhD student;PhD student;Adjunct Professor;Researcher", "bibtex": "@inproceedings{\nliu2024autodan,\ntitle={Auto{DAN}: Generating Stealthy Jailbreak Prompts on Aligned Large Language Models},\nauthor={Xiaogeng Liu and Nan Xu and Muhao Chen and Chaowei Xiao},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=7Jwpw4qKkb}\n}", "github": "", "project": "", "reviewers": "6bTy;UziC;QEyR;GPCZ", "pdf_size": 514653, "rating": "6;6;8;8", "confidence": "4;4;4;3", "soundness": "2;3;3;2", "contribution": "2;2;3;3", "presentation": "2;3;3;4", "wc_summary": "45;78;19;71", "wc_strengths": "33;48;42;53", "wc_weaknesses": "130;311;51;101", "wc_questions": "141;6;5;84", "wc_review": "349;443;117;309", "wc_reply_reviewers": "69;0;0;23", "wc_reply_authors": "1048;1606;105;1835", "reply_reviewers": "1;0;0;1", "reply_authors": "6;5;1;9", "rating_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 53.25, 23.284920012746447 ], "wc_strengths_avg": [ 44.0, 7.44983221287567 ], "wc_weaknesses_avg": [ 148.25, 98.12078016404068 ], "wc_questions_avg": [ 59.0, 57.17079674099356 ], "wc_review_avg": [ 304.5, 118.6791894141513 ], "wc_reply_reviewers_avg": [ 23.0, 28.16913204200655 ], "wc_reply_authors_avg": [ 1148.5, 667.0046851409666 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 5.25, 2.8613807855648994 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 508, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6354376323759784601&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=7Jwpw4qKkb", "pdf": "https://openreview.net/pdf?id=7Jwpw4qKkb", "email": "wisc.edu;usc.edu;usc.edu;nvidia.com", "author_num": 4, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "University of Wisconsin-Madison;University of Southern California;NVIDIA", "aff_unique_dep": ";;NVIDIA Corporation", "aff_unique_url": "https://www.wisc.edu;https://www.usc.edu;https://www.nvidia.com", "aff_unique_abbr": "UW-Madison;USC;NVIDIA", "aff_campus_unique_index": "0;1;1", "aff_campus_unique": "Madison;Los Angeles;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "A Foundation Model for Error Correction Codes", "status": "Poster", "track": "main", "site": 
"https://iclr.cc/virtual/2024/poster/19365", "id": "7KDuQPrAF3", "author_site": "Yoni Choukroun, Lior Wolf", "tldr": "", "abstract": "In recent years, Artificial Intelligence has undergone a paradigm shift with the rise of foundation models, which are trained on large amounts of data, typically in a self-supervised way, and can then be adapted to a wide range of downstream tasks. In this work, we propose the first foundation model for Error Correction Codes. This model is trained on multiple codes and can then be applied to an unseen code. To enable this, we extend the Transformer architecture in multiple ways: (1) a code-invariant initial embedding, which is also position- and length-invariant, (2) a learned modulation of the attention maps that is conditioned on the Tanner graph, and (3) a length-invariant code-aware noise prediction module that is based on the parity-check matrix. The proposed architecture is trained on multiple short- and medium-length codes and is able to generalize to unseen codes. Its performance on these codes matches and even outperforms the state of the art, despite having a smaller capacity than the leading code-specific transformers. The suggested framework therefore demonstrates, for the first time, the benefits of learning a universal decoder rather than a neural decoder optimized for a given code.", "keywords": "Error Correction Codes;Foundation Model", "primary_area": "applications to physical sciences (physics, chemistry, biology, etc.)", "supplementary_material": "", "author": "Yoni Choukroun;Lior Wolf", "authorids": "~Yoni_Choukroun1;~Lior_Wolf1", "gender": "M;M", "homepage": "https://yonilc.github.io/;http://www.cs.tau.ac.il/~wolf", "dblp": "186/8305;83/4103", "google_scholar": "https://scholar.google.co.il/citations?user=gjo4ebcAAAAJ;UbFrXTsAAAAJ", "orcid": ";0000-0001-5578-8892", "linkedin": ";", "or_profile": "~Yoni_Choukroun1;~Lior_Wolf1", "aff": "Huawei Technologies Ltd.;Tel Aviv University", "aff_domain": "huawei.com;tau.ac.il", "position": "Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nchoukroun2024a,\ntitle={A Foundation Model for Error Correction Codes},\nauthor={Yoni Choukroun and Lior Wolf},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=7KDuQPrAF3}\n}", "github": "", "project": "", "reviewers": "w8i1;dPJy;jdxb;1rnF", "pdf_size": 1667896, "rating": "3;6;8;8", "confidence": "4;3;4;5", "soundness": "3;3;4;4", "contribution": "2;3;3;4", "presentation": "3;2;3;4", "wc_summary": "121;92;105;64", "wc_strengths": "53;69;179;21", "wc_weaknesses": "346;144;154;13", "wc_questions": "119;36;1;13", "wc_review": "639;341;439;111", "wc_reply_reviewers": "605;231;0;0", "wc_reply_authors": "2180;1054;254;86", "reply_reviewers": "2;2;0;0", "reply_authors": "5;3;1;1", "rating_avg": [ 6.25, 2.0463381929681126 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "contribution_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 95.5, 20.886598574205422 ], "wc_strengths_avg": [ 80.5, 59.43694137487224 ], "wc_weaknesses_avg": [ 164.25, 118.7694720877381 ], "wc_questions_avg": [ 42.25, 46.06177916668005 ], "wc_review_avg": [ 382.5, 190.01249958884284 ], "wc_reply_reviewers_avg": [ 209.0, 247.31659871508828 ], "wc_reply_authors_avg": [ 893.5, 827.9279859987823 ], "reply_reviewers_avg": [ 1.0, 1.0 ], "reply_authors_avg": [ 2.5, 1.6583123951777 ], "replies_avg": [ 20, 0 ], "authors#_avg": 
[ 2, 0 ], "corr_rating_confidence": 0.34554737023254406, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15745966124655371585&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "openreview": "https://openreview.net/forum?id=7KDuQPrAF3", "pdf": "https://openreview.net/pdf?id=7KDuQPrAF3", "email": "huawei.com;tau.ac.il", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Huawei;Tel Aviv University", "aff_unique_dep": "Huawei Technologies;", "aff_unique_url": "https://www.huawei.com;https://www.tau.ac.il", "aff_unique_abbr": "Huawei;TAU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "China;Israel" }, { "id": "7LZjuA4AB2", "title": "Ask Your Distribution Shift if Pre-Training is Right for You", "track": "main", "status": "Reject", "tldr": "", "abstract": "Pre-training is a widely used approach to develop models that are robust to distribution shifts. However, in practice, its effectiveness varies: fine-tuning a pre-trained model improves robustness significantly in some cases but *not at all* in others (compared to training from scratch). In this work, we seek to characterize the failure modes that pre-training *can* and *cannot* address. In particular, we focus on two possible failure modes of models under distribution shift: poor extrapolation (e.g., they cannot generalize to a different domain) and biases in the training data (e.g., they rely on spurious features). Our study suggests that, as a rule of thumb, pre-training can help mitigate poor extrapolation but not dataset biases. After providing theoretical motivation and empirical evidence for this finding, we explore two of its implications for developing robust models: (1) pre-training and interventions designed to prevent exploiting biases have complementary robustness benefits, and (2) fine-tuning on a (very) small, non-diverse but *de-biased* dataset can result in significantly more robust models than fine-tuning on a large and diverse but biased dataset.", "keywords": "robustness;distribution shift;transfer learning", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "", "author": "Benjamin Cohen-Wang;Joshua Vendrow;Aleksander Madry", "authorids": "~Benjamin_Cohen-Wang1;~Joshua_Vendrow2;~Aleksander_Madry1", "gender": "M;M;M", "homepage": "https://bencw99.github.io;https://people.csail.mit.edu/madry/;http://www.joshvendrow.com", "dblp": ";67/2454;274/7218", "google_scholar": "QwJR7jEAAAAJ;SupjsEUAAAAJ;zQjuF5wAAAAJ", "orcid": ";;0000-0002-1041-5782", "linkedin": ";;joshua-vendrow/", "or_profile": "~Benjamin_Cohen-Wang1;~Aleksander_Madry1;~Joshua_Vendrow1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu", "position": "PhD student;Professor;PhD student", "bibtex": "@misc{\ncohen-wang2024ask,\ntitle={Ask Your Distribution Shift if Pre-Training is Right for You},\nauthor={Benjamin Cohen-Wang and Joshua Vendrow and Aleksander Madry},\nyear={2024},\nurl={https://openreview.net/forum?id=7LZjuA4AB2}\n}", "github": "", "project": "", "reviewers": "Uetj;f9zp;cFpN;kid6", "site": "https://openreview.net/forum?id=7LZjuA4AB2", "pdf_size": 11272932, "rating": "3;3;3;3", "confidence": "4;4;3;4", "soundness": "3;1;2;1", "contribution": "1;2;2;2", "presentation": "2;2;2;2", "wc_summary": "126;80;68;135", "wc_strengths": "18;51;70;125", "wc_weaknesses": "241;566;358;431", 
"wc_questions": "84;32;24;53", "wc_review": "469;729;520;744", "wc_reply_reviewers": "0;139;56;294", "wc_reply_authors": "464;539;355;933", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;2", "rating_avg": [ 3.0, 0.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 1.75, 0.82915619758885 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 102.25, 28.74347752099596 ], "wc_strengths_avg": [ 66.0, 38.8136573901507 ], "wc_weaknesses_avg": [ 399.0, 117.85372289410293 ], "wc_questions_avg": [ 48.25, 23.19886850689059 ], "wc_review_avg": [ 615.5, 122.45101061240777 ], "wc_reply_reviewers_avg": [ 122.25, 110.80698308319742 ], "wc_reply_authors_avg": [ 572.75, 218.03712413256602 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10406340723698108579&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Online Continual Learning for Interactive Instruction Following Agents", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19364", "id": "7M0EzjugaN", "author_site": "Byeonghwi Kim, Minhyuk Seo, Jonghyun Choi", "tldr": "", "abstract": "In learning an embodied agent executing daily tasks via language directives, the literature largely assumes that the agent learns all training data at the beginning. We argue that such a learning scenario is less realistic, since a robotic agent is supposed to learn the world continuously as it explores and perceives it. To take a step towards a more realistic embodied agent learning scenario, we propose two continual learning setups for embodied agents; learning new behaviors (Behavior Incremental Learning, Behavior-IL) and new environments (Environment Incremental Learning, Environment-IL) For the tasks, previous \u2018data prior\u2019 based continual learning methods maintain logits for the past tasks. However, the stored information is often insufficiently learned information and requires task boundary information, which might not always be available. Here, we propose to update them based on confidence scores without task boundary information (i.e., task-free) in a moving average fashion, named Confidence-Aware Moving Average (CAMA). 
In the proposed challenging Behavior-IL and Environment-IL setups, our simple CAMA outperforms prior arts in our empirical validations by noticeable margins.", "keywords": "Embodied AI;Continual Learning", "primary_area": "applications to robotics, autonomy, planning", "supplementary_material": "", "author": "Byeonghwi Kim;Minhyuk Seo;Jonghyun Choi", "authorids": "~Byeonghwi_Kim1;~Minhyuk_Seo1;~Jonghyun_Choi1", "gender": "M;M;M", "homepage": "https://bhkim94.github.io/;https://dbd05088.github.io/;https://ppolon.github.io/", "dblp": "280/2943;350/4104;21/11103", "google_scholar": "Sr9hbXYAAAAJ;ayDPR-gAAAAJ;uiGWnm4AAAAJ", "orcid": "0000-0003-3775-2778;;0000-0002-7934-8434", "linkedin": "byeonghwi-kim-821909167;minhyuk-seo-59ba11247/;jonghyun-choi-459bb615/", "or_profile": "~Byeonghwi_Kim1;~Minhyuk_Seo1;~Jonghyun_Choi1", "aff": "Seoul National University;Yonsei University;Yonsei University", "aff_domain": "snu.ac.kr;yonsei.ac.kr;yonsei.ac.kr", "position": "PhD student;MS student;Associate Professor", "bibtex": "@inproceedings{\nkim2024online,\ntitle={Online Continual Learning for Interactive Instruction Following Agents},\nauthor={Byeonghwi Kim and Minhyuk Seo and Jonghyun Choi},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=7M0EzjugaN}\n}", "github": "", "project": "", "reviewers": "q27B;z9k5;UTbC;oYjZ", "pdf_size": 7496007, "rating": "6;6;6;6", "confidence": "2;4;4;3", "soundness": "3;3;3;3", "contribution": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "56;129;87;110", "wc_strengths": "48;78;65;84", "wc_weaknesses": "75;260;187;100", "wc_questions": "77;152;107;48", "wc_review": "256;619;446;342", "wc_reply_reviewers": "16;118;97;0", "wc_reply_authors": "1225;1866;1929;722", "reply_reviewers": "1;1;1;0", "reply_authors": "2;4;5;3", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 95.5, 27.225906780123964 ], "wc_strengths_avg": [ 68.75, 13.808964479641476 ], "wc_weaknesses_avg": [ 155.5, 73.26834241335067 ], "wc_questions_avg": [ 96.0, 38.47726601514198 ], "wc_review_avg": [ 415.75, 135.26340044520543 ], "wc_reply_reviewers_avg": [ 57.75, 50.61805507919087 ], "wc_reply_authors_avg": [ 1435.5, 495.54641558586616 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.5, 1.118033988749895 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5344051114832059180&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=7M0EzjugaN", "pdf": "https://openreview.net/pdf?id=7M0EzjugaN", "email": "snu.ac.kr;yonsei.ac.kr;yonsei.ac.kr", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Seoul National University;Yonsei University", "aff_unique_dep": ";", "aff_unique_url": "https://www.snu.ac.kr;https://www.yonsei.ac.kr", "aff_unique_abbr": "SNU;Yonsei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "id": "7Mq096hr9Y", "title": "OpenMixup: A Comprehensive Mixup Benchmark for Visual Classification", "track": "main", "status": "Reject", "tldr": "", "abstract": "Data mixing, or mixup, is a data-dependent augmentation technique that has greatly enhanced the generalizability of 
modern deep neural networks. However, a full grasp of mixup methodology necessitates a top-down hierarchical understanding from systematic impartial evaluations and empirical analysis, both of which are currently lacking within the community. In this paper, we present OpenMixup, the first comprehensive mixup benchmarking study for supervised visual classification. OpenMixup offers a unified mixup-based model design and training framework, encompassing a wide collection of data mixing algorithms, a diverse range of widely-used backbones and modules, and a set of model analysis toolkits. To ensure fair and complete comparisons, large-scale standard evaluations of various mixup baselines are conducted across 12 diversified image datasets with meticulous confounders tweaking powered by our modular and extensible codebase framework. Interesting observations and insights are derived through detailed empirical analysis of how mixup policies, network architectures, and dataset properties affect the mixup visual classification performance. We hope that OpenMixup can bolster the reproducibility of previously gained insights and facilitate a better understanding of mixup properties, thereby giving the community a kick-start for the development and evaluation of new mixup methods. The source code is publicly available.", "keywords": "Data Augmentation;Benchmark;Image Classification;Mixup;Supervised Learning", "primary_area": "datasets and benchmarks", "supplementary_material": "/attachment/56234f3efb471cda180f65d29300100a283c25ac.zip", "author": "Siyuan Li;Zedong Wang;Zicheng Liu;Di Wu;Cheng Tan;Weiyang Jin;Stan Z. Li", "authorids": "~Siyuan_Li6;~Zedong_Wang1;~Zicheng_Liu2;~Di_Wu10;~Cheng_Tan1;~Weiyang_Jin1;~Stan_Z._Li2", "gender": "M;M;M;M;M;M;M", "homepage": "https://lupin1998.github.io/;https://jacky1128.github.io;;;https://chengtan9907.github.io/;;https://en.westlake.edu.cn/academics/School_of_Engineering/About/Our_People/Faculty/201912/t20191206_2497.shtml", "dblp": "63/9705-2;179/8811.html;l/ZichengLiu-6;;70/1533-12.html;344/6132;l/StanZLi", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en;6kTV6aMAAAAJ;;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0000-0001-6806-2468;0009-0000-0112-0491;;;;0000-0001-5351-1400;", "linkedin": "https://www.linkedin.cn/incareer/in/siyuan-li-lupin1998/;;;;;;stan-z-li-%E6%9D%8E%E5%AD%90%E9%9D%92-55753224/", "or_profile": "~Siyuan_Li6;~Zedong_Wang1;~Zicheng_Liu2;~Di_Wu10;~Cheng_Tan1;~Weiyang_Jin1;~Stan_Z._Li1", "aff": "Alibaba Group;Westlake University;Zhejiang University;Westlake University;Zhejiang University & Westlake University;Beijing Jiaotong University;Westlake University", "aff_domain": "alibaba-inc.com;westlake.edu;zju.edu.cn;westlake.edu.cn;westlake.edu.cn;bjtu.edu.cn;westlake.edu.cn", "position": "Intern;Intern;PhD student;PhD student;PhD student;Undergrad student;Chair Professor", "bibtex": "@misc{\nli2024openmixup,\ntitle={OpenMixup: A Comprehensive Mixup Benchmark for Visual Classification},\nauthor={Siyuan Li and Zedong Wang and Zicheng Liu and Di Wu and Cheng Tan and Weiyang Jin and Stan Z. 
Li},\nyear={2024},\nurl={https://openreview.net/forum?id=7Mq096hr9Y}\n}", "github": "", "project": "", "reviewers": "nCz4;x5Ek;kQcu;rFDa", "site": "https://openreview.net/forum?id=7Mq096hr9Y", "pdf_size": 5269571, "rating": "5;5;5;6", "confidence": "4;5;5;4", "soundness": "3;2;3;3", "contribution": "2;2;2;3", "presentation": "3;3;3;3", "wc_summary": "74;43;31;65", "wc_strengths": "94;15;55;77", "wc_weaknesses": "191;94;53;155", "wc_questions": "74;1;38;101", "wc_review": "433;153;177;398", "wc_reply_reviewers": "0;0;58;0", "wc_reply_authors": "749;717;731;0", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;2;0", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 53.25, 17.09349291397168 ], "wc_strengths_avg": [ 60.25, 29.55820529057879 ], "wc_weaknesses_avg": [ 123.25, 53.35904328227784 ], "wc_questions_avg": [ 53.5, 37.659660115301094 ], "wc_review_avg": [ 290.25, 126.14550130702244 ], "wc_reply_reviewers_avg": [ 14.5, 25.11473670974872 ], "wc_reply_authors_avg": [ 549.25, 317.3124446031072 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.7071067811865476 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15958471513919602797&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1;2;1;2;3;1", "aff_unique_norm": "Alibaba Group;Westlake University;Zhejiang University;Beijing Jiao Tong University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.alibaba.com;https://www.westlake.edu.cn;https://www.zju.edu.cn;http://www.njtu.edu.cn/en", "aff_unique_abbr": "Alibaba;WU;ZJU;BJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "7NqRDbkizw", "title": "DIA: Diffusion based Inverse Network Attack on Collaborative Inference", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "With the continuous expansion of neural networks in size and depth, and the growing popularity of machine learning as a service, collaborative inference systems present a promising approach for deploying models in resource-constrained computing environments. However, as the deployment of these systems gains traction, evaluating their privacy and security has become a critical issue. Towards this goal, this paper introduces a diffusion-based inverse network attack, named DIA, for collaborative inference systems that uses a novel feature map awareness conditioning mechanism to guide the diffusion model training. Compared to prior approaches, our extensive empirical results demonstrate that the proposed attack achieves an average improvement of 29%, 20%, 30% in terms of SSIM, PSNR, and MSE when applied to convolutional neural networks (CNN), 18%, 17%, 61% to ResNet models, and 55%, 54%, 84% to Vision transformers (ViTs). Moreover, we uncover a notable vulnerability of transformer-based model ViTs and analyze the potential reasons behind this vulnerability. 
Based on our analysis, we raise caution regarding the deployment of transformer-based models in collaborative inference systems, emphasizing the need for careful consideration regarding the security of such models in collaborative settings.", "keywords": "Diffusion model;data privacy;inverse network attack;collaborative Inference", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/15ab6727a8c0b11f331756a8e7474a67055d4bfc.pdf", "author": "Dake Chen;Shiduo Li;Yuke Zhang;Souvik Kundu;Chenghao Li;Peter Anthony Beerel", "authorids": "~Dake_Chen1;~Shiduo_Li1;~Yuke_Zhang1;~Souvik_Kundu2;~Chenghao_Li2;~Peter_Anthony_Beerel1", "gender": ";M;F;M;M;M", "homepage": "https://scholar.google.com/citations?user=MwaZe-8AAAAJ&hl=en&oi=ao;https://github.com/Risto0211;;https://ksouvik52.github.io;https://howardli0816.github.io/;http://sites.usc.edu/eessc.html", "dblp": ";386/9567;;126/2210;;29/6330", "google_scholar": ";PY12lawAAAAJ;CJ5iMiwAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;JSdH7PsAAAAJ", "orcid": ";;0000-0001-5253-5478;0000-0002-3533-9405;;", "linkedin": ";shiduo-li/;;souvik-kundu-64922b50/;chenghao-li-3b0354252/;peter-beerel-b9902a1/", "or_profile": "~Dake_Chen1;~Shiduo_Li1;~Yuke_Zhang1;~Souvik_Kundu2;~Chenghao_Li2;~Peter_Anthony_Beerel1", "aff": "Meta Facebook;Tsinghua University;University of Southern California;Intel;University of Southern California;University of Southern California", "aff_domain": "meta.com;mail.tsinghua.edu.cn;usc.edu;intel.com;usc.edu;usc.edu", "position": "Researcher;Undergrad student;PhD student;Researcher;MS student;Full Professor", "bibtex": "@misc{\nchen2024dia,\ntitle={{DIA}: Diffusion based Inverse Network Attack on Collaborative Inference},\nauthor={Dake Chen and Shiduo Li and Yuke Zhang and Souvik Kundu and Chenghao Li and Peter Anthony Beerel},\nyear={2024},\nurl={https://openreview.net/forum?id=7NqRDbkizw}\n}", "github": "", "project": "", "reviewers": "wMLT;ZM8C;9V3L", "site": "https://openreview.net/forum?id=7NqRDbkizw", "pdf_size": 876877, "rating": "3;3;5", "confidence": "4;3;4", "soundness": "3;2;3", "contribution": "2;2;2", "presentation": "2;2;2", "wc_summary": "72;102;95", "wc_strengths": "34;98;66", "wc_weaknesses": "122;181;64", "wc_questions": "9;5;98", "wc_review": "237;386;323", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 89.66666666666667, 12.814921857827391 ], "wc_strengths_avg": [ 66.0, 26.127890589687233 ], "wc_weaknesses_avg": [ 122.33333333333333, 47.76563153100308 ], "wc_questions_avg": [ 37.333333333333336, 42.92888175679503 ], "wc_review_avg": [ 315.3333333333333, 61.070087240444934 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 3, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18400872964133980956&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2;3;2;2", "aff_unique_norm": "Meta;Tsinghua University;University of Southern California;Intel", "aff_unique_dep": "Meta 
Platforms, Inc.;;;Intel Corporation", "aff_unique_url": "https://meta.com;https://www.tsinghua.edu.cn;https://www.usc.edu;https://www.intel.com", "aff_unique_abbr": "Meta;THU;USC;Intel", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;1;0;0;0;0", "aff_country_unique": "United States;China" }, { "title": "Parameter-Efficient Orthogonal Finetuning via Butterfly Factorization", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19363", "id": "7NzgkEdGyr", "author_site": "Weiyang Liu, Zeju Qiu, Yao Feng, Yuliang Xiu, Yuxuan Xue, Longhui Yu, Haiwen Feng, Zhen Liu, Juyeon Heo, Songyou Peng, Yandong Wen, Michael J Black, Adrian Weller, Bernhard Schoelkopf", "tldr": "", "abstract": "Large foundation models are becoming ubiquitous, but training them from scratch is prohibitively expensive. Thus, efficiently adapting these powerful models to downstream tasks is increasingly important. In this paper, we study a principled finetuning paradigm -- Orthogonal Finetuning (OFT) -- for downstream task adaptation. Despite demonstrating good generalizability, OFT still uses a fairly large number of trainable parameters due to the high dimensionality of orthogonal matrices. To address this, we start by examining OFT from an information transmission perspective, and then identify a few key desiderata that enable better parameter-efficiency. Inspired by how the Cooley-Tukey fast Fourier transform algorithm enables efficient information transmission, we propose an efficient orthogonal parameterization using butterfly structures. We apply this parameterization to OFT, creating a novel parameter-efficient finetuning method, called Orthogonal Butterfly (BOFT). By subsuming OFT as a special case, BOFT introduces a generalized orthogonal finetuning framework. Finally, we conduct an extensive empirical study of adapting large vision transformers, large language models, and text-to-image diffusion models to various downstream tasks in computer vision and natural language. The results validate the effectiveness of BOFT as a generic finetuning method.", "keywords": "Parameter-efficient finetuning;orthogonal;Butterfly matrix", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "", "author": "Weiyang Liu;Zeju Qiu;Yao Feng;Yuliang Xiu;Yuxuan Xue;Longhui Yu;Haiwen Feng;Zhen Liu;Juyeon Heo;Songyou Peng;Yandong Wen;Michael J. 
Black;Adrian Weller;Bernhard Sch\u00f6lkopf", "authorids": "~Weiyang_Liu1;~Zeju_Qiu1;~Yao_Feng3;~Yuliang_Xiu2;~Yuxuan_Xue1;~Longhui_Yu1;~Haiwen_Feng1;~Zhen_Liu6;~Juyeon_Heo1;~Songyou_Peng1;~Yandong_Wen1;~Michael_J._Black1;~Adrian_Weller1;~Bernhard_Sch\u00f6lkopf1", "gender": "M;M;F;M;;M;M;M;F;M;M;;M;", "homepage": "http://wyliu.com/;;https://ps.is.tuebingen.mpg.de/person/yfeng;http://xiuyuliang.cn;http://yuxuan-xue.com;https://yulonghui.github.io/;https://ps.is.mpg.de/person/hfeng;;https://sites.google.com/view/juyeonheo/%ED%99%88;https://pengsongyou.github.io/;;;http://mlg.eng.cam.ac.uk/adrian/;", "dblp": "137/1532;276/4222;05/9861;215/3940;254/6994;313/9946;119/9168;77/35-19;;205/2316;153/2125;;73/8324;", "google_scholar": "DMjROf0AAAAJ;7y5RN9wAAAAJ;wNQQhSIAAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN;5SKNmhcAAAAJ;https://scholar.google.com.hk/citations?user=3eHjDDgAAAAJ;g5co-iIAAAAJ;I1IiJCAAAAAJ;;eNypkO0AAAAJ;;;https://scholar.google.co.uk/citations?user=Ek4hM10AAAAJ;", "orcid": ";;0000-0002-9481-9783;0000-0003-0165-5909;;;;;;;;;;", "linkedin": ";zeju-qiu-729b8018a/;;yuliangxiu;;%E9%BE%99%E8%BE%89-%E8%99%9E-71655a154/;;;;;;;;", "or_profile": "~Weiyang_Liu1;~Zeju_Qiu1;~Yao_Feng3;~Yuliang_Xiu2;~Yuxuan_Xue1;~Longhui_Yu1;~Haiwen_Feng1;~Zhen_Liu6;~Juyeon_Heo1;~Songyou_Peng1;~Yandong_Wen1;~Michael_J._Black1;~Adrian_Weller1;~Bernhard_Sch\u00f6lkopf1", "aff": "University of Cambridge;Max-Planck-Institute for Intelligent Systems, Max-Planck Institute;ETHZ - ETH Zurich;Max Planck Institute for Intelligent Systems, Max-Planck Institute;Eberhard-Karls-Universit\u00e4t T\u00fcbingen;;Max Planck Institute for Intelligent Systems, Max-Planck Institute;University of Montreal;University of Cambridge;ETH Zurich;Max Planck Institute for Intelligent Systems, Max-Planck Institute;;University of Cambridge;", "aff_domain": "cam.ac.uk;is.mpg.de;ethz.ch;tuebingen.mpg.de;uni-tuebingen.de;;tuebingen.mpg.de;umontreal.ca;cam.ac.uk;inf.ethz.ch;tuebingen.mpg.de;;cam.ac.uk;", "position": "Researcher;Intern;PhD student;PhD student;PhD student;;PhD student;PhD student;PhD student;Senior Researcher;Postdoc;;Principal Researcher;", "bibtex": "@inproceedings{\nliu2024parameterefficient,\ntitle={Parameter-Efficient Orthogonal Finetuning via Butterfly Factorization},\nauthor={Weiyang Liu and Zeju Qiu and Yao Feng and Yuliang Xiu and Yuxuan Xue and Longhui Yu and Haiwen Feng and Zhen Liu and Juyeon Heo and Songyou Peng and Yandong Wen and Michael J. 
Black and Adrian Weller and Bernhard Sch{\\\"o}lkopf},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=7NzgkEdGyr}\n}", "github": "", "project": "", "reviewers": "CCPY;oVSz;mHgj", "pdf_size": 11453628, "rating": "5;6;8", "confidence": "3;4;3", "soundness": "3;3;3", "contribution": "3;3;2", "presentation": "4;3;4", "wc_summary": "65;34;249", "wc_strengths": "45;35;129", "wc_weaknesses": "121;108;238", "wc_questions": "2;18;191", "wc_review": "233;195;807", "wc_reply_reviewers": "0;77;73", "wc_reply_authors": "1935;2983;3351", "reply_reviewers": "0;2;1", "reply_authors": "3;6;6", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 116.0, 94.89292211048549 ], "wc_strengths_avg": [ 69.66666666666667, 42.153159892099296 ], "wc_weaknesses_avg": [ 155.66666666666666, 58.459862774005515 ], "wc_questions_avg": [ 70.33333333333333, 85.57387971156203 ], "wc_review_avg": [ 411.6666666666667, 279.973014572635 ], "wc_reply_reviewers_avg": [ 50.0, 35.393031329156685 ], "wc_reply_authors_avg": [ 2756.3333333333335, 599.8873968411813 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 5.0, 1.4142135623730951 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 14, 0 ], "corr_rating_confidence": -0.18898223650461363, "gs_citation": 57, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5299967292332698973&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=7NzgkEdGyr", "pdf": "https://openreview.net/pdf?id=7NzgkEdGyr", "email": "cam.ac.uk;is.mpg.de;ethz.ch;tuebingen.mpg.de;uni-tuebingen.de;;tuebingen.mpg.de;umontreal.ca;cam.ac.uk;inf.ethz.ch;tuebingen.mpg.de;;cam.ac.uk;", "author_num": 14, "aff_unique_index": "0;1;2;3;4;3;5;0;2;3;0", "aff_unique_norm": "University of Cambridge;Max-Planck-Institute for Intelligent Systems;ETH Zurich;Max Planck Institute for Intelligent Systems;Eberhard Karls University of T\u00fcbingen;University of Montreal", "aff_unique_dep": ";Intelligent Systems;;Intelligent Systems;;", "aff_unique_url": "https://www.cam.ac.uk;https://www.mpi-is.mpg.de;https://www.ethz.ch;https://www.mpi-is.mpg.de;https://www.uni-tuebingen.de/;https://wwwumontreal.ca", "aff_unique_abbr": "Cambridge;MPI-IS;ETHZ;MPI-IS;Uni T\u00fcbingen;UM", "aff_campus_unique_index": "0;2;0;0", "aff_campus_unique": "Cambridge;;T\u00fcbingen", "aff_country_unique_index": "0;1;2;1;1;1;3;0;2;1;0", "aff_country_unique": "United Kingdom;Germany;Switzerland;Canada" }, { "id": "7OO8tTOgh4", "title": "Non-targeted Adversarial Attacks on Vision-Language Models via Maximizing Information Entropy", "track": "main", "status": "Reject", "tldr": "", "abstract": "Adversarial examples pose significant security concerns in deep neural networks and play a crucial role in assessing the robustness of models. Nevertheless, existing research has primarily focused on classification tasks, while the evaluation of adversarial examples is urgently needed for more complex tasks. In this paper, we investigate the adversarial robustness of large vision-language models (VLMs). 
We propose a non-targeted white-box attack method that maximizes information entropy (MIE) to induce the victim model to generate misleading image descriptions deviating from reality. Our method is thoroughly analyzed experimentally, with validation conducted on the ImageNet dataset. The comprehensive and quantifiable experimental results demonstrate a significant success rate achieved by our method in adversarial attacks. Given the consistent architecture of the language decoder, our proposed method can serve as a benchmark for evaluating the robustness of diverse vision-language models.", "keywords": "Adversarial Attacks;Vision-Language Models;Trustworthy AI", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "", "author": "Chaohu Liu;Yubo Wang;Haoyu Cao;Bing Liu;Deqiang Jiang;Linli Xu", "authorids": "~Chaohu_Liu1;~Yubo_Wang8;~Haoyu_Cao1;~Bing_Liu6;~Deqiang_Jiang1;~Linli_Xu1", "gender": "M;M;M;;M;", "homepage": "https://github.com/liuchaohu;https://github.com/LingoAmber;;;;", "dblp": "356/2510;;334/3895.html;;259/2591.html;", "google_scholar": ";https://scholar.google.com/citations?hl=en;LV8ejn8AAAAJ;;v4AK2MQAAAAJ;", "orcid": ";;0000-0002-3789-9705;0000-0001-5324-4816;;", "linkedin": ";;;;;", "or_profile": "~Chaohu_Liu1;~Yubo_Wang8;~Haoyu_Cao1;~Bing_Liu6;~Deqiang_Jiang1;~Linli_Xu1", "aff": "University of Science and Technology of China;Tencent Youtu Lab;University of Science and Technology of China;Tencent YouTu Lab;Tencent YouTu Lab;", "aff_domain": "ustc.edu.cn;tencent.com;ustc.edu.cn;tencent.com;tencent.com;", "position": "PhD student;Intern;PhD student;Researcher;Researcher;", "bibtex": "@misc{\nliu2024nontargeted,\ntitle={Non-targeted Adversarial Attacks on Vision-Language Models via Maximizing Information Entropy},\nauthor={Chaohu Liu and Yubo Wang and Haoyu Cao and Bing Liu and Deqiang Jiang and Linli Xu},\nyear={2024},\nurl={https://openreview.net/forum?id=7OO8tTOgh4}\n}", "github": "", "project": "", "reviewers": "fBK2;CGXF;r3mc;qFBU", "site": "https://openreview.net/forum?id=7OO8tTOgh4", "pdf_size": 8884908, "rating": "5;5;5;6", "confidence": "4;4;5;3", "soundness": "2;2;2;3", "contribution": "2;1;2;2", "presentation": "4;3;2;3", "wc_summary": "66;17;256;43", "wc_strengths": "95;11;110;19", "wc_weaknesses": "141;152;156;151", "wc_questions": "45;3;164;27", "wc_review": "347;183;686;240", "wc_reply_reviewers": "0;0;0;55", "wc_reply_authors": "1395;866;963;867", "reply_reviewers": "0;0;0;1", "reply_authors": "3;2;3;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 95.5, 94.27221223669252 ], "wc_strengths_avg": [ 58.75, 44.16092730004659 ], "wc_weaknesses_avg": [ 150.0, 5.522680508593631 ], "wc_questions_avg": [ 59.75, 62.005544106958695 ], "wc_review_avg": [ 364.0, 195.0064101510512 ], "wc_reply_reviewers_avg": [ 13.75, 23.81569860407206 ], "wc_reply_authors_avg": [ 1022.75, 218.4998569793582 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ddHHjzArvYkJ:scholar.google.com/&scioq=Non-targeted+Adversarial+Attacks+on+Vision-Language+Models+via+Maximizing+Information+Entropy&hl=en&as_sdt=0,33", 
"gs_version_total": 0, "aff_unique_index": "0;1;0;1;1", "aff_unique_norm": "University of Science and Technology of China;Tencent", "aff_unique_dep": ";Youtu Lab", "aff_unique_url": "http://www.ustc.edu.cn;https://www.tencent.com", "aff_unique_abbr": "USTC;Tencent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Data-independent Module-aware Pruning for Hierarchical Vision Transformers", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19362", "id": "7Ol6foUi1G", "author_site": "Yang He, Joey Tianyi Zhou", "tldr": "", "abstract": "Hierarchical vision transformers (ViTs) have two advantages over conventional ViTs. First, hierarchical ViTs achieve linear computational complexity with respect to image size by local self-attention. Second, hierarchical ViTs create hierarchical feature maps by merging image patches in deeper layers for dense prediction. However, existing pruning methods ignore the unique properties of hierarchical ViTs and use the magnitude value as the weight importance. This approach leads to two main drawbacks. First, the \"local\" attention weights are compared at a \"global\" level, which may cause some \"locally\" important weights to be pruned due to their relatively small magnitude \"globally\". The second issue with magnitude pruning is that it fails to consider the distinct weight distributions of the network, which are essential for extracting coarse to fine-grained features at various hierarchical levels. \n\nTo solve the aforementioned issues, we have developed a Data-independent Module-Aware Pruning method (DIMAP) to compress hierarchical ViTs. To ensure that \"local\" attention weights at different hierarchical levels are compared fairly in terms of their contribution, we treat them as a **module** and examine their contribution by analyzing their information distortion. Furthermore, we introduce a novel weight metric that is solely based on weights and does not require input images, thereby eliminating the **dependence** on the patch merging process. Our method validates its usefulness and strengths on Swin Transformers of different sizes on ImageNet-1k classification. Notably, the top-5 accuracy drop is only 0.07% when we remove 52.5% FLOPs and 52.7% parameters of Swin-B. When we reduce 33.2% FLOPs and 33.2% parameters of Swin-S, we can even achieve a 0.8% higher relative top-5 accuracy than the original model. 
Code is available at: [https://github.com/he-y/Data-independent-Module-Aware-Pruning](https://github.com/he-y/Data-independent-Module-Aware-Pruning).", "keywords": "Filter Pruning; Model Compression; Vision Transformer", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/d13a5c5260bac7b5449a8f8460f25889e71c9944.pdf", "author": "Yang He;Joey Tianyi Zhou", "authorids": "~Yang_He2;~Joey_Tianyi_Zhou1", "gender": "M;M", "homepage": "https://joeyzhouty.github.io/;https://he-y.github.io/", "dblp": "123/5110;06/1998-2", "google_scholar": "https://scholar.google.com.sg/citations?user=cYNqDokAAAAJ;vvnFsIIAAAAJ", "orcid": "0000-0002-4675-7055;0000-0002-2257-6073", "linkedin": ";", "or_profile": "~Joey_Tianyi_Zhou1;~yang_he1", "aff": "A*STAR Centre for Frontier AI Research;Institute of High Performance Computing, Singapore, A*STAR", "aff_domain": "cfar.a-star.edu.sg;ihpc.a-star.edu.sg", "position": "Principal Researcher;Researcher", "bibtex": "@inproceedings{\nhe2024dataindependent,\ntitle={Data-independent Module-aware Pruning for Hierarchical Vision Transformers},\nauthor={Yang He and Joey Tianyi Zhou},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=7Ol6foUi1G}\n}", "github": "", "project": "", "reviewers": "uDum;qfKu;VGj7", "pdf_size": 3078423, "rating": "6;6;8", "confidence": "5;4;4", "soundness": "3;3;4", "contribution": "4;3;4", "presentation": "3;4;3", "wc_summary": "79;102;73", "wc_strengths": "73;60;140", "wc_weaknesses": "111;24;53", "wc_questions": "2;24;9", "wc_review": "265;210;275", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "795;473;275", "reply_reviewers": "0;0;0", "reply_authors": "2;1;1", "rating_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 3.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 84.66666666666667, 12.498888839501783 ], "wc_strengths_avg": [ 91.0, 35.05234181430203 ], "wc_weaknesses_avg": [ 62.666666666666664, 36.16935473881477 ], "wc_questions_avg": [ 11.666666666666666, 9.177266598624136 ], "wc_review_avg": [ 250.0, 28.577380332470412 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 514.3333333333334, 214.29159780282774 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3223724432329131&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=7Ol6foUi1G", "pdf": "https://openreview.net/pdf?id=7Ol6foUi1G", "email": "cfar.a-star.edu.sg;ihpc.a-star.edu.sg", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "A*STAR;Institute of High Performance Computing", "aff_unique_dep": "Centre for Frontier AI Research;", "aff_unique_url": "https://www.a-star.edu.sg;https://www.ihpc.a-star.edu.sg", "aff_unique_abbr": "A*STAR;IHPC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Singapore" }, { "id": "7OwML7fwl8", "title": "Fairness without Sensitive attributes via Noise and Uncertain Predictions", "track": "main", "status": "Reject", "tldr": "", "abstract": 
"While model fairness improvement has been explored previously, existing methods invariably rely on adjusting explicit sensitive attribute values in order to improve model fairness in downstream tasks. However, we observe the trend of sensitive demographic information being inaccessible as public concerns around data privacy grow. In this paper, we propose a confidence-based hierarchical structure of variational autoencoder (VAE) architectures called ``Reckoner\" for reliable fairness learning under the assumption of missing sensitive attributes. First, we present the results of exploratory data analyses conducted on the widely-used COMPAS dataset. We observed significant disparities in model fairness across different levels of confidence. Inspired by these findings, we devised a dual-model system in which the model initialised with a high-confidence data subset learns from the model initialised with a low-confidence data subset, enabling it to avoid biased predictions. To maintain predictiveness, we also introduced learnable noise into the dataset, forcing the data to retain only the most essential information for predictions. Our experimental results show that Reckoner consistently outperforms state-of-the-art baselines on both the COMPAS and the New Adult datasets in terms of both accuracy and fairness metrics.", "keywords": "Fairness;Fairness without Sensitive Attributes;Fairness without Demographics", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/1dfd360e7216c3fbf025619bae5b640ec25bb184.zip", "author": "Hongliang Ni;LEI HAN;Tong Chen;Shazia Wasim Sadiq;Gianluca Demartini", "authorids": "~Hongliang_Ni1;~LEI_HAN7;~Tong_Chen8;~Shazia_Wasim_Sadiq1;~Gianluca_Demartini1", "gender": "F;;M;;F", "homepage": ";;https://itee.uq.edu.au/profile/1253/rocky-chen;http://gianlucademartini.net;https://about.uq.edu.au/experts/792", "dblp": ";75/2307-3.html;22/1512-5;05/3422;s/SWSadiq", "google_scholar": ";https://scholar.google.com.au/citations?user=Oibami4AAAAJ;07cqSMsAAAAJ;https://scholar.google.co.uk/citations?user=PCAiILsAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0002-7777-3592;0000-0001-7269-146X;0000-0002-7311-3693;0000-0001-6739-4145", "linkedin": "hongliang-ni-1931181b0/;;;gianlucademartini/;shazia-sadiq-4920651/?originalSubdomain=au", "or_profile": "~Hongliang_Ni1;~LEI_HAN7;~Tong_Chen8;~Gianluca_Demartini1;~Shazia_Sadiq1", "aff": "University of Queensland;;The University of Queensland;University of Queensland;The University of Queensland", "aff_domain": "uq.edu.au;;uq.edu.au;uq.edu.au;uq.edu.au", "position": "PhD student;;Assistant Professor;Associate Professor;Full Professor", "bibtex": "@misc{\nni2024fairness,\ntitle={Fairness without Sensitive attributes via Noise and Uncertain Predictions},\nauthor={Hongliang Ni and LEI HAN and Tong Chen and Shazia Wasim Sadiq and Gianluca Demartini},\nyear={2024},\nurl={https://openreview.net/forum?id=7OwML7fwl8}\n}", "github": "", "project": "", "reviewers": "4bz2;zU7R;KsYL", "site": "https://openreview.net/forum?id=7OwML7fwl8", "pdf_size": 414002, "rating": "3;5;5", "confidence": "3;5;4", "soundness": "2;2;1", "contribution": "2;3;2", "presentation": "1;3;3", "wc_summary": "67;93;110", "wc_strengths": "20;48;20", "wc_weaknesses": "236;63;226", "wc_questions": "2;474;4", "wc_review": "325;678;360", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "602;1385;646", "reply_reviewers": "0;0;0", "reply_authors": "1;2;1", "rating_avg": [ 4.333333333333333, 
0.9428090415820634 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 1.6666666666666667, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 90.0, 17.682382946499793 ], "wc_strengths_avg": [ 29.333333333333332, 13.199326582148888 ], "wc_weaknesses_avg": [ 175.0, 79.30111390558564 ], "wc_questions_avg": [ 160.0, 222.03303057578316 ], "wc_review_avg": [ 454.3333333333333, 158.80036383949363 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 877.6666666666666, 359.18828241219427 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:y1RoM6O9alsJ:scholar.google.com/&scioq=Fairness+without+Sensitive+attributes+via+Noise+and+Uncertain+Predictions&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Queensland", "aff_unique_dep": "", "aff_unique_url": "https://www.uq.edu.au", "aff_unique_abbr": "UQ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Australia" }, { "id": "7Phicg0WAg", "title": "FlexCap: Generating Rich, Localized, and Flexible Captions in Images", "track": "main", "status": "Reject", "tldr": "", "abstract": "We introduce FlexCap, a module that generates localized descriptions for any region in a given image. We use the idea of length conditioning to ensure the output captions have the desired length. This allows for controllable generation of the full spectrum of localized captions, ranging from short object names to full sentence descriptions. To train this model, we create a dataset of image-box-caption triplets from web-scale text-image pairs using open-vocabulary object detection models. We show that FlexCap can connect images with LLMs by representing images as a sequence of region descriptions and their spatial extents. Using this interpretable textual representation, we exceed the state-of-the-art zero-shot performance on many visual question answering tasks. We also show that FlexCap can be fine-tuned to achieve strong performance on the dense captioning task on the Visual Genome dataset. 
Finally, we demonstrate qualitatively how FlexCap can be used for image labeling, object attribute recognition, and visual dialog.", "keywords": "visual-language model;object detection;image captioning;visual question answering", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/1f64b8fab7ab2a25b9102d70b7f295273fc93497.zip", "author": "Debidatta Dwibedi;Vidhi Jain;Jonathan Tompson;Andrew Zisserman;Yusuf Aytar", "authorids": "~Debidatta_Dwibedi1;~Vidhi_Jain2;~Jonathan_Tompson1;~Andrew_Zisserman1;~Yusuf_Aytar1", "gender": "M;F;M;;M", "homepage": "https://debidatta.github.io/;http://vidhijain.github.io;http://jonathantompson.com;;", "dblp": "160/3739;199/2574;139/0769;;41/5577", "google_scholar": "EPfOJwQAAAAJ;;U_Jw8DUAAAAJ;;0ncQNL8AAAAJ", "orcid": ";;;;", "linkedin": ";vidhijain96/;;;", "or_profile": "~Debidatta_Dwibedi1;~Vidhi_Jain2;~Jonathan_Tompson1;~Andrew_Zisserman1;~Yusuf_Aytar1", "aff": "Google;Google;Google DeepMind;;Google DeepMind", "aff_domain": "google.com;google.com;google.com;;google.com", "position": "Google;Student Researcher;Researcher;;Research Scientist", "bibtex": "@misc{\ndwibedi2024flexcap,\ntitle={FlexCap: Generating Rich, Localized, and Flexible Captions in Images},\nauthor={Debidatta Dwibedi and Vidhi Jain and Jonathan Tompson and Andrew Zisserman and Yusuf Aytar},\nyear={2024},\nurl={https://openreview.net/forum?id=7Phicg0WAg}\n}", "github": "", "project": "", "reviewers": "sAQH;N2tn;1skp", "site": "https://openreview.net/forum?id=7Phicg0WAg", "pdf_size": 4647987, "rating": "5;5;5", "confidence": "5;3;4", "soundness": "4;3;2", "contribution": "3;2;2", "presentation": "4;3;3", "wc_summary": "85;46;47", "wc_strengths": "70;40;49", "wc_weaknesses": "68;111;151", "wc_questions": "29;95;14", "wc_review": "252;292;261", "wc_reply_reviewers": "0;50;21", "wc_reply_authors": "655;1086;997", "reply_reviewers": "0;1;1", "reply_authors": "1;2;3", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 59.333333333333336, 18.153665072253467 ], "wc_strengths_avg": [ 53.0, 12.569805089976535 ], "wc_weaknesses_avg": [ 110.0, 33.891985286593446 ], "wc_questions_avg": [ 46.0, 35.185224171518364 ], "wc_review_avg": [ 268.3333333333333, 17.13346303452853 ], "wc_reply_reviewers_avg": [ 23.666666666666668, 20.499322482029065 ], "wc_reply_authors_avg": [ 912.6666666666666, 185.78541983218764 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:IQx0IzgsA6sJ:scholar.google.com/&scioq=FlexCap:+Generating+Rich,+Localized,+and+Flexible+Captions+in+Images&hl=en&as_sdt=0,5", "gs_version_total": 2, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;1;1", "aff_country_unique": "United States;United Kingdom" }, { "id": "7Pzu7VjSwy", "title": "Diving into Class-Incremental Learning from Better Balancing Old and New knowledge", "track": "main", "status": 
"Withdraw", "tldr": "", "abstract": "Class-Incremental Learning (Class-IL) aims to continuously learn new knowledge without forgetting old knowledge from a given data stream using deep neural networks. Recent Class-IL methods strive to balance old and new knowledge and have achieved excellent results in mitigating the forgetting by mainly employing the rehearsal-based strategy. However, the representation learning on new tasks is often impaired since the trade-off is hard to taken between old and new knowledge. To overcome this challenge, based on the Complementary Learning System (CLS) theory, we propose a novel CLS-based method by focusing on the representation of old and new knowledge in Class-IL, which can acquire more new knowledge from new tasks while consolidating the old knowledge so as to make a better balance between them. Specifically, our proposed method has two novel components: (1) To effectively mitigate the forgetting, we first propose a bidirectional transport (BDT) strategy between old and new models, which can better integrate the old knowledge into the new knowledge and meanwhile enforce the old knowledge to be better consolidated by bidirectionally transferring parameters across old and new models. (2) To ensure that the representation of new knowledge is not impaired by the old knowledge, we further devise a selective momentum (SMT) mechanism to give parameters greater flexibility to learn new knowledge while transferring important old knowledge, which is achieved by selectively (momentum) updating network parameters through parameter importance evaluation. Extensive experiments on four benchmarks show that our proposed method significantly outperforms the state-of-the-arts under the Class-IL setting.", "keywords": "class incremental learning;catastrophic forgetting;complementary learning system;knowledge representation", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "", "author": "Feifei Fu;Yizhao Gao;Shiqi Zhao;Haoran Wu;Zhiwu Lu", "authorids": "~Feifei_Fu1;~Yizhao_Gao1;~Shiqi_Zhao3;~Haoran_Wu6;~Zhiwu_Lu1", "gender": "M;M;M;M;F", "homepage": ";;;https://gsai.ruc.edu.cn/luzhiwu;", "dblp": "132/7629;;;53/5234;https://dblp.org/rec/journals/iet-ipr/Fu0TL21", "google_scholar": "https://scholar.google.com/citations?hl=en;;;OUXS8doAAAAJ;", "orcid": ";0009-0006-2508-7108;;;0009-0000-9957-5231", "linkedin": ";;https://www.linkedin.cn/incareer/in/%E6%B5%A9%E7%84%B6-%E5%90%B4-b807a0164;;https://www.linkedin.cn/injobs/in/%E8%8F%B2%E8%8F%B2-%E4%BB%98-a20230234", "or_profile": "~Yizhao_Gao1;~Shiqi_Zhao3;~Haoran_Wu6;~Zhiwu_Lu1;~FU_Feifei1", "aff": "Renmin University of China;China Unicom Research Institute;China Unicom Research Institute ;Renmin University of China;Renmin University of China", "aff_domain": "ruc.edu.cn;chinaunicom.cn;chinaunicom.cn;ruc.edu.cn;ruc.edu.cn", "position": "PhD student;Researcher;Researcher;Full Professor;PhD student", "bibtex": "@misc{\nfu2024diving,\ntitle={Diving into Class-Incremental Learning from Better Balancing Old and New knowledge},\nauthor={Feifei Fu and Yizhao Gao and Shiqi Zhao and Haoran Wu and Zhiwu Lu},\nyear={2024},\nurl={https://openreview.net/forum?id=7Pzu7VjSwy}\n}", "github": "", "project": "", "reviewers": "nG1u;W1qo;T7Kt;UGyp", "site": "https://openreview.net/forum?id=7Pzu7VjSwy", "pdf_size": 3936805, "rating": "3;3;3;6", "confidence": "5;4;4;4", "soundness": "3;3;2;3", "contribution": "1;1;2;2", "presentation": "2;2;3;2", "wc_summary": "76;50;51;117", "wc_strengths": 
"32;26;63;83", "wc_weaknesses": "321;109;316;208", "wc_questions": "32;26;66;2", "wc_review": "461;211;496;410", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 1.5, 0.5 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 73.5, 27.189152248645048 ], "wc_strengths_avg": [ 51.0, 23.205602771744587 ], "wc_weaknesses_avg": [ 238.5, 87.33985344617885 ], "wc_questions_avg": [ 31.5, 22.863726730347352 ], "wc_review_avg": [ 394.5, 110.26898929436145 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:B_17qcodV_sJ:scholar.google.com/&scioq=Diving+into+Class-Incremental+Learning+from+Better+Balancing+Old+and+New+knowledge&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;1;0;0", "aff_unique_norm": "Renmin University of China;China Unicom Research Institute", "aff_unique_dep": ";", "aff_unique_url": "http://www.ruc.edu.cn;https://www.chinaunicom.com.cn/en-US/ResearchInstitute", "aff_unique_abbr": "RUC;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Adversarial Adaptive Sampling: Unify PINN and Optimal Transport for the Approximation of PDEs", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19361", "id": "7QI7tVrh2c", "author_site": "Kejun Tang, Jiayu Zhai, Xiaoliang Wan, Chao Yang", "tldr": "", "abstract": "Solving partial differential equations (PDEs) is a central task in scientific computing. Recently, neural network approximation of PDEs has received increasing attention due to its flexible meshless discretization and its potential for high-dimensional problems. One fundamental numerical difficulty is that random samples in the training set introduce statistical errors into the discretization of the loss functional which may become the dominant error in the final approximation, and therefore overshadow the modeling capability of the neural network. In this work, we propose a new minmax formulation to optimize simultaneously the approximate solution, given by a neural network model, and the random samples in the training set, provided by a deep generative model. The key idea is to use a deep generative model to adjust the random samples in the training set such that the residual induced by the neural network model can maintain a smooth profile in the training process. Such an idea is achieved by implicitly embedding the Wasserstein distance between the residual-induced distribution and the uniform distribution into the loss, which is then minimized together with the residual. A nearly uniform residual profile means that its variance is small for any normalized weight function such that the Monte Carlo approximation error of the loss functional is reduced significantly for a certain sample size. 
The adversarial adaptive sampling (AAS) approach proposed in this work is the first attempt to formulate two essential components, minimizing the residual and seeking the optimal training set, into one minmax objective functional for the neural network approximation of PDEs.", "keywords": "adversarial adaptive sampling;optimal transport;neural network approximation of PDEs", "primary_area": "probabilistic methods (Bayesian methods, variational inference, sampling, UQ, etc.)", "supplementary_material": "/attachment/1a0842aff32043ada3484a12665005e0edb4749a.pdf", "author": "Kejun Tang;Jiayu Zhai;Xiaoliang Wan;Chao Yang", "authorids": "~Kejun_Tang1;~Jiayu_Zhai1;~Xiaoliang_Wan1;~Chao_Yang8", "gender": ";M;;M", "homepage": ";https://ims.shanghaitech.edu.cn/2022/0913/c4741a835499/page.htm;http://www.math.lsu.edu/~xlwan;", "dblp": ";206/7523;11/3027;", "google_scholar": ";gcu1p1UAAAAJ;j0uRgOYAAAAJ;JvVLHaEAAAAJ", "orcid": ";0000-0002-7376-6205;;", "linkedin": ";;;", "or_profile": "~Kejun_Tang1;~Jiayu_Zhai1;~Xiaoliang_Wan1;~Chao_Yang8", "aff": ";ShanghaiTech University;Louisiana State University;Peking University", "aff_domain": ";shanghaitech.edu.cn;lsu.edu;pku.edu.cn", "position": ";Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\ntang2024adversarial,\ntitle={Adversarial Adaptive Sampling: Unify {PINN} and Optimal Transport for the Approximation of {PDE}s},\nauthor={Kejun Tang and Jiayu Zhai and Xiaoliang Wan and Chao Yang},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=7QI7tVrh2c}\n}", "github": "", "project": "", "reviewers": "YsSo;ZtMG;CyTK;3BKL", "pdf_size": 2677007, "rating": "5;6;8;10", "confidence": "4;3;4;5", "soundness": "2;2;3;4", "contribution": "2;3;4;4", "presentation": "1;2;4;4", "wc_summary": "136;198;159;118", "wc_strengths": "32;80;22;27", "wc_weaknesses": "390;352;38;106", "wc_questions": "39;11;19;75", "wc_review": "597;641;238;326", "wc_reply_reviewers": "271;28;0;136", "wc_reply_authors": "1532;1074;175;337", "reply_reviewers": "1;1;0;1", "reply_authors": "3;3;1;2", "rating_avg": [ 7.25, 1.920286436967152 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "contribution_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 2.75, 1.299038105676658 ], "wc_summary_avg": [ 152.75, 29.894606536965828 ], "wc_strengths_avg": [ 40.25, 23.220411279734044 ], "wc_weaknesses_avg": [ 221.5, 152.01562419698837 ], "wc_questions_avg": [ 36.0, 24.71841418861655 ], "wc_review_avg": [ 450.5, 172.05304414627486 ], "wc_reply_reviewers_avg": [ 108.75, 106.55368365288926 ], "wc_reply_authors_avg": [ 779.5, 550.9566679876013 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7364596943186587, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5022401845944297732&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 7, "openreview": "https://openreview.net/forum?id=7QI7tVrh2c", "pdf": "https://openreview.net/pdf?id=7QI7tVrh2c", "email": ";shanghaitech.edu.cn;lsu.edu;pku.edu.cn", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "ShanghaiTech University;Louisiana State University;Peking University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.shanghaitech.edu.cn;https://www.lsu.edu;http://www.pku.edu.cn", "aff_unique_abbr": "ShanghaiTech;LSU;Peking U", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;United States" }, { "id": "7QUwypJ8Vq", "title": "Taming Self-Training for Open-Vocabulary Object Detection", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Recent studies have shown promising performance in open-vocabulary object detection (OVD) by utilizing pseudo labels (PLs) from pretrained vision and language models (VLMs). However, teacher-student self-training, a powerful and widely used paradigm to leverage PLs, is rarely explored for OVD. This work identifies two challenges of using self-training in OVD: noisy PLs from VLMs and frequent distribution changes of PLs. To address these challenges, we propose SAS-Det that tames self-training for OVD in two key aspects. First, we present a split-and-fusion (SAF) head that splits a standard detection into an open-branch and a closed-branch. This design can prevent noisy boxes of PLs from supervision. Moreover, the two branches learn complementary knowledge from different training data, significantly enhancing performance when fused together. Second, in our view, unlike in closed-set tasks, the PL's distributions in OVD are solely determined by the teacher model. Consequently, we introduce a periodic update strategy to decrease the number of updates to the teacher, thereby decreasing the frequency of changes in PL distributions. Extensive experiments demonstrate SAS-Det is both efficient and effective. Our pseudo labeling is three times faster than prior methods. SAS-Det outperforms prior state-of-the-art models of the same scale by a clear margin and achieves 37.4 AP50 and 29.1 APr on novel categories of the COCO and LVIS benchmarks, respectively.", "keywords": "Open-vocabulary object detection;pseudo labels;vision and language pretraining", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Shiyu Zhao;Samuel Schulter;Long Zhao;Zhixing Zhang;Vijay Kumar b g;Yumin Suh;Manmohan Chandraker;Dimitris N. Metaxas", "authorids": "~Shiyu_Zhao1;~Samuel_Schulter1;~Long_Zhao2;~Zhixing_Zhang1;~Vijay_Kumar_b_g1;~Yumin_Suh1;~Manmohan_Chandraker3;~Dimitris_N._Metaxas1", "gender": "M;;M;M;;;;", "homepage": "https://xiaofeng94.github.io/;https://samschulter.github.io;http://garyzhao.github.io/;https://zhang-zx.github.io/;;https://yuminsuh.github.io/;;", "dblp": ";27/9990;31/5383-3;;;119/1522;;", "google_scholar": "https://scholar.google.com.sg/citations?hl=en;VQ6dsFEAAAAJ;YTyBTmgAAAAJ;RhM5qHoAAAAJ;;a9k4nwQAAAAJ;;", "orcid": "0000-0002-4978-725X;;0000-0001-8921-8564;;;;;", "linkedin": "shiyu-zhao-8ba8ab301/;;garyzhao9012/;zhixing-zhang-174959198/;;;;", "or_profile": "~Shiyu_Zhao1;~Samuel_Schulter1;~Long_Zhao2;~Zhixing_Zhang1;~Vijay_Kumar_b_g1;~Yumin_Suh1;~Manmohan_Chandraker3;~Dimitris_N._Metaxas1", "aff": "Meta;NEC-Labs;Google DeepMind;Snap Inc.;;NEC-Labs;;", "aff_domain": "meta.com;nec-labs.com;google.com;snapchat.com;;nec-labs.com;;", "position": "Intern;Researcher;Research scientist;Intern;;Researcher;;", "bibtex": "@misc{\nzhao2024taming,\ntitle={Taming Self-Training for Open-Vocabulary Object Detection},\nauthor={Shiyu Zhao and Samuel Schulter and Long Zhao and Zhixing Zhang and Vijay Kumar b g and Yumin Suh and Manmohan Chandraker and Dimitris N. 
Metaxas},\nyear={2024},\nurl={https://openreview.net/forum?id=7QUwypJ8Vq}\n}", "github": "", "project": "", "reviewers": "wquL;NEiz;vFm8;rX3B", "site": "https://openreview.net/forum?id=7QUwypJ8Vq", "pdf_size": 11147330, "rating": "3;5;5;6", "confidence": "4;5;4;3", "soundness": "1;2;3;2", "contribution": "1;3;3;2", "presentation": "1;3;3;3", "wc_summary": "71;44;74;119", "wc_strengths": "31;41;17;45", "wc_weaknesses": "280;150;166;93", "wc_questions": "4;18;14;2", "wc_review": "386;253;271;259", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.0, 0.7071067811865476 ], "contribution_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 77.0, 26.91653766738954 ], "wc_strengths_avg": [ 33.5, 10.805091392487155 ], "wc_weaknesses_avg": [ 172.25, 67.86889935751131 ], "wc_questions_avg": [ 9.5, 6.689544080129826 ], "wc_review_avg": [ 292.25, 54.51318647813573 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.3244428422615251, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16942470371922372045&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1;2;3;1", "aff_unique_norm": "Meta;NEC Laboratories;Google;Snap Inc.", "aff_unique_dep": "Meta Platforms, Inc.;;Google DeepMind;", "aff_unique_url": "https://meta.com;https://www.nec-labs.com;https://deepmind.com;https://www.snapinc.com", "aff_unique_abbr": "Meta;NEC-Labs;DeepMind;Snap", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "7QlKLvfVge", "title": "Directional Rank Reduction for Backdoor Defense", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recent studies have indicated the effectiveness of neuron pruning for backdoor defense. In this work, we explore the limitations of pruning-based defense through theoretical and empirical investigations. We argue that pruning-based defense necessitates the removal of neurons that affect normal performance when the effect of backdoor is entangled across normal neurons. To address this challenge, we propose an extended neuron pruning framework, named \\emph{Directional Rank Reduction (\\method)}. \\method consists of three procedures: orthogonal transformation, pruning, and inverse transformation. Through the transformation of the feature space prior to pruning, \\method is able to focus the trigger effects on a limited number of neurons for more efficient pruning with less damage, outperforming existing pruning-based defense strategies. We implement \\method using Sarle's Bimodality Coefficient (SBC) which is optimized as the criterion for the transformation matrix based on the separability assumption of benign and poisoned features. Extensive experimental results demonstrate the superiority of our method. 
On average, our approach substantially reduces the ASR by 4.5x and increases the ACC by 1.45\\% compared with the recently strong baselines.", "keywords": "backdoor defense;backdoor attack;neuron pruning", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "", "author": "Runkai Zheng;Jindong Wang;Xuanchang Xu;Li Liu;Jianze Li;Xing Xie", "authorids": "~Runkai_Zheng1;~Jindong_Wang1;~Xuanchang_Xu2;~Li_Liu8;~Jianze_Li1;~Xing_Xie3", "gender": "M;M;F;M;M;M", "homepage": ";https://github.com/EricXuXuanchang;https://liliu-avril.github.io/;http://www.sribd.cn/teacher/21;http://research.microsoft.com/en-us/people/xingx/;https://jd92.wang/", "dblp": ";;33/4528-36;195/6246;08/6809-1;19/2969-1", "google_scholar": ";;KQ2S01UAAAAJ;;5EQfAFIAAAAJ;hBZ_tKsAAAAJ", "orcid": ";;;0000-0002-0760-7994;0000-0002-8608-8482;0000-0002-4833-0880", "linkedin": "%E6%B6%A6%E9%94%B4-%E9%83%91-551606156/;;;;xingx/;jindong-wang/", "or_profile": "~Runkai_Zheng1;~Xuanchang_Xu2;~Li_Liu8;~Jianze_Li1;~Xing_Xie3;~Jindong_Wang4", "aff": "Carnegie Mellon University;National University of Singapore;The Hong Kong University of Science and Technology (Guangzhou);;Microsoft Research Asia;Microsoft Research", "aff_domain": "andrew.cmu.edu;nus.edu;hkust-gz.edu.cn;;microsoft.com;microsoft.com", "position": "MS student;MS student;Assistant Professor;;Senior Principal Researcher;Researcher", "bibtex": "@misc{\nzheng2024directional,\ntitle={Directional Rank Reduction for Backdoor Defense},\nauthor={Runkai Zheng and Jindong Wang and Xuanchang Xu and Li Liu and Jianze Li and Xing Xie},\nyear={2024},\nurl={https://openreview.net/forum?id=7QlKLvfVge}\n}", "github": "", "project": "", "reviewers": "X3hw;niUP;7hoR;1xjT", "site": "https://openreview.net/forum?id=7QlKLvfVge", "pdf_size": 546454, "rating": "5;5;5;6", "confidence": "3;4;4;4", "soundness": "2;3;2;3", "contribution": "2;3;3;3", "presentation": "3;2;3;3", "wc_summary": "68;68;94;33", "wc_strengths": "44;46;30;30", "wc_weaknesses": "178;136;132;79", "wc_questions": "44;78;26;8", "wc_review": "334;328;282;150", "wc_reply_reviewers": "101;0;0;0", "wc_reply_authors": "1126;900;1661;630", "reply_reviewers": "1;0;0;0", "reply_authors": "2;2;2;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 65.75, 21.683807322516035 ], "wc_strengths_avg": [ 37.5, 7.533259586659682 ], "wc_weaknesses_avg": [ 131.25, 35.13812032536743 ], "wc_questions_avg": [ 39.0, 25.865034312755125 ], "wc_review_avg": [ 273.5, 74.0860985610661 ], "wc_reply_reviewers_avg": [ 25.25, 43.73428289111415 ], "wc_reply_authors_avg": [ 1079.25, 379.00354549792803 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ABg7mFflFPsJ:scholar.google.com/&scioq=Directional+Rank+Reduction+for+Backdoor+Defense&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;3", "aff_unique_norm": "Carnegie Mellon University;National University of Singapore;Hong Kong University of Science and Technology;Microsoft", "aff_unique_dep": ";;;Research", "aff_unique_url": 
"https://www.cmu.edu;https://www.nus.edu.sg;https://www.ust.hk;https://www.microsoft.com/en-us/research/group/asia", "aff_unique_abbr": "CMU;NUS;HKUST;MSR Asia", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Guangzhou;Asia", "aff_country_unique_index": "0;1;2;2;0", "aff_country_unique": "United States;Singapore;China" }, { "id": "7QncaLObzi", "title": "Binary Hyperbolic Embeddings", "track": "main", "status": "Reject", "tldr": "", "abstract": "As datasets continue to grow, vector-based search becomes more storage and compute intensive, requiring large-scale systems to support retrieval. Proposed solutions range from quantization techniques that balance speed and accuracy, to hashing methods that learn compact binary representations. This paper promotes the use of hyperbolic space for its compact nature whilst overcoming its slow retrieval via binarization. Specifically, we address hyperbolic space's inherent slowness by proving that its complex similarity calculations can be equated to a binary XOR operation. Our approach allows for 90% less storage and at least 4.7 times faster search while maintaining performance of full-precision Euclidean embeddings.", "keywords": "Hyperbolic;Binary;Hierarchical", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Teng Long;Pascal Mettes;Nanne Van Noord", "authorids": "~Teng_Long2;~Pascal_Mettes1;~Nanne_Van_Noord1", "gender": "M;M;M", "homepage": "http://tenglon.github.io/;https://staff.fnwi.uva.nl/p.s.m.mettes/;https://nanne.github.io/", "dblp": ";147/4008;123/5104.html", "google_scholar": "5Iv3ul0AAAAJ;https://scholar.google.nl/citations?user=sMQxA3AAAAAJ;wFDJzDkAAAAJ", "orcid": "0000-0002-2380-9502;0000-0001-9275-5942;0000-0002-5145-3603", "linkedin": "tenglong-926500116/;;", "or_profile": "~Teng_Long2;~Pascal_Mettes1;~Nanne_Van_Noord1", "aff": "University of Amsterdam, University of Amsterdam;University of Amsterdam;University of Amsterdam", "aff_domain": "ivi.uva.nl;uva.nl;uva.nl", "position": "Postdoc;Assistant Professor;Assistant Professor", "bibtex": "@misc{\nlong2024binary,\ntitle={Binary Hyperbolic Embeddings},\nauthor={Teng Long and Pascal Mettes and Nanne Van Noord},\nyear={2024},\nurl={https://openreview.net/forum?id=7QncaLObzi}\n}", "github": "", "project": "", "reviewers": "5iHW;4fSp;AY1W;d3do", "site": "https://openreview.net/forum?id=7QncaLObzi", "pdf_size": 10093397, "rating": "5;6;6;6", "confidence": "3;5;3;3", "soundness": "3;3;3;3", "contribution": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "96;39;67;67", "wc_strengths": "32;41;26;72", "wc_weaknesses": "156;43;144;250", "wc_questions": "34;34;114;147", "wc_review": "318;157;351;536", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1077;373;370;424", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 67.25, 20.154093876927337 ], "wc_strengths_avg": [ 42.75, 17.711225254058512 ], "wc_weaknesses_avg": [ 148.25, 73.32930860167713 ], "wc_questions_avg": [ 82.25, 49.6405831956072 ], "wc_review_avg": [ 340.5, 134.63747620926353 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 561.0, 298.68461627609815 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], 
"corr_rating_confidence": 0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4136104022654752726&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Amsterdam", "aff_unique_dep": "", "aff_unique_url": "https://www.uva.nl", "aff_unique_abbr": "UvA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Netherlands" }, { "id": "7Rf2j94H1x", "title": "Episode Transformer: Model-based Episodic Reinforcement Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Episodic Reinforcement Learning (ERL) with movement primitives (MPs) has recently achieved significant success, especially in sparse and non-Markovian reward scenarios. By reasoning directly at the trajectory level via MPs, ERL results in smoother, energy-efficient policies and improved exploration capabilities for many real-world tasks. However, these black-box optimization approaches have very poor data-efficiency making them impractical for real-world applications. To mitigate this drawback, we propose Episode Transformer, a model-based ERL algorithm. Here, we learn a transformer-based episodic world model. To perform control we train a policy, with trust region constraints, purely in the world model's imagination. We compare our approach to state-of-the-art step-based and episodic RL methods on a variety of challenging robotic tasks under dense, sparse, and non-Markovian reward settings. The results show that the Episode Transformer is able to learn high-quality policies that retain all the benefits of previous deep ERL methods while requiring up to 5x fewer environment samples.", "keywords": "Episodic RL;Model-based RL;Movement Primitives", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Ruben Jacob;Vaisakh Shaj;Philipp Becker;Gerhard Neumann", "authorids": "~Ruben_Jacob1;~Vaisakh_Shaj1;~Philipp_Becker1;~Gerhard_Neumann2", "gender": ";M;M;M", "homepage": ";;;https://alr.anthropomatik.kit.edu/", "dblp": ";190/3994;66/1316;60/4878", "google_scholar": ";;https://scholar.google.de/citations?user=jXx-LuQAAAAJ;https://scholar.google.com.tw/citations?user=GL360kMAAAAJ", "orcid": ";;;", "linkedin": "ruben-jacob-067735249/;;;", "or_profile": "~Ruben_Jacob1;~Vaisakh_Shaj1;~Philipp_Becker1;~Gerhard_Neumann1", "aff": ";Karlsruhe Institute of Technology;FZI Forschungszentrum Informatik ;Karlsruhe Institute of Technology", "aff_domain": ";kit.edu;fzi.de;kit.edu", "position": ";PhD student;Researcher;Full Professor", "bibtex": "@misc{\njacob2024episode,\ntitle={Episode Transformer: Model-based Episodic Reinforcement Learning},\nauthor={Ruben Jacob and Vaisakh Shaj and Philipp Becker and Gerhard Neumann},\nyear={2024},\nurl={https://openreview.net/forum?id=7Rf2j94H1x}\n}", "github": "", "project": "", "reviewers": "Q5Ym;in4T;kg3S;AjWh", "site": "https://openreview.net/forum?id=7Rf2j94H1x", "pdf_size": 2200445, "rating": "3;3;3;5", "confidence": "2;3;3;4", "soundness": "2;2;2;2", "contribution": "2;2;2;2", "presentation": "2;2;2;3", "wc_summary": "64;145;91;87", "wc_strengths": "13;35;54;15", "wc_weaknesses": "65;246;334;116", "wc_questions": "29;42;61;79", "wc_review": "171;468;540;297", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.0, 
0.0 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 96.75, 29.701641368786337 ], "wc_strengths_avg": [ 29.25, 16.67895380412093 ], "wc_weaknesses_avg": [ 190.25, 106.03389788176231 ], "wc_questions_avg": [ 52.75, 18.952242611363964 ], "wc_review_avg": [ 369.0, 144.42125882293092 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:YNHREC3LGtsJ:scholar.google.com/&scioq=Episode+Transformer:+Model-based+Episodic+Reinforcement+Learning&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "Karlsruhe Institute of Technology;FZI Forschungszentrum Informatik", "aff_unique_dep": ";", "aff_unique_url": "https://www.kit.edu;https://www.fzi.de", "aff_unique_abbr": "KIT;FZI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "id": "7Scc7Nl7lg", "title": "Revealing Vision-Language Integration in the Brain with Multimodal Networks", "track": "main", "status": "Reject", "tldr": "", "abstract": "We use multimodal deep neural networks to identify sites of multimodal integration in the human brain. These are regions where a multimodal language-vision model is better at predicting neural recordings (stereoelectroencephalography, SEEG) than either a unimodal language, unimodal vision model, or a linearly-integrated language-vision model. We use a wide range of state-of-the-art models spanning different architectures including Transformers and CNNs (ALBEF, BLIP, Flava, ConvNeXt, BEIT, SIMCLR, CLIP, SLIP) with different multimodal integration approaches to model the SEEG signal while subjects watched movies. As a key enabling step, we first demonstrate that the approach has the resolution to distinguish trained from randomly-initialized models for both language and vision; the inability to do so would fundamentally hinder further analysis. We show that trained models systematically outperform randomly initialized models in their ability to predict the SEEG signal. We then compare unimodal and multimodal models against one another. A key contribution is standardizing the methodology for doing so while carefully avoiding statistical artifacts. Since models all have different architectures, number of parameters, and training sets which can obscure the results, we then carry out a test between two controlled models: SLIP-Combo and SLIP-SimCLR which keep all of these attributes the same aside from multimodal input. Using this method, we identify neural sites (on average 141 out of 1090 total sites or 12.94\\%) and brain regions where multimodal integration is occurring. 
We find numerous new sites of multimodal integration, many of which lie around the temporoparietal junction, long theorized to be a hub of multimodal integration.", "keywords": "Vision and language in the brain;multimodal processing;encoding models", "primary_area": "applications to neuroscience & cognitive science", "supplementary_material": "/attachment/eb0f25e91d6c84d21fa6637156456d34106a4e27.zip", "author": "Vighnesh Subramaniam;Colin Conwell;Christopher Wang;Gabriel Kreiman;Boris Katz;Ignacio Cases;Andrei Barbu", "authorids": "~Vighnesh_Subramaniam1;~Colin_Conwell1;~Christopher_Wang1;~Gabriel_Kreiman1;~Boris_Katz1;~Ignacio_Cases2;~Andrei_Barbu3", "gender": ";;;M;M;Non-Binary;M", "homepage": "https://vsubramaniam851.github.io;;https://czlwang.github.io/;http://klab.tch.harvard.edu;http://people.csail.mit.edu/boris/boris.html;;https://0xab.com", "dblp": ";;;12/1367;k/BorisKatz;;58/8365", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;;WxZ_6nsAAAAJ;FdNuUb8AAAAJ;9-TdgYMAAAAJ;t1rjgHgAAAAJ", "orcid": ";0000-0002-7754-1580;;0000-0003-3505-8475;;;", "linkedin": "vighnesh-subramaniam-34549717b/;;;kreiman/;;;andrei-barbu-1166131", "or_profile": "~Vighnesh_Subramaniam1;~Colin_Conwell1;~Christopher_Wang1;~Gabriel_Kreiman1;~Boris_Katz1;~Ignacio_Cases2;~Andrei_Barbu3", "aff": "Massachusetts Institute of Technology;Johns Hopkins University;Computer Science and Artificial Intelligence Laboratory, Electrical Engineering & Computer Science;Harvard Medical School;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;jhu.edu;csail.mit.edu;harvard.edu;mit.edu;mit.edu;mit.edu", "position": "MS student;Postdoc;PhD student;Full Professor;Principal Research Scientist;Postdoc;Researcher", "bibtex": "@misc{\nsubramaniam2024revealing,\ntitle={Revealing Vision-Language Integration in the Brain with Multimodal Networks},\nauthor={Vighnesh Subramaniam and Colin Conwell and Christopher Wang and Gabriel Kreiman and Boris Katz and Ignacio Cases and Andrei Barbu},\nyear={2024},\nurl={https://openreview.net/forum?id=7Scc7Nl7lg}\n}", "github": "", "project": "", "reviewers": "KL3N;JsYq;HcSy;gGN5;9iyA", "site": "https://openreview.net/forum?id=7Scc7Nl7lg", "pdf_size": 30079476, "rating": "3;3;6;6;6", "confidence": "4;5;3;5;4", "soundness": "1;2;3;2;3", "contribution": "1;2;2;3;3", "presentation": "1;2;3;3;3", "wc_summary": "73;114;42;289;55", "wc_strengths": "16;66;68;91;93", "wc_weaknesses": "252;436;175;799;125", "wc_questions": "92;3;1;244;45", "wc_review": "433;619;286;1423;318", "wc_reply_reviewers": "0;187;0;0;20", "wc_reply_authors": "782;1372;588;1221;801", "reply_reviewers": "0;1;0;0;1", "reply_authors": "1;3;1;2;2", "rating_avg": [ 4.8, 1.469693845669907 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 2.2, 0.7483314773547882 ], "contribution_avg": [ 2.2, 0.7483314773547882 ], "presentation_avg": [ 2.4, 0.8 ], "wc_summary_avg": [ 114.6, 90.51983208115224 ], "wc_strengths_avg": [ 66.8, 27.76616646208115 ], "wc_weaknesses_avg": [ 357.4, 244.7763060428848 ], "wc_questions_avg": [ 77.0, 89.87769467448528 ], "wc_review_avg": [ 615.8, 420.10731962202226 ], "wc_reply_reviewers_avg": [ 41.4, 73.21092814600836 ], "wc_reply_authors_avg": [ 952.8, 294.26341940513095 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 
-0.32732683535398854, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12071620120661180690&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff_unique_index": "0;1;0;2;0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology;Johns Hopkins University;Harvard University", "aff_unique_dep": ";;Medical School", "aff_unique_url": "https://web.mit.edu;https://www.jhu.edu;https://hms.harvard.edu", "aff_unique_abbr": "MIT;JHU;HMS", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Cambridge;Boston", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Removing Biases from Molecular Representations via Information Maximization", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19360", "id": "7TOs9gjAg1", "author_site": "Chenyu Wang, Sharut Gupta, Caroline Uhler, Tommi Jaakkola", "tldr": "", "abstract": "High-throughput drug screening -- using cell imaging or gene expression measurements as readouts of drug effect -- is a critical tool in biotechnology to assess and understand the relationship between the chemical structure and biological activity of a drug. Since large-scale screens have to be divided into multiple experiments, a key difficulty is dealing with batch effects, which can introduce systematic errors and non-biological associations in the data. We propose InfoCORE, an Information maximization approach for COnfounder REmoval, to effectively deal with batch effects and obtain refined molecular representations. InfoCORE establishes a variational lower bound on the conditional mutual information of the latent representations given a batch identifier. It adaptively reweights samples to equalize their implied batch distribution. Extensive experiments on drug screening data reveal InfoCORE's superior performance in a multitude of tasks including molecular property prediction and molecule-phenotype retrieval. Additionally, we show results for how InfoCORE offers a versatile framework and resolves general distribution shifts and issues of data fairness by minimizing correlation with spurious features or removing sensitive attributes.", "keywords": "Molecular Representation;Batch Effect;Contrastive Learning;Information Maximization;Drug Discovery", "primary_area": "applications to physical sciences (physics, chemistry, biology, etc.)", "supplementary_material": "", "author": "Chenyu Wang;Sharut Gupta;Caroline Uhler;Tommi S. Jaakkola", "authorids": "~Chenyu_Wang7;~Sharut_Gupta1;~Caroline_Uhler1;~Tommi_S._Jaakkola1", "gender": ";F;F;", "homepage": ";https://www.mit.edu/~sharut/;https://www.carolineuhler.com/;", "dblp": ";;66/10813;", "google_scholar": ";https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=dIJFcaoAAAAJ;", "orcid": ";;;", "linkedin": ";sharut-gupta/;;", "or_profile": "~Chenyu_Wang7;~Sharut_Gupta1;~Caroline_Uhler1;~Tommi_S._Jaakkola1", "aff": ";Google;Electrical Engineering & Computer Science, Massachusetts Institute of Technology;", "aff_domain": ";google.com;eecs.mit.edu;", "position": ";Student Researcher;Associate Professor;", "bibtex": "@inproceedings{\nwang2024removing,\ntitle={Removing Biases from Molecular Representations via Information Maximization},\nauthor={Chenyu Wang and Sharut Gupta and Caroline Uhler and Tommi S. 
Jaakkola},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=7TOs9gjAg1}\n}", "github": "", "project": "", "reviewers": "Tyeq;JEFo;g7wR;FJph", "pdf_size": 5869947, "rating": "6;6;6;8", "confidence": "2;3;3;3", "soundness": "3;2;3;3", "contribution": "3;2;2;3", "presentation": "3;2;3;4", "wc_summary": "107;63;204;96", "wc_strengths": "81;12;97;80", "wc_weaknesses": "85;27;419;159", "wc_questions": "30;278;137;2", "wc_review": "303;380;857;337", "wc_reply_reviewers": "20;42;254;155", "wc_reply_authors": "634;2445;1944;382", "reply_reviewers": "1;1;1;2", "reply_authors": "1;5;4;2", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 117.5, 52.5 ], "wc_strengths_avg": [ 67.5, 32.745228660065884 ], "wc_weaknesses_avg": [ 172.5, 149.80904512078035 ], "wc_questions_avg": [ 111.75, 108.40289433405364 ], "wc_review_avg": [ 469.25, 225.5242503590246 ], "wc_reply_reviewers_avg": [ 117.75, 93.86792583199014 ], "wc_reply_authors_avg": [ 1351.25, 866.2468975413418 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 1.5811388300841898 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4157904962319405770&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 8, "openreview": "https://openreview.net/forum?id=7TOs9gjAg1", "pdf": "https://openreview.net/pdf?id=7TOs9gjAg1", "email": ";google.com;eecs.mit.edu;", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Google;Massachusetts Institute of Technology", "aff_unique_dep": "Google;Electrical Engineering & Computer Science", "aff_unique_url": "https://www.google.com;https://web.mit.edu", "aff_unique_abbr": "Google;MIT", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Mountain View;Cambridge", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "BooookScore: A systematic exploration of book-length summarization in the era of LLMs", "status": "Oral", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19359", "id": "7Ttk3RzDeu", "author_site": "Yapei Chang, Kyle Lo, Tanya Goyal, Mohit Iyyer", "tldr": "", "abstract": "Summarizing book-length documents ($>$100K tokens) that exceed the context window size of large language models (LLMs) requires first breaking the input document into smaller chunks and then prompting an LLM to merge, update, and compress chunk-level summaries. Despite the complexity and importance of this task, it has yet to be meaningfully studied due to the challenges of evaluation: existing book-length summarization datasets (e.g., BookSum) are in the pretraining data of most public LLMs, and existing evaluation methods struggle to capture errors made by modern LLM summarizers. In this paper, we present the first study of the coherence of LLM-based book-length summarizers implemented via two prompting workflows: (1) hierarchically merging chunk-level summaries, and (2) incrementally updating a running summary. We obtain 1193 fine-grained human annotations on GPT-4 generated summaries of 100 recently-published books and identify eight common types of coherence errors made by LLMs. 
Because human evaluation is expensive and time-consuming, we develop an automatic metric, BooookScore, that measures the proportion of sentences in a summary that do not contain any of the identified error types. BooookScore has high agreement with human annotations and allows us to systematically evaluate the impact of many other critical parameters (e.g., chunk size, base LLM) while saving \\$15K USD and 500 hours in human evaluation costs. We find that closed-source LLMs such as GPT-4 and Claude 2 produce summaries with higher BooookScore than those generated by open-source models. While LLaMA 2 falls behind other models, Mixtral achieves performance on par with GPT-3.5-Turbo. Incremental updating yields lower BooookScore but higher level of detail than hierarchical merging, a trade-off sometimes preferred by annotators. We release code and annotations to spur more principled research on book-length summarization.", "keywords": "summarization;evaluation;long context;prompting;LLM", "primary_area": "datasets and benchmarks", "supplementary_material": "", "author": "Yapei Chang;Kyle Lo;Tanya Goyal;Mohit Iyyer", "authorids": "~Yapei_Chang1;~Kyle_Lo1;~Tanya_Goyal1;~Mohit_Iyyer1", "gender": "F;;F;M", "homepage": "https://lilakk.github.io/;https://kyleclo.github.io/;;http://cs.umass.edu/~miyyer", "dblp": "316/9933;220/2020;176/9145;148/9178", "google_scholar": "qCjnm-UAAAAJ;VJS12uMAAAAJ;w72MSFoAAAAJ;rBVA5tcAAAAJ", "orcid": ";;;", "linkedin": "ella-yapei-chang/;kylelo/;;", "or_profile": "~Yapei_Chang1;~Kyle_Lo1;~Tanya_Goyal1;~Mohit_Iyyer1", "aff": "University of Massachusetts at Amherst;Allen Institute for Artificial Intelligence;Princeton University;University of Massachusetts Amherst", "aff_domain": "umass.edu;allenai.org;princeton.edu;cs.umass.edu", "position": "PhD student;Researcher;Postdoc;Associate Professor", "bibtex": "@inproceedings{\nchang2024booookscore,\ntitle={BooookScore: A systematic exploration of book-length summarization in the era of {LLM}s},\nauthor={Yapei Chang and Kyle Lo and Tanya Goyal and Mohit Iyyer},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=7Ttk3RzDeu}\n}", "github": "", "project": "", "reviewers": "vK8q;hJ5Z;rPVA;SAFf", "pdf_size": 599451, "rating": "8;8;8;10", "confidence": "4;4;4;5", "soundness": "3;3;4;4", "contribution": "3;4;4;4", "presentation": "3;4;4;4", "wc_summary": "39;202;36;61", "wc_strengths": "52;55;50;250", "wc_weaknesses": "145;145;45;43", "wc_questions": "114;67;1;11", "wc_review": "350;469;132;365", "wc_reply_reviewers": "546;66;0;0", "wc_reply_authors": "796;604;288;234", "reply_reviewers": "1;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 8.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "contribution_avg": [ 3.75, 0.4330127018922193 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 84.5, 68.52189431123456 ], "wc_strengths_avg": [ 101.75, 85.61067398403075 ], "wc_weaknesses_avg": [ 94.5, 50.50495025242575 ], "wc_questions_avg": [ 48.25, 45.53775905773142 ], "wc_review_avg": [ 329.0, 122.6234072271685 ], "wc_reply_reviewers_avg": [ 153.0, 228.49288829195538 ], "wc_reply_authors_avg": [ 480.5, 230.54880177524237 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 115, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=17968620361685249119&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=7Ttk3RzDeu", "pdf": "https://openreview.net/pdf?id=7Ttk3RzDeu", "email": "umass.edu;allenai.org;princeton.edu;cs.umass.edu", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Massachusetts Amherst;Allen Institute for Artificial Intelligence;Princeton University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.umass.edu;https://allenai.org;https://www.princeton.edu", "aff_unique_abbr": "UMass Amherst;AI2;Princeton", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Amherst;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "7U5QE9T4hI", "title": "Learning to Extrapolate and Adjust: Two-Stage Meta-Learning for Concept Drift in Online Time Series Forecasting", "track": "main", "status": "Reject", "tldr": "", "abstract": "The non-stationary nature of time series data in many real-world applications makes accurate time series forecasting challenging. In this paper, we consider concept drift where the underlying distribution or environment of time series changes. We first classify concepts into two categories, macro-drift corresponding to stable and long-term changes and micro-drift referring to sudden or short-term changes. Next, we propose a unified meta-learning framework called LEAF (Learning to Extrapolate and Adjust for Forecasting). Specifically, an extrapolation module is first meta-learnt to track the dynamics of the prediction model in latent space and extrapolate to the future considering macro-drift. Then an adjustment module incorporates meta-learnable surrogate loss to capture sample-specific micro-drift patterns. Through this two-stage framework, different types of concept drifts can be handled. In particular, LEAF is model-agnostic and can be applied to any deep prediction model. To further advance the research of concept drift on time series, we open source three electric load time series datasets collected from real-world scenarios, which exhibit diverse and typical concept drifts and are ideal benchmark datasets for further research. 
Extensive experiments on multiple datasets demonstrate the effectiveness of LEAF.", "keywords": "time series forecasting;concept drift;meta learning", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/c2d1a81a6325615723201ac7aa0fdf556cc11293.pdf", "author": "Zhaoyang Zhu;Weiqi Chen;YiFan Zhang;Qingsong Wen;Liang Sun", "authorids": "~Zhaoyang_Zhu1;~Weiqi_Chen1;~YiFan_Zhang8;~Qingsong_Wen2;~Liang_Sun2", "gender": ";M;M;M;M", "homepage": "https://github.com/DAMO-DI-ML;https://github.com/DAMO-DI-ML;https://www.linkedin.com/in/liang-sun-a0a87621/;https://sites.google.com/site/qingsongwen8/;https://yfzhang114.github.io/", "dblp": "https://dblp.org/rec/journals/corr/abs-2403-14949;;18/5837-1;27/561;", "google_scholar": ";dMg_soMAAAAJ;D_cOMBgAAAAJ;vjPJvwYAAAAJ;lUnt8X4AAAAJ", "orcid": "0009-0009-0265-9910;0009-0007-9246-9402;0009-0002-5835-7259;0000-0003-4516-2524;0000-0002-6227-0183", "linkedin": ";;;qingsong-wen-22814156/;", "or_profile": "~Zhaoyang_Zhu1;~Weiqi_Chen1;~Liang_Sun2;~Qingsong_Wen1;~yifan_zhang7", "aff": "Alibaba Group;Alibaba Group;Alibaba Group;Squirrel Ai Learning;Institute of automation, Chinese academy of science", "aff_domain": "alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;squirrelai.com;nlpr.ia.ac.cn", "position": "Researcher;Researcher;Staff Software Engineer;Principal Researcher;PhD student", "bibtex": "@misc{\nzhu2024learning,\ntitle={Learning to Extrapolate and Adjust: Two-Stage Meta-Learning for Concept Drift in Online Time Series Forecasting},\nauthor={Zhaoyang Zhu and Weiqi Chen and YiFan Zhang and Qingsong Wen and Liang Sun},\nyear={2024},\nurl={https://openreview.net/forum?id=7U5QE9T4hI}\n}", "github": "", "project": "", "reviewers": "rgPM;RMU1;CCky;9YY8;5qcj;x7gB", "site": "https://openreview.net/forum?id=7U5QE9T4hI", "pdf_size": 2673574, "rating": "3;5;5;5;6;8", "confidence": "4;4;4;4;3;4", "soundness": "2;2;3;3;3;3", "contribution": "2;2;3;3;3;3", "presentation": "2;2;3;3;3;3", "wc_summary": "63;150;96;197;138;68", "wc_strengths": "34;50;50;81;67;61", "wc_weaknesses": "290;183;46;197;214;88", "wc_questions": "35;122;5;72;103;5", "wc_review": "422;505;197;547;522;222", "wc_reply_reviewers": "125;189;20;0;0;63", "wc_reply_authors": "1245;1622;406;656;1135;549", "reply_reviewers": "1;1;1;0;0;1", "reply_authors": "4;3;1;2;2;1", "rating_avg": [ 5.333333333333333, 1.4907119849998596 ], "confidence_avg": [ 3.8333333333333335, 0.3726779962499649 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 118.66666666666667, 47.73421786889941 ], "wc_strengths_avg": [ 57.166666666666664, 14.82584080433739 ], "wc_weaknesses_avg": [ 169.66666666666666, 80.96638534327413 ], "wc_questions_avg": [ 57.0, 45.566800780100124 ], "wc_review_avg": [ 402.5, 141.9257434952048 ], "wc_reply_reviewers_avg": [ 66.16666666666667, 70.05573178231425 ], "wc_reply_authors_avg": [ 935.5, 431.0347047125865 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.1666666666666665, 1.0671873729054748 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.19999999999999998, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:zuPLExgdwQ8J:scholar.google.com/&scioq=Learning+to+Extrapolate+and+Adjust:+Two-Stage+Meta-Learning+for+Concept+Drift+in+Online+Time+Series+Forecasting&hl=en&as_sdt=0,5", 
"gs_version_total": 0, "aff_unique_index": "0;0;0;1;2", "aff_unique_norm": "Alibaba Group;Squirrel Ai Learning;Chinese Academy of Sciences", "aff_unique_dep": ";;Institute of Automation", "aff_unique_url": "https://www.alibaba.com;https://www.squirrelai.com/;http://www.ia.cas.cn", "aff_unique_abbr": "Alibaba;;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "7UHlKybsQM", "title": "EFFICIENT QUANTUM STATE RECONSTRUCTION USING UNSUPERVISED LEARNING FOR QUANTUM CIRCUIT CUTTING", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Current quantum computer (QC) fabrication encounters challenges when attempting to scale up the number of qubits. These challenges include errors, physical limitations, interference, and various other factors. As a remedy, quantum circuit cutting holds the promise for studying large quantum systems with the limited qubit capacity of quantum computers today. With quantum circuit cutting, the output of a large quantum circuit could be obtained through classical post-processing of fragmented circuit outputs acquired through different measurement and preparation bases. However, such reconstruction process results in exponential quantum measurement cost with the increase in the number of circuit cuts. In this paper, we demonstrate efficient state reconstruction using a Restricted Boltzmann Machine (RBM) with polynomial resource scaling. We explore the benefits of unsupervised learning for simulating extensive quantum systems, exemplified by the reconstruction of highly entangled multi-qubit Greenberger\u2013Horne\u2013Zeilinger (GHZ) states from fragmented circuits. Our experiments illustrate that fragmented GHZ circuits, at the state-of-the-art scale of up to $18$ qubits, can be reconstructed with near-perfect fidelity using only $100$ sample measurements compared to $4^{18}$ sample measurements needed otherwise.", "keywords": "Unsupervised learning;state tomography;quantum computing", "primary_area": "applications to physical sciences (physics, chemistry, biology, etc.)", "supplementary_material": "", "author": "vinitha balachandran;Benjamin Chen Ming Choong;Nitin Shivaraman;Zhehui Wang;Liwei Yang;Rick Siow Mong Goh;Tao Luo", "authorids": "~vinitha_balachandran1;~Benjamin_Chen_Ming_Choong1;~Nitin_Shivaraman1;~Zhehui_Wang2;~Liwei_Yang2;~Rick_Siow_Mong_Goh1;~Tao_Luo2", "gender": "F;;M;M;;M;", "homepage": ";https://www.linkedin.com/in/benjamin-choong-ba6001179/;https://nitinshivaraman.github.io;;https://sites.google.com/view/rickgoh/home;;http://zhehui-wang.github.io", "dblp": ";322/4020;;;https://dblp.uni-trier.de/pers/g/Goh:Rick_Siow_Mong;43/4720-14;", "google_scholar": "https://scholar.google.com.sg/citations?user=b7LGxKkAAAAJ;;umM6yZsAAAAJ;;https://scholar.google.com.sg/citations?user=fBsBJjoAAAAJ;d4KZI8MAAAAJ;", "orcid": ";;0000-0002-3208-8495;0000-0002-0327-714X;0000-0001-9116-1595;0000-0002-3415-3676;", "linkedin": ";;nitinshivaraman/;;rickgoh/;;", "or_profile": "~vinitha_balachandran1;~Benjamin_Chen_Ming_Choong1;~Nitin_Shivaraman1;~Liwei_Yang2;~Rick_Siow_Mong_Goh1;~Tao_Luo2;~zhehui_wang1", "aff": "Institute of High Performance Computing, Singapore, A*STAR;Institute of High Performance Computing, Singapore, A*STAR;Institute of High Performance Computing, Singapore, A*STAR;Institute of High Performance Computing, Singapore, A*STAR;Institute of High Performance Computing, Singapore, A*STAR;Institute of High Performance Computing, Singapore, A*STAR;Institute of 
High Performance Computing, Singapore, A*STAR", "aff_domain": "ihpc.a-star.edu.sg;ihpc.a-star.edu.sg;ihpc.a-star.edu.sg;ihpc.a-star.edu.sg;ihpc.a-star.edu.sg;ihpc.a-star.edu.sg;ihpc.a-star.edu.sg", "position": "Researcher;Researcher;Researcher;Researcher;Director;Researcher;Researcher", "bibtex": "@misc{\nbalachandran2024efficient,\ntitle={{EFFICIENT} {QUANTUM} {STATE} {RECONSTRUCTION} {USING} {UNSUPERVISED} {LEARNING} {FOR} {QUANTUM} {CIRCUIT} {CUTTING}},\nauthor={vinitha balachandran and Benjamin Chen Ming Choong and Nitin Shivaraman and Zhehui Wang and Liwei Yang and Rick Siow Mong Goh and Tao Luo},\nyear={2024},\nurl={https://openreview.net/forum?id=7UHlKybsQM}\n}", "github": "", "project": "", "reviewers": "kuyP;YKiP;DHPz;GKSV", "site": "https://openreview.net/forum?id=7UHlKybsQM", "pdf_size": 2249405, "rating": "3;3;3;5", "confidence": "4;4;4;3", "soundness": "2;3;3;2", "contribution": "1;1;2;2", "presentation": "2;3;2;3", "wc_summary": "63;53;73;59", "wc_strengths": "21;48;49;98", "wc_weaknesses": "89;150;65;327", "wc_questions": "42;276;228;48", "wc_review": "215;527;415;532", "wc_reply_reviewers": "26;10;0;0", "wc_reply_authors": "508;255;139;190", "reply_reviewers": "1;1;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 1.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 62.0, 7.280109889280518 ], "wc_strengths_avg": [ 54.0, 27.7758888246623 ], "wc_weaknesses_avg": [ 157.75, 102.51188955433413 ], "wc_questions_avg": [ 148.5, 104.90352710943517 ], "wc_review_avg": [ 422.25, 128.47446244293064 ], "wc_reply_reviewers_avg": [ 9.0, 10.63014581273465 ], "wc_reply_authors_avg": [ 273.0, 141.76917859675987 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:YBATzIG-VC4J:scholar.google.com/&scioq=EFFICIENT+QUANTUM+STATE+RECONSTRUCTION+USING+UNSUPERVISED+LEARNING+FOR+QUANTUM+CIRCUIT+CUTTING&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Institute of High Performance Computing", "aff_unique_dep": "", "aff_unique_url": "https://www.ihpc.a-star.edu.sg", "aff_unique_abbr": "IHPC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "Singapore" }, { "title": "Beam Enumeration: Probabilistic Explainability For Sample Efficient Self-conditioned Molecular Design", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19358", "id": "7UhxsmbdaQ", "author_site": "Jeff Guo, Philippe Schwaller", "tldr": "", "abstract": "Generative molecular design has moved from proof-of-concept to real-world applicability, as marked by the surge in very recent papers reporting experimental validation. Key challenges in explainability and sample efficiency present opportunities to enhance generative design to directly optimize expensive high-fidelity oracles and provide actionable insights to domain experts. Here, we propose Beam Enumeration to exhaustively enumerate the most probable sub-sequences from language-based molecular generative models and show that molecular substructures can be extracted. 
When coupled with reinforcement learning, extracted substructures become meaningful, providing a source of explainability and improving sample efficiency through self-conditioned generation. Beam Enumeration is generally applicable to any language-based molecular generative model and notably further improves the performance of the recently reported Augmented Memory algorithm, which achieved the new state-of-the-art on the Practical Molecular Optimization benchmark for sample efficiency. The combined algorithm generates more high reward molecules and faster, given a fixed oracle budget. Beam Enumeration shows that improvements to explainability and sample efficiency for molecular design can be made synergistic.", "keywords": "Molecular generative models;reinforcement learning;natural language processing;drug discovery;sample-efficiency;explainability", "primary_area": "applications to physical sciences (physics, chemistry, biology, etc.)", "supplementary_material": "", "author": "Jeff Guo;Philippe Schwaller", "authorids": "~Jeff_Guo1;~Philippe_Schwaller1", "gender": "M;M", "homepage": "https://guojeff.github.io/;https://schwallergroup.github.io", "dblp": ";209/9632", "google_scholar": "yzhfk_YAAAAJ;Tz0I4ywAAAAJ", "orcid": "0000-0002-4633-3199;0000-0003-3046-6576", "linkedin": "jeffguo1/;", "or_profile": "~Jeff_Guo1;~Philippe_Schwaller1", "aff": "Microsoft AI4Science;Swiss Federal Institute of Technology Lausanne", "aff_domain": "research.microsoft.com;epfl.ch", "position": "Intern;Assistant Professor", "bibtex": "@inproceedings{\nguo2024beam,\ntitle={Beam Enumeration: Probabilistic Explainability For Sample Efficient Self-conditioned Molecular Design},\nauthor={Jeff Guo and Philippe Schwaller},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=7UhxsmbdaQ}\n}", "github": "", "project": "", "reviewers": "wYf5;d4Xh;La1F;3svX", "pdf_size": 19002272, "rating": "3;8;8;8", "confidence": "4;5;4;4", "soundness": "2;3;4;4", "contribution": "1;3;3;4", "presentation": "2;3;4;3", "wc_summary": "34;78;90;150", "wc_strengths": "19;52;164;123", "wc_weaknesses": "205;202;488;67", "wc_questions": "9;92;12;23", "wc_review": "267;424;754;363", "wc_reply_reviewers": "0;140;137;0", "wc_reply_authors": "2112;3411;2552;831", "reply_reviewers": "0;2;4;0", "reply_authors": "4;8;5;2", "rating_avg": [ 6.75, 2.165063509461097 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "contribution_avg": [ 2.75, 1.0897247358851685 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 88.0, 41.42463035441596 ], "wc_strengths_avg": [ 89.5, 57.11610981150589 ], "wc_weaknesses_avg": [ 240.5, 153.3794314763228 ], "wc_questions_avg": [ 34.0, 33.88952640566109 ], "wc_review_avg": [ 452.0, 183.1215443359956 ], "wc_reply_reviewers_avg": [ 69.25, 69.25812226735576 ], "wc_reply_authors_avg": [ 2226.5, 931.3325131229984 ], "reply_reviewers_avg": [ 1.5, 1.6583123951777 ], "reply_authors_avg": [ 4.75, 2.165063509461097 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=101114070671548311&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=7UhxsmbdaQ", "pdf": "https://openreview.net/pdf?id=7UhxsmbdaQ", "email": "research.microsoft.com;epfl.ch", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Microsoft;Swiss 
Federal Institute of Technology Lausanne", "aff_unique_dep": "AI4Science;", "aff_unique_url": "https://www.microsoft.com;https://www.epfl.ch", "aff_unique_abbr": "Microsoft;EPFL", "aff_campus_unique_index": "1", "aff_campus_unique": ";Lausanne", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;Switzerland" }, { "title": "Provable Compositional Generalization for Object-Centric Learning", "status": "Oral", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19357", "id": "7VPTUWkiDQ", "author_site": "Thadd\u00e4us Wiedemer, Jack Brady, Alexander Panfilov, Attila Juhos, Matthias Bethge, Wieland Brendel", "tldr": "", "abstract": "Learning representations that generalize to novel compositions of known concepts is crucial for bridging the gap between human and machine perception. One prominent effort is learning object-centric representations, which are widely conjectured to enable compositional generalization. Yet, it remains unclear when this conjecture will be true, as a principled theoretical or empirical understanding of compositional generalization is lacking. In this work, we investigate when compositional generalization is guaranteed for object-centric representations through the lens of identifiability theory. We show that autoencoders that satisfy structural assumptions on the decoder and enforce encoder-decoder consistency will learn object-centric representations that provably generalize compositionally. We validate our theoretical result and highlight the practical relevance of our assumptions through experiments on synthetic image data.", "keywords": "compositional generalization;identifiability;object-centric learning;generalization;OOD generalization;unsupervised learning;slot attention;disentanglement;autoencoders;representation learning", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/33fd1c9b517dded8403690a54c5761cbb95fa832.zip", "author": "Thadd\u00e4us Wiedemer;Jack Brady;Alexander Panfilov;Attila Juhos;Matthias Bethge;Wieland Brendel", "authorids": "~Thadd\u00e4us_Wiedemer1;~Jack_Brady1;~Alexander_Panfilov1;~Attila_Juhos1;~Matthias_Bethge1;~Wieland_Brendel1", "gender": "M;;M;;M;M", "homepage": ";https://github.com/JackBrady;https://kotekjedi.github.io/;;https://bethgelab.org;", "dblp": "327/3433;;305/8752;228/6943;77/3005;37/11107", "google_scholar": "aeCiRSYAAAAJ;;https://scholar.google.com/citations?hl=en;35hg1Z8AAAAJ;https://scholar.google.com/citations?hl=en;v-JL-hsAAAAJ", "orcid": "0009-0003-6280-0804;;;;;", "linkedin": "thaddaeuswiedemer/;;kotekjedi/;;;", "or_profile": "~Thadd\u00e4us_Wiedemer1;~Jack_Brady1;~Alexander_Panfilov1;~Attila_Juhos1;~Matthias_Bethge1;~Wieland_Brendel1", "aff": "Max Planck Institute for Intelligent Systems;Max-Planck Institute;University of Tuebingen;Max-Planck Institute for Intelligent Systems;University of Tuebingen;ELLIS Institute T\u00fcbingen", "aff_domain": "is.tuebingen.mpg.de;mpg.de;tuebingen.de;mpg.tuebingen.de;uni-tuebingen.de;tue.ellis.eu", "position": "PhD student;PhD student;MS student;PhD student;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\nwiedemer2024provable,\ntitle={Provable Compositional Generalization for Object-Centric Learning},\nauthor={Thadd{\\\"a}us Wiedemer and Jack Brady and Alexander Panfilov and Attila Juhos and Matthias Bethge and Wieland Brendel},\nbooktitle={The Twelfth International Conference on Learning 
Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=7VPTUWkiDQ}\n}", "github": "", "project": "", "reviewers": "KGuC;JA2G;wPGp", "pdf_size": 1564010, "rating": "6;8;8", "confidence": "3;4;3", "soundness": "2;4;3", "contribution": "2;3;3", "presentation": "3;4;4", "wc_summary": "173;92;202", "wc_strengths": "53;45;122", "wc_weaknesses": "117;38;89", "wc_questions": "3;84;19", "wc_review": "346;259;432", "wc_reply_reviewers": "0;42;19", "wc_reply_authors": "730;631;315", "reply_reviewers": "0;1;1", "reply_authors": "1;1;1", "rating_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 155.66666666666666, 46.549853800940014 ], "wc_strengths_avg": [ 73.33333333333333, 34.56716489515576 ], "wc_weaknesses_avg": [ 81.33333333333333, 32.70406023042943 ], "wc_questions_avg": [ 35.333333333333336, 35.02697373295171 ], "wc_review_avg": [ 345.6666666666667, 70.62734755193786 ], "wc_reply_reviewers_avg": [ 20.333333333333332, 17.172329163188344 ], "wc_reply_authors_avg": [ 558.6666666666666, 176.97520228049527 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.49999999999999983, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10657915378413188094&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=7VPTUWkiDQ", "pdf": "https://openreview.net/pdf?id=7VPTUWkiDQ", "email": "is.tuebingen.mpg.de;mpg.de;tuebingen.de;mpg.tuebingen.de;uni-tuebingen.de;tue.ellis.eu", "author_num": 6, "aff_unique_index": "0;1;2;3;2;4", "aff_unique_norm": "Max Planck Institute for Intelligent Systems;Max-Planck-Gesellschaft zur F\u00f6rderung der Wissenschaften e.V.;University of Tuebingen;Max-Planck Institute for Intelligent Systems;ELLIS Institute", "aff_unique_dep": "Intelligent Systems;;;;", "aff_unique_url": "https://www.mpi-is.mpg.de;https://www.mpg.de;https://www.uni-tuebingen.de/;https://www.mpi-is.mpg.de;https://ellis.eu/", "aff_unique_abbr": "MPI-IS;MPG;Uni T\u00fcbingen;MPI-IS;ELLIS", "aff_campus_unique_index": "1", "aff_campus_unique": ";T\u00fcbingen", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Germany" }, { "id": "7VVGO0kuuY", "title": "Learning Causal Dynamics Models in Object-Oriented Environments", "track": "main", "status": "Reject", "tldr": "", "abstract": "Causal Dynamics Models (CDMs) have demonstrated significant potential in addressing various challenges in reinforcement learning. Recent studies have incorporated causal discovery to capture the causal dependencies among environmental variables in the learning of CDMs. However, the learning of CDMs is still confined to small-scale environments due to computational complexity and sample efficiency constraints. This paper aims to extend CDMs to large-scale object-oriented environments, which consist of a multitude of objects classified into different categories. We introduce the Object-Oriented CDM (OOCDM) that shares causalities and parameters among objects belonging to the same class. Furthermore, we propose a learning method for OOCDM that enables it to adapt to a varying number of objects. 
Experimental results from large-scale tasks indicate that OOCDM outperforms existing CDMs in terms of causal discovery, prediction accuracy, generalization, and computational efficiency.", "keywords": "reinforcement learning;causality;dynamics model", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/f993e5fe10e42ccfb63a0b4a23195036c43cc171.zip", "author": "Zhongwei Yu;Jingqing Ruan;Dengpeng Xing", "authorids": "~Zhongwei_Yu1;~Jingqing_Ruan1;~Dengpeng_Xing1", "gender": "M;F;M", "homepage": ";https://github.com/Amanda-1997/;https://people.ucas.edu.cn/~xingdengpeng?language=en", "dblp": "96/4996;304/3544;85/8134", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;", "orcid": "0000-0003-3372-2256;0000-0002-4857-9053;", "linkedin": ";;", "or_profile": "~Zhongwei_Yu1;~Jingqing_Ruan1;~Dengpeng_Xing1", "aff": "Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "ia.ac.cn;ia.ac.cn;ia.ac.cn", "position": "MS student;PhD student;Associate Professor", "bibtex": "@misc{\nyu2024learning,\ntitle={Learning Causal Dynamics Models in Object-Oriented Environments},\nauthor={Zhongwei Yu and Jingqing Ruan and Dengpeng Xing},\nyear={2024},\nurl={https://openreview.net/forum?id=7VVGO0kuuY}\n}", "github": "", "project": "", "reviewers": "LMvE;miEU;dZfY;mQjQ;6VtU", "site": "https://openreview.net/forum?id=7VVGO0kuuY", "pdf_size": 1852025, "rating": "5;6;6;6;6", "confidence": "4;4;2;3;3", "soundness": "3;3;3;3;2", "contribution": "3;2;3;3;2", "presentation": "3;1;2;2;3", "wc_summary": "107;95;106;173;73", "wc_strengths": "74;76;122;118;67", "wc_weaknesses": "428;436;433;371;168", "wc_questions": "75;227;165;11;73", "wc_review": "684;834;826;673;381", "wc_reply_reviewers": "267;55;264;38;125", "wc_reply_authors": "2201;1279;1542;594;1268", "reply_reviewers": "3;2;3;1;1", "reply_authors": "5;3;5;2;3", "rating_avg": [ 5.8, 0.39999999999999997 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "contribution_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.2, 0.7483314773547882 ], "wc_summary_avg": [ 110.8, 33.420951512486894 ], "wc_strengths_avg": [ 91.4, 23.5762592452662 ], "wc_weaknesses_avg": [ 367.2, 102.42538747790998 ], "wc_questions_avg": [ 110.2, 76.30045871421744 ], "wc_review_avg": [ 679.6, 164.01048746955175 ], "wc_reply_reviewers_avg": [ 149.8, 98.87244307692615 ], "wc_reply_authors_avg": [ 1376.8, 517.8368082707138 ], "reply_reviewers_avg": [ 2.0, 0.8944271909999159 ], "reply_authors_avg": [ 3.6, 1.2 ], "replies_avg": [ 37, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5345224838248488, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17002781624394995765&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;0", "aff_unique_norm": "Chinese Academy of Sciences", "aff_unique_dep": "Institute of Automation", "aff_unique_url": "http://www.ia.cas.cn", "aff_unique_abbr": "CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Human Feedback is not Gold Standard", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19356", "id": "7W3GLNImfS", "author_site": "Tom Hosking, Phil Blunsom, Max Bartolo", "tldr": "", "abstract": "Human feedback has become the de facto standard for evaluating the performance of 
Large Language Models, and is increasingly being used as a training objective. However, it is not clear which properties of a generated output this single `preference' score captures. We hypothesise that preference scores are subjective and open to undesirable biases. We critically analyse the use of human feedback for both training and evaluation, to verify whether it fully captures a range of crucial error criteria. We find that while preference scores have fairly good coverage, they under-represent important aspects like factuality. We further hypothesise that both preference scores and error annotation may be affected by confounders, and leverage instruction-tuned models to generate outputs that vary along two possible confounding dimensions: assertiveness and complexity. We find that the assertiveness of an output skews the perceived rate of factuality errors, indicating that human annotations are not a fully reliable evaluation metric or training objective. Finally, we offer preliminary evidence that using human feedback as a training objective disproportionately increases the assertiveness of model outputs. We encourage future work to carefully consider whether preference scores are well aligned with the desired objective.", "keywords": "human evaluation;large language models;evaluation;natural language generation", "primary_area": "generative models", "supplementary_material": "", "author": "Tom Hosking;Phil Blunsom;Max Bartolo", "authorids": "~Tom_Hosking1;~Phil_Blunsom1;~Max_Bartolo1", "gender": ";;", "homepage": ";;https://maxbartolo.com", "dblp": ";96/4705;227/3290", "google_scholar": ";https://scholar.google.co.uk/citations?user=eJwbbXEAAAAJ;jPSWYn4AAAAJ", "orcid": ";;0009-0007-3301-7895", "linkedin": ";;maxbartolo/", "or_profile": "~Tom_Hosking1;~Phil_Blunsom1;~Max_Bartolo1", "aff": ";Department of Computer Science, University of Oxford;University College London", "aff_domain": ";cs.ox.ac.uk;ucl.ac.uk", "position": ";Associate Professor;PhD student", "bibtex": "@inproceedings{\nhosking2024human,\ntitle={Human Feedback is not Gold Standard},\nauthor={Tom Hosking and Phil Blunsom and Max Bartolo},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=7W3GLNImfS}\n}", "github": "", "project": "", "reviewers": "Rbzk;iLy7;knEY;mWEt", "pdf_size": 402977, "rating": "6;6;6;8", "confidence": "3;4;4;3", "soundness": "3;3;3;4", "contribution": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "140;91;433;224", "wc_strengths": "72;53;168;111", "wc_weaknesses": "111;515;198;417", "wc_questions": "37;139;5;73", "wc_review": "360;798;804;825", "wc_reply_reviewers": "91;169;0;218", "wc_reply_authors": "517;1257;526;747", "reply_reviewers": "1;1;0;1", "reply_authors": "2;3;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 222.0, 130.77652694577876 ], "wc_strengths_avg": [ 101.0, 43.971581731841304 ], "wc_weaknesses_avg": [ 310.25, 162.4951922365705 ], "wc_questions_avg": [ 63.5, 49.78704650810289 ], "wc_review_avg": [ 696.75, 194.68098905645616 ], "wc_reply_reviewers_avg": [ 119.5, 82.53029746705145 ], "wc_reply_authors_avg": [ 761.75, 300.40420686135536 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 
-0.5773502691896257, "gs_citation": 43, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9519322896122389978&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=7W3GLNImfS", "pdf": "https://openreview.net/pdf?id=7W3GLNImfS", "email": ";cs.ox.ac.uk;ucl.ac.uk", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "University of Oxford;University College London", "aff_unique_dep": "Department of Computer Science;", "aff_unique_url": "https://www.ox.ac.uk;https://www.ucl.ac.uk", "aff_unique_abbr": "Oxford;UCL", "aff_campus_unique_index": "0", "aff_campus_unique": "Oxford;", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "id": "7W4boWjb3Q", "title": "Partitioned-Learned Count-Min Sketch", "track": "main", "status": "Reject", "tldr": "", "abstract": "We propose Partitioned Learned Count-Min Sketch (PL-CMS), a new approach to learning augmented frequent item identification in data streams. Our method builds on the learned Count-Min Sketch (LCMS) algorithm of Hsu et al. (ICLR 2019), which combines a standard Count-Min Sketch frequency estimation data structure with a learned model, by partitioning items in the input stream into two sets. Items with sufficiently high predicted frequencies have their frequencies tracked exactly, while the remaining items, with low predicted frequencies, are placed into the Count-Min Sketch data structure. \n \nInspired by an approach of Vaidya et al. for learning augmented Bloom filters (ICLR 2021), our PL-CMS algorithm partitions items into different sets, based on multiple predicted frequency thresholds. Each set is handled by a separate Count-Min Sketch data structure. Unlike classic LCMS, this allows the algorithm to take advantage of the full prediction space of the learned model. We demonstrate that, given fixed partitioning thresholds, the parameters of our data structure can be efficiently optimized using a convex program. 
Empirically, we show that, on a variety of benchmarks, PL-CMS obtains a lower false positive rate for frequent item identification as compared to LCMS and standard Count-Min Sketch.", "keywords": "count-min sketch;heavy hitters;frequent items;learning augmented algorithms;streaming algorithms", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Thuy Trang Nguyen;Cameron N Musco", "authorids": "~Thuy_Trang_Nguyen1;~Cameron_N_Musco1", "gender": "F;M", "homepage": ";https://people.cs.umass.edu/~cmusco/", "dblp": ";149/2327", "google_scholar": "dmFU5vcAAAAJ;EeYGZCwAAAAJ", "orcid": ";", "linkedin": "thuytrang--nguyen/;", "or_profile": "~Thuy_Trang_Nguyen1;~Cameron_N_Musco1", "aff": "University of Massachusetts at Amherst;University of Massachusetts, Amherst", "aff_domain": "umass.edu;umass.edu", "position": "PhD student;Assistant Professor", "bibtex": "@misc{\nnguyen2024partitionedlearned,\ntitle={Partitioned-Learned Count-Min Sketch},\nauthor={Thuy Trang Nguyen and Cameron N Musco},\nyear={2024},\nurl={https://openreview.net/forum?id=7W4boWjb3Q}\n}", "github": "", "project": "", "reviewers": "C7mx;jB1e;4TXw;PmnJ", "site": "https://openreview.net/forum?id=7W4boWjb3Q", "pdf_size": 1331632, "rating": "5;5;6;6", "confidence": "4;4;3;4", "soundness": "2;3;3;3", "contribution": "2;2;2;2", "presentation": "3;3;3;4", "wc_summary": "113;161;55;116", "wc_strengths": "70;55;61;48", "wc_weaknesses": "118;121;124;102", "wc_questions": "227;111;2;77", "wc_review": "528;448;242;343", "wc_reply_reviewers": "0;125;59;0", "wc_reply_authors": "829;746;911;646", "reply_reviewers": "0;1;2;0", "reply_authors": "1;1;3;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 111.25, 37.632266740126084 ], "wc_strengths_avg": [ 58.5, 8.077747210701755 ], "wc_weaknesses_avg": [ 116.25, 8.496322733983215 ], "wc_questions_avg": [ 104.25, 81.102943842995 ], "wc_review_avg": [ 390.25, 107.84334703633785 ], "wc_reply_reviewers_avg": [ 46.0, 51.58003489723519 ], "wc_reply_authors_avg": [ 783.0, 98.28275535413117 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Jb3gORY006sJ:scholar.google.com/&scioq=Partitioned-Learned+Count-Min+Sketch&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "University of Massachusetts Amherst", "aff_unique_dep": "", "aff_unique_url": "https://www.umass.edu", "aff_unique_abbr": "UMass Amherst", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Amherst", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "7W4rbphLht", "title": "A Semi-smooth, Self-shifting, and Singular Newton Method for Sparse Optimal Transport", "track": "main", "status": "Reject", "tldr": "", "abstract": "Newton's method is an important second-order optimization algorithm that has been extensively studied. However, many challenging optimization problems break the classical assumptions of Newton's method. For example, the objective function may not be twice differentiable, and the optimal solution may be non-unique. 
In this article, we propose a general Newton-type algorithm named S5N, to solve problems that have possibly non-differentiable gradients and non-isolated solutions, a setting highly motivated by the sparse optimal transport problem. Compared with existing Newton-type approaches, the proposed S5N algorithm has broad applicability, does not require hyperparameter tuning, and possesses rigorous global and local convergence guarantees. Extensive numerical experiments show that on sparse optimal transport problems, S5N gains superior performance on convergence speed and computational efficiency.", "keywords": "Newton's method;semi-smooth function;non-isolated solution;global convergence;quadratic convergence;optimal transport", "primary_area": "optimization", "supplementary_material": "", "author": "Zihao Tang;Yixuan Qiu", "authorids": "~Zihao_Tang3;~Yixuan_Qiu1", "gender": "M;", "homepage": "https://github.com/TangZihao1997;https://statr.me", "dblp": "176/5858-1.html;209/7159", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;", "orcid": "0009-0008-4716-8616;", "linkedin": ";", "or_profile": "~Zihao_Tang3;~Yixuan_Qiu1", "aff": "Shanghai University of Finance and Economics;Shanghai University of Finance and Economics", "aff_domain": "sufe.edu;sufe.edu.cn", "position": "PhD student;Associate Professor", "bibtex": "@misc{\ntang2024a,\ntitle={A Semi-smooth, Self-shifting, and Singular Newton Method for Sparse Optimal Transport},\nauthor={Zihao Tang and Yixuan Qiu},\nyear={2024},\nurl={https://openreview.net/forum?id=7W4rbphLht}\n}", "github": "", "project": "", "reviewers": "h4ty;tikC;35vU;7Z3B;cPiV", "site": "https://openreview.net/forum?id=7W4rbphLht", "pdf_size": 3257542, "rating": "3;3;6;6;6", "confidence": "3;4;2;2;3", "soundness": "1;2;3;3;2", "contribution": "2;2;3;3;3", "presentation": "2;2;3;3;2", "wc_summary": "51;63;79;32;75", "wc_strengths": "9;30;125;42;45", "wc_weaknesses": "271;404;66;73;124", "wc_questions": "2;125;3;43;1", "wc_review": "333;622;273;190;245", "wc_reply_reviewers": "89;799;14;0;24", "wc_reply_authors": "690;1806;408;360;355", "reply_reviewers": "1;2;1;0;2", "reply_authors": "2;5;2;1;3", "rating_avg": [ 4.8, 1.469693845669907 ], "confidence_avg": [ 2.8, 0.7483314773547882 ], "soundness_avg": [ 2.2, 0.7483314773547882 ], "contribution_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 60.0, 17.08800749063506 ], "wc_strengths_avg": [ 50.2, 39.48366750949055 ], "wc_weaknesses_avg": [ 187.6, 130.9573976528245 ], "wc_questions_avg": [ 34.8, 47.817988247102164 ], "wc_review_avg": [ 332.6, 151.86388642465332 ], "wc_reply_reviewers_avg": [ 185.2, 308.4149153332244 ], "wc_reply_authors_avg": [ 723.8, 555.0482501548852 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 2.6, 1.3564659966250538 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.7637626158259733, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:lwxVI53t-2AJ:scholar.google.com/&scioq=A+Semi-smooth,+Self-shifting,+and+Singular+Newton+Method+for+Sparse+Optimal+Transport&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Shanghai University of Finance and Economics", "aff_unique_dep": "", "aff_unique_url": "http://www.sufe.edu.cn", "aff_unique_abbr": "SUFE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "7W9zRGhLq7", "title": "A New 
Theoretical Perspective on Data Heterogeneity in Federated Averaging", "track": "main", "status": "Reject", "tldr": "", "abstract": "In federated learning, data heterogeneity is the main reason that existing theoretical analyses are pessimistic about the convergence error caused by local updates. However, empirical studies have shown that more local updates can improve the convergence rate and reduce the communication cost when data are heterogeneous. This paper aims to bridge this gap between the theoretical understanding and the practical performance by providing a theoretical analysis for federated averaging (FedAvg) with non-convex objective functions from a new perspective on data heterogeneity. Identifying the limitations in the commonly used assumption of bounded gradient divergence, we propose a new assumption, termed the heterogeneity-driven Lipschitz assumption, which characterizes the fundamental effect of data heterogeneity on local updates. In the convergence analysis, we use the heterogeneity-driven Lipschitz constant and the global Lipschitz constant to substitute the widely used local Lipschitz constant and we show that our assumptions are weaker than those used in the literature. Based on the new assumption, we derive novel convergence bounds for both full participation and partial participation, which are tighter compared to the state-of-the-art analysis of FedAvg. This result can also imply that more local updates can improve the convergence rate even when data are highly heterogeneous. Further, we discuss the insights behind the proposed heterogeneity-driven Lipschitz assumption, by which we identify a region where FedAvg (also known as local SGD) can outperform mini-batch SGD even when the gradient divergence is arbitrarily large.", "keywords": "Federated Learning;Data Heterogeneity;Theoretical Analysis", "primary_area": "optimization", "supplementary_material": "/attachment/db647562ac0cb088ebafd8f91e572dc2f56b6ae2.zip", "author": "Jiayi Wang;Shiqiang Wang;Rong-Rong Chen;Mingyue Ji", "authorids": "~Jiayi_Wang4;~Shiqiang_Wang1;~Rong-Rong_Chen1;~Mingyue_Ji1", "gender": "F;M;;M", "homepage": ";https://shiqiang.wang;;https://mingyueji.ece.ufl.edu/", "dblp": ";87/5094-1;;", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;kA_vmOcAAAAJ;G2pEqUQAAAAJ;rWLfxVgAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Jiayi_Wang4;~Shiqiang_Wang1;~Rong-Rong_Chen1;~Mingyue_Ji1", "aff": "Oak Ridge National Laboratory;IBM, International Business Machines;University of Utah;University of Florida", "aff_domain": "ornl.gov;us.ibm.com;utah.edu;ufl.edu", "position": "Postdoc;Research Staff Member;Associate Professor;Associate Professor", "bibtex": "@misc{\nwang2024a,\ntitle={A New Theoretical Perspective on Data Heterogeneity in Federated Averaging},\nauthor={Jiayi Wang and Shiqiang Wang and Rong-Rong Chen and Mingyue Ji},\nyear={2024},\nurl={https://openreview.net/forum?id=7W9zRGhLq7}\n}", "github": "", "project": "", "reviewers": "NMhc;wiji;RuXa;ziE8", "site": "https://openreview.net/forum?id=7W9zRGhLq7", "pdf_size": 1682092, "rating": "3;5;5;6", "confidence": "3;4;3;3", "soundness": "2;2;3;3", "contribution": "2;2;2;2", "presentation": "3;3;3;2", "wc_summary": "58;172;122;65", "wc_strengths": "65;154;108;69", "wc_weaknesses": "268;291;297;189", "wc_questions": "4;2;26;105", "wc_review": "395;619;553;428", "wc_reply_reviewers": "152;0;115;47", "wc_reply_authors": "2346;725;1064;1171", "reply_reviewers": "1;0;1;1", "reply_authors": "5;1;2;3", "rating_avg": [ 
4.75, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 104.25, 46.32696299132936 ], "wc_strengths_avg": [ 99.0, 35.92352989337211 ], "wc_weaknesses_avg": [ 261.25, 43.09509832916036 ], "wc_questions_avg": [ 34.25, 41.91882035553959 ], "wc_review_avg": [ 498.75, 91.06693966528138 ], "wc_reply_reviewers_avg": [ 78.5, 58.92580080066795 ], "wc_reply_authors_avg": [ 1326.5, 611.2014806919237 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 1.479019945774904 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.13245323570650439, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:JKimkEaRFksJ:scholar.google.com/&scioq=A+New+Theoretical+Perspective+on+Data+Heterogeneity+in+Federated+Averaging&hl=en&as_sdt=0,5", "gs_version_total": 2, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Oak Ridge National Laboratory;International Business Machines;University of Utah;University of Florida", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ornl.gov;https://www.ibm.com;https://www.utah.edu;https://www.ufl.edu", "aff_unique_abbr": "ORNL;IBM;Utah;UF", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "You Only Query Once: An Efficient Label-Only Membership Inference Attack", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19355", "id": "7WsivwyHrS", "author_site": "Yutong Wu, Han Qiu, Shangwei Guo, Jiwei Li, Tianwei Zhang", "tldr": "", "abstract": "As one of the privacy threats to machine learning models, the membership inference attack (MIA) tries to infer whether a given sample is in the original training set of a victim model by analyzing its outputs. Recent studies only use the predicted hard labels to achieve impressive membership inference accuracy. However, such a label-only MIA approach requires very high query budgets to evaluate the distance of the target sample from the victim model's decision boundary. \n We propose YOQO, a novel label-only attack to overcome the above limitation. YOQO aims at identifying a special area (called improvement area) around the target sample and crafting a query sample, whose hard label from the victim model can reliably reflect the target sample's membership. YOQO can successfully reduce the query budget from more than 1,000 times to only ONCE. 
Experiments demonstrate that YOQO is not only as effective as SOTA attack methods, but also performs comparably or even more robustly against many sophisticated defenses.", "keywords": "Machine learning;Membership Inference Attack;Computer Vision", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/9c3699b4de96d41735ae13a134b7dcc950cec14c.zip", "author": "YUTONG WU;Han Qiu;Shangwei Guo;Jiwei Li;Tianwei Zhang", "authorids": "~YUTONG_WU3;~Han_Qiu3;~Shangwei_Guo1;~Jiwei_Li1;~Tianwei_Zhang1", "gender": "M;M;M;M;M", "homepage": ";https://qiuhan.info;http://www.cs.cqu.edu.cn/info/1332/5290.htm;https://nlp.stanford.edu/~bdlijiwei/;https://personal.ntu.edu.sg/tianwei.zhang/index.html", "dblp": ";15/4507-1;176/6479;73/5746-1;77/7902-4", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;https://scholar.google.fr/citations?user=6JWNv6gAAAAJ;wQrVkBYAAAAJ;PwU16JEAAAAJ;9vpiYDIAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~YUTONG_WU3;~Han_Qiu3;~Shangwei_Guo1;~Jiwei_Li1;~Tianwei_Zhang1", "aff": "National Technological University;Tsinghua University;Chongqing University;Zhejiang University;Nanyang Technological University", "aff_domain": "ntu.edu;tsinghua.edu.cn;cqu.edu.cn;zju.edu.cn;ntu.edu.sg", "position": "PhD student;Assistant Professor;Associate Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nwu2024you,\ntitle={You Only Query Once: An Efficient Label-Only Membership Inference Attack},\nauthor={YUTONG WU and Han Qiu and Shangwei Guo and Jiwei Li and Tianwei Zhang},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=7WsivwyHrS}\n}", "github": "", "project": "", "reviewers": "1W2d;bo89;1cwK", "pdf_size": 426882, "rating": "6;6;8", "confidence": "3;4;4", "soundness": "3;2;3", "contribution": "2;3;3", "presentation": "3;3;3", "wc_summary": "72;119;100", "wc_strengths": "66;36;50", "wc_weaknesses": "91;29;77", "wc_questions": "63;262;347", "wc_review": "292;446;574", "wc_reply_reviewers": "43;258;19", "wc_reply_authors": "705;1874;773", "reply_reviewers": "1;1;1", "reply_authors": "2;4;2", "rating_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 97.0, 19.30457631409368 ], "wc_strengths_avg": [ 50.666666666666664, 12.256517540566822 ], "wc_weaknesses_avg": [ 65.66666666666667, 26.5497436689865 ], "wc_questions_avg": [ 224.0, 119.01540516532583 ], "wc_review_avg": [ 437.3333333333333, 115.289008823719 ], "wc_reply_reviewers_avg": [ 106.66666666666667, 107.45645112737014 ], "wc_reply_authors_avg": [ 1117.3333333333333, 535.7638368618108 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.49999999999999983, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16365776509541219820&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=7WsivwyHrS", "pdf": "https://openreview.net/pdf?id=7WsivwyHrS", "email": "ntu.edu;tsinghua.edu.cn;cqu.edu.cn;zju.edu.cn;ntu.edu.sg", "author_num": 5, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "National Technological 
University;Tsinghua University;Chongqing University;Zhejiang University;Nanyang Technological University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.ntu.edu;https://www.tsinghua.edu.cn;https://www.cqu.edu.cn;https://www.zju.edu.cn;https://www.ntu.edu.sg", "aff_unique_abbr": "NTU;THU;CQU;ZJU;NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;2", "aff_country_unique": "United States;China;Singapore" }, { "id": "7XXineVQeU", "title": "Faster Maximum Inner Product Search in High Dimensions", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Maximum Inner Product Search (MIPS) is a ubiquitous task in machine learning applications such as recommendation systems. \nGiven a query vector and $n$ atom vectors in $d$-dimensional space, the goal of MIPS is to find the atom that has the highest inner product with the query vector. \nExisting MIPS algorithms scale at least as $O(\\sqrt{d})$, which becomes computationally prohibitive in high-dimensional settings. In this work, we present BanditMIPS, a novel randomized MIPS algorithm whose complexity is independent of $d$. \nBanditMIPS estimates the inner product for each atom by subsampling coordinates and adaptively evaluates more coordinates for more promising atoms. The specific adaptive sampling strategy is motivated by multi-armed bandits. We provide theoretical guarantees that BanditMIPS returns the correct answer with high probability, while improving the complexity in $d$ from $O(\\sqrt{d})$ to $O(1)$. We also perform experiments on four synthetic and real-world datasets and demonstrate that BanditMIPS outperforms prior state-of-the-art algorithms. \nFor example, in the Movie Lens dataset ($n$=4,000, $d$=6,000), BanditMIPS is 20$\\times$ faster than the next best algorithm while returning the same answer. BanditMIPS requires no preprocessing of the data and includes a hyperparameter that practitioners may use to trade off accuracy and runtime.\nWe also propose a variant of our algorithm, named BanditMIPS-$\\alpha$, which achieves further speedups by employing non-uniform sampling across coordinates. 
\nFinally, we demonstrate how known preprocessing techniques can be used to further accelerate BanditMIPS, and discuss applications to Matching Pursuit and Fourier analysis.", "keywords": "multi-armed bandits;maximum inner product search;MIPS;best-arm identification", "primary_area": "probabilistic methods (Bayesian methods, variational inference, sampling, UQ, etc.)", "supplementary_material": "/attachment/a99288def3c9d0cd2f8686fe3d79b6b9e29a71f2.pdf", "author": "Mo Tiwari;Ryan Kang;Donghyun Lee;Jaeyong Lee;Sebastian Thrun;Christopher J Piech;Ilan Shomorony;Martin Jinye Zhang", "authorids": "~Mo_Tiwari1;~Ryan_Kang1;~Donghyun_Lee2;~Jaeyong_Lee1;~Sebastian_Thrun1;~Christopher_J_Piech1;~Ilan_Shomorony1;~Martin_Jinye_Zhang1", "gender": ";M;M;M;M;M;M;M", "homepage": "http://www.motiwari.com/;;;https://kr.linkedin.com/in/jeyong-lee-6a7838190;http://robot.cc;;http://www.ilanshomorony.com;https://mzhanglab.github.io/", "dblp": "267/5421;https://dblp.org/rec/conf/nips/TiwariKLPSTZ22.html;298/4489;336/2490;t/SebastianThrun;35/10987.html;31/9223;184/9278", "google_scholar": "https://scholar.google.com/citations?hl=en;;https://scholar.google.com/citations?hl=en;;;;fMAg4zEAAAAJ;zjr6n-QAAAAJ", "orcid": ";;;;;;;0000-0003-0006-2466", "linkedin": "motiwari;ryan-kang-554819221/;donghyun-lee-aa789422a;;sebastian-thrun-59a0b273/;;;", "or_profile": "~Mo_Tiwari1;~Ryan_Kang1;~Donghyun_Lee2;~Jaeyong_Lee1;~Sebastian_Thrun1;~Christopher_J_Piech1;~Ilan_Shomorony1;~Martin_J._Zhang1", "aff": "OpenAI;Stanford University;University College London, University of London;University of Oxford;;;University of Illinois, Urbana Champaign;Carnegie Mellon University", "aff_domain": "openai.com;stanford.edu;ucl.ac.uk;oxford.ac.uk;;;illinois.edu;andrew.cmu.edu", "position": "Member of Technical Staff;MS student;MS student;Undergrad student;;;Assistant Professor;Assistant Professor", "bibtex": "@misc{\ntiwari2024faster,\ntitle={Faster Maximum Inner Product Search in High Dimensions},\nauthor={Mo Tiwari and Ryan Kang and Donghyun Lee and Jaeyong Lee and Sebastian Thrun and Christopher J Piech and Ilan Shomorony and Martin Jinye Zhang},\nyear={2024},\nurl={https://openreview.net/forum?id=7XXineVQeU}\n}", "github": "", "project": "", "reviewers": "CJG9;hfLb;tsE3;FGUA", "site": "https://openreview.net/forum?id=7XXineVQeU", "pdf_size": 2650778, "rating": "1;1;3;3", "confidence": "4;5;3;3", "soundness": "1;3;2;2", "contribution": "1;1;3;2", "presentation": "1;3;2;2", "wc_summary": "77;225;44;73", "wc_strengths": "3;1;32;29", "wc_weaknesses": "3;147;46;160", "wc_questions": "3;6;47;6", "wc_review": "86;379;169;268", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 2.0, 1.0 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.0, 0.7071067811865476 ], "contribution_avg": [ 1.75, 0.82915619758885 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 104.75, 70.5846123457514 ], "wc_strengths_avg": [ 16.25, 14.306903927824496 ], "wc_weaknesses_avg": [ 89.0, 66.42665127793211 ], "wc_questions_avg": [ 15.5, 18.227726133558185 ], "wc_review_avg": [ 225.5, 109.56847174255923 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 5, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=12912019336735211116&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff_unique_index": "0;1;2;3;4;5", "aff_unique_norm": "OpenAI;Stanford University;University College London;University of Oxford;University of Illinois Urbana-Champaign;Carnegie Mellon University", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://openai.com;https://www.stanford.edu;https://www.ucl.ac.uk;https://www.ox.ac.uk;https://illinois.edu;https://www.cmu.edu", "aff_unique_abbr": "OpenAI;Stanford;UCL;Oxford;UIUC;CMU", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Stanford;Urbana-Champaign", "aff_country_unique_index": "0;0;1;1;0;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "7YEXo5qUmN", "title": "Organ-DETR: 3D Organ Detection Transfomer with Multiscale Attention and Dense Query Matching", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Query-based Transformers have been yielding impressive results in object detection. The potential of DETR-like methods for 3D data, especially in volumetric medical imaging, remains largely unexplored. This study presents Organ-DETR that contains two novel modules, MultiScale Attention (MSA) and Dense Query Matching (DQM), for boosting the performance of DEtection TRansformers (DETRs) for 3D organ detection. MSA introduces a novel top-down representation learning approach for efficient encoding of 3D visual data. \nMSA has a multiscale attention architecture that leverages dual self-attention and cross-attention mechanisms to provide the most relevant features for DETRs. It aims to employ long- and short-range spatial interactions in the attention mechanism, leveraging the self-attention module. Organ-DETR also introduces DQM, an approach for one-to-many matching that tackles the difficulties in detecting organs.\nDQM increases positive queries for enhancing both recall scores and training efficiency without the need for additional learnable parameters. \nExtensive results on five 3D Computed Tomography (CT) datasets indicate that the proposed Organ-DETR outperforms comparable techniques by achieving a remarkable improvement of +10.6 mAP COCO and +10.2 mAR COCO. 
\nCode and pre-trained models are available at \\url{https://---}.", "keywords": "Organ Detection;Representation Learning;DEtection TRansformer (DETR);Attention;Transformer;One-to-Many Matching;One-to-One Matching;Segmentation", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/f62c032dcfe3094c85c0aac53ae9816f7df501a3.zip", "author": "MORTEZA GHAHREMANI;Benjamin Raphael Ernhofer;Jiajun Wang;Christian Wachinger", "authorids": "~MORTEZA_GHAHREMANI3;~Benjamin_Raphael_Ernhofer1;~Jiajun_Wang2;~Christian_Wachinger1", "gender": "M;;M;M", "homepage": "https://mogvision.github.io/;;;https://ai-med.de/people/christian-wachinger/", "dblp": "152/6299;;;79/5985", "google_scholar": "yhXUlXsAAAAJ;;;https://scholar.google.de/citations?user=UOIBNdUAAAAJ", "orcid": "0000-0001-6423-6475;0009-0007-4189-9576;;0000-0002-3652-1874", "linkedin": "morteza-ghahremani-3a040421a/;;jiajun-wang-a82725251/;", "or_profile": "~MORTEZA_GHAHREMANI3;~Benjamin_Raphael_Ernhofer1;~Jiajun_Wang2;~Christian_Wachinger1", "aff": "Technische Universit\u00e4t M\u00fcnchen;;Technische Universit\u00e4t M\u00fcnchen;Technische Universit\u00e4t M\u00fcnchen", "aff_domain": "tum.de;;tum.de;tum.de", "position": "AI Scientist;;MS student;Professor", "bibtex": "@misc{\nanonymous2024organdetr,\ntitle={Organ-{DETR}: 3D Organ Detection Transformer with Multiscale Attention and Dense Query Matching},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=7YEXo5qUmN}\n}", "github": "", "project": "", "reviewers": "nhJL;2UmR;xGvm", "site": "https://openreview.net/forum?id=7YEXo5qUmN", "pdf_size": 1212160, "rating": "3;3;8", "confidence": "5;5;5", "soundness": "2;2;4", "contribution": "2;1;3", "presentation": "2;3;4", "wc_summary": "89;42;71", "wc_strengths": "43;39;18", "wc_weaknesses": "95;220;88", "wc_questions": "3;50;4", "wc_review": "230;351;181", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "517;632;150", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 4.666666666666667, 2.357022603955158 ], "confidence_avg": [ 5.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 0.9428090415820634 ], "contribution_avg": [ 2.0, 0.816496580927726 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 67.33333333333333, 19.362047641943477 ], "wc_strengths_avg": [ 33.333333333333336, 10.96458946893235 ], "wc_weaknesses_avg": [ 134.33333333333334, 60.642852468824024 ], "wc_questions_avg": [ 19.0, 21.924111536540465 ], "wc_review_avg": [ 254.0, 71.44695001654492 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 433.0, 205.54480452365286 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:dEyQpfH8J8wJ:scholar.google.com/&scioq=Organ-DETR:+3D+Organ+Detection+Transfomer+with+Multiscale+Attention+and+Dense+Query+Matching&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Technische Universit\u00e4t M\u00fcnchen", "aff_unique_dep": "", "aff_unique_url": "https://www.tum.de", "aff_unique_abbr": "TUM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "id": "7Yg5eylBHe", "title": "ZGS-Based Event-Driven Algorithms for Bayesian Optimization in Fully Distributed Multi-Agent Systems", "track": "main", 
"status": "Reject", "tldr": "", "abstract": "Bayesian optimization (BO) is a well-established framework for globally optimizing expensive-to-evaluate black-box functions with impressive efficiency. Although numerous BO algorithms have been developed for the centralized machine learning setting and some recent works have extended BO to the tree-structured federated learning, no previous studies have investigated BO within a fully distributed multi-agent system (MAS) in the field of distributed learning (DL). Addressing this gap, we introduce and investigate a novel paradigm, Distributed Bayesian Optimization (DBO), in which agents cooperatively optimize the same costly-to-evaluate black-box objectives. An innovative generalized algorithm, Zero-Gradient-Sum-Based Event-Driven Distributed Lower Confidence Bound (ZGS-ED-DLCB), is proposed to overcome the significant challenges of DBO and DL: We (a) adopt a surrogate model based on random Fourier features as an approximate alternative to a typical Gaussian process to enable the exchange of local knowledge between neighboring agents, and (b) employ the event-driven mechanism to enhance communication efficiency in MASs. Moreover, we propose a novel generalized fully distributed convergence theorem, which represents a substantial theoretical and practical breakthrough wrt the ZGS-based DL. The performance of our proposed algorithm has been rigorously evaluated through theoretical analysis and extensive experiments, demonstrating substantial advantages over the state-of-the-art baselines.", "keywords": "distributed machine learning;Bayesian optimization;multi-agent systems;zero-gradient-sum optimization;event-driven mechanism", "primary_area": "optimization", "supplementary_material": "/attachment/2c4857962fdfc2176867928f2585de383fdd66a9.zip", "author": "Pengfei Ren;Cheng-zhong Xu", "authorids": "~Pengfei_Ren5;~Cheng-zhong_Xu1", "gender": ";", "homepage": ";", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": ";", "aff": ";", "aff_domain": ";", "position": ";", "bibtex": "@misc{\nren2024zgsbased,\ntitle={{ZGS}-Based Event-Driven Algorithms for Bayesian Optimization in Fully Distributed Multi-Agent Systems},\nauthor={Pengfei Ren and Cheng-zhong Xu},\nyear={2024},\nurl={https://openreview.net/forum?id=7Yg5eylBHe}\n}", "github": "", "project": "", "reviewers": "BqAx;ugDi;N4Gg;EeJ5", "site": "https://openreview.net/forum?id=7Yg5eylBHe", "pdf_size": 488705, "rating": "1;3;3;6", "confidence": "3;4;4;2", "soundness": "1;2;2;3", "contribution": "2;2;2;3", "presentation": "1;1;1;2", "wc_summary": "102;91;40;67", "wc_strengths": "54;18;20;35", "wc_weaknesses": "724;192;374;36", "wc_questions": "10;2;9;2", "wc_review": "890;303;443;140", "wc_reply_reviewers": "155;0;0;0", "wc_reply_authors": "996;353;763;129", "reply_reviewers": "1;0;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 3.25, 1.7853571071357126 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.0, 0.7071067811865476 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 1.25, 0.4330127018922193 ], "wc_summary_avg": [ 75.0, 23.843238035132728 ], "wc_strengths_avg": [ 31.75, 14.428704030508076 ], "wc_weaknesses_avg": [ 331.5, 256.2435365038502 ], "wc_questions_avg": [ 5.75, 3.766629793329841 ], "wc_review_avg": [ 444.0, 278.9327876030353 ], "wc_reply_reviewers_avg": [ 38.75, 67.11696879329399 ], "wc_reply_authors_avg": [ 560.25, 339.0850741333213 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 
0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5488604301969737, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:t7ISjxiSuAYJ:scholar.google.com/&scioq=ZGS-Based+Event-Driven+Algorithms+for+Bayesian+Optimization+in+Fully+Distributed+Multi-Agent+Systems&hl=en&as_sdt=0,47", "gs_version_total": 0 }, { "id": "7Zbg38nA0J", "title": "Explaining grokking through circuit efficiency", "track": "main", "status": "Reject", "tldr": "", "abstract": "We present a theory of grokking in neural networks which explains grokking in terms of the relative efficiency of competing emergent sub-networks (circuits). Grokking is an important generalisation phenomenon where continuing to train a network which already achieves nearly perfect training loss can still dramatically improve the test loss. Our theory explains why generalising circuits gradually out-compete memorising circuits. This is because memorising circuits are inefficient for compressing large datasets---the per-example cost is high---while generalising circuits have a larger fixed cost but better per-example efficiency. Strikingly, our theory is precise enough to produce novel predictions of previously unobserved phenomena: ungrokking and semi-grokking.", "keywords": "grokking;interpretability;generalisation;regularisation;weight decay", "primary_area": "visualization or interpretation of learned representations", "supplementary_material": "", "author": "Vikrant Varma;Rohin Shah;Zachary Kenton;Janos Kramar;Ramana Kumar", "authorids": "~Vikrant_Varma1;~Rohin_Shah1;~Zachary_Kenton2;~Janos_Kramar1;~Ramana_Kumar1", "gender": ";M;M;M;", "homepage": ";http://rohinshah.com/;https://zackenton.github.io/;;", "dblp": "281/7099;145/1009;209/9980;49/9013;", "google_scholar": "EPYHbToAAAAJ;odFQXSYAAAAJ;https://scholar.google.co.uk/citations?hl=en;;OyX1-qYAAAAJ", "orcid": ";;;;", "linkedin": ";rohin-shah-76405832/;zac-kenton-824429124/;;", "or_profile": "~Vikrant_Varma1;~Rohin_Shah1;~Zachary_Kenton2;~Janos_Kramar1;~Ramana_Kumar1", "aff": "Google DeepMind;Google DeepMind;Google DeepMind;Google DeepMind;Google DeepMind", "aff_domain": "deepmind.com;deepmind.com;google.com;deepmind.com;deepmind.com", "position": "Researcher;Researcher;Researcher;Researcher;Researcher", "bibtex": "@misc{\nvarma2024explaining,\ntitle={Explaining grokking through circuit efficiency},\nauthor={Vikrant Varma and Rohin Shah and Zachary Kenton and Janos Kramar and Ramana Kumar},\nyear={2024},\nurl={https://openreview.net/forum?id=7Zbg38nA0J}\n}", "github": "", "project": "", "reviewers": "JVYB;Rk5Z;m2MB;A2EW", "site": "https://openreview.net/forum?id=7Zbg38nA0J", "pdf_size": 1682327, "rating": "3;5;6;6", "confidence": "5;4;4;4", "soundness": "3;3;3;3", "contribution": "2;2;3;3", "presentation": "4;3;4;3", "wc_summary": "67;54;72;127", "wc_strengths": "45;34;107;59", "wc_weaknesses": "249;24;125;261", "wc_questions": "93;219;300;98", "wc_review": "454;331;604;545", "wc_reply_reviewers": "616;173;178;0", "wc_reply_authors": "2183;911;1432;786", "reply_reviewers": "1;1;1;0", "reply_authors": "3;2;3;1", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 80.0, 27.919527216627433 ], "wc_strengths_avg": [ 61.25, 27.860141779969464 ], "wc_weaknesses_avg": [ 164.75, 97.15033453364944 ], "wc_questions_avg": [ 177.5, 86.8749100718959 ], "wc_review_avg": [ 
483.5, 102.99150450401237 ], "wc_reply_reviewers_avg": [ 241.75, 227.64926422020343 ], "wc_reply_authors_avg": [ 1328.0, 549.8849879747582 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.9428090415820632, "gs_citation": 48, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3942436453220632592&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google DeepMind", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "7ZiFtNzzQA", "title": "A Neural Tangent Kernel Approach for Constrained Policy Gradient Reinforcement Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "This paper presents a constrained policy gradient method where we introduce constraints for safe learning, augmenting the traditional REINFORCE algorithm by taking the following steps. First, we analyze how the agent's policy changes if a new data batch is applied, leading to a nonlinear differential equation system in continuous time (gradient flow). This description of learning dynamics is connected to the neural tangent kernel (NTK) which enables us to evaluate the policy change at arbitrary states. \nNext, we introduce constraints for action probabilities based on the assumption that there are some environment states where we know how the agent should behave, ensuring safety during learning. Then, we augment the training batch with these states and compute fictitious rewards for them, making the policy obey the constraints with the help of the NTK-based formulation. More specifically, exogenous discounted sum of future rewards (returns) are computed at these constrained state-action pairs such that the policy network satisfies the constraints. Computing the constraining returns is based on solving a system of linear equations (equality constraints) or a constrained quadratic program (inequality constraints). To tackle high-dimensional environments, a dynamic constraint selection methodology is proposed. 
\nSimulation results demonstrate that adding constraints (external information) to the learning can improve learning in terms of speed and transparency reasonably if they are selected appropriately.", "keywords": "Reinforcement learning;Policy gradient methods;Constrained learning;Neural Tangent Kernel", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Balazs Varga;Attila Lischka;Balazs Kulcsar;Morteza Haghir Chehreghani", "authorids": "~Balazs_Varga1;~Attila_Lischka1;~Balazs_Kulcsar1;~Morteza_Haghir_Chehreghani2", "gender": "M;;;", "homepage": ";https://www.chalmers.se/personer/lischka/;;", "dblp": ";;;", "google_scholar": "OO4UlEIAAAAJ;;https://scholar.google.com/citations?hl=en;", "orcid": "0000-0002-2945-7974;;;", "linkedin": ";;;", "or_profile": "~Balazs_Varga1;~Attila_Lischka1;~Balazs_Kulcsar1;~Morteza_Haghir_Chehreghani2", "aff": "Budapest University of Technology and Economics;Chalmers University of Technology;Chalmers University of Technology;", "aff_domain": "bme.hu;chalmers.se;chalmers.se;", "position": "Researcher;PhD student;Prof;", "bibtex": "@misc{\nvarga2024a,\ntitle={A Neural Tangent Kernel Approach for Constrained Policy Gradient Reinforcement Learning},\nauthor={Balazs Varga and Attila Lischka and Balazs Kulcsar and Morteza Haghir Chehreghani},\nyear={2024},\nurl={https://openreview.net/forum?id=7ZiFtNzzQA}\n}", "github": "", "project": "", "reviewers": "RmLh;4Ecu;xmYU;P7MJ", "site": "https://openreview.net/forum?id=7ZiFtNzzQA", "pdf_size": 14551656, "rating": "3;5;5;6", "confidence": "3;4;3;3", "soundness": "1;2;3;3", "contribution": "1;3;3;3", "presentation": "2;3;2;3", "wc_summary": "70;90;24;134", "wc_strengths": "50;94;30;52", "wc_weaknesses": "180;151;155;68", "wc_questions": "318;95;136;5", "wc_review": "618;430;345;259", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "contribution_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 79.5, 39.53163290328392 ], "wc_strengths_avg": [ 56.5, 23.296995514443488 ], "wc_weaknesses_avg": [ 138.5, 42.19300889957956 ], "wc_questions_avg": [ 138.5, 113.95284112298386 ], "wc_review_avg": [ 413.0, 132.90410076442336 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.13245323570650439, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:HCbKudIvjDYJ:scholar.google.com/&scioq=A+Neural+Tangent+Kernel+Approach+for+Constrained+Policy+Gradient+Reinforcement+Learning&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;1", "aff_unique_norm": "Budapest University of Technology and Economics;Chalmers University of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.bme.hu;https://www.chalmers.se", "aff_unique_abbr": "BME;Chalmers", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Hungary;Sweden" }, { "title": "Flag Aggregator: Scalable Distributed Training under Failures and Augmented Losses using Convex Optimization", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19354", "id": "7avlrpzWqo", "author_site": "Hamidreza Almasi, Harsh 
Mishra, Balajee Vamanan, Sathya N. Ravi", "tldr": "", "abstract": "Modern ML applications increasingly rely on complex deep learning models and large datasets. There has been an exponential growth in the amount of computation needed to train the largest models. Therefore, to scale computation and data, these models are inevitably trained in a distributed manner in clusters of nodes, and their updates are aggregated before being applied to the model. However, a distributed setup is prone to Byzantine failures of individual nodes, components, and software. With data augmentation added to these settings, there is a critical need for robust and efficient aggregation systems. We define the quality of workers as reconstruction ratios $\\in (0,1]$, and formulate aggregation as a Maximum Likelihood Estimation procedure using Beta densities. We show that the Regularized form of log-likelihood wrt subspace can be approximately solved using iterative least squares solver, and provide convergence guarantees using recent Convex Optimization landscape results. Our empirical findings demonstrate that our approach significantly enhances the robustness of state-of-the-art Byzantine resilient aggregators. We evaluate our method in a distributed setup with a parameter server, and show simultaneous improvements in communication efficiency and accuracy across various tasks.", "keywords": "Robust;Aggregation;Distributed;Training;Failure;Augmented;Byzantine;Resilience", "primary_area": "optimization", "supplementary_material": "/attachment/aae05857d3367152772f83487cd072c367bd1e99.zip", "author": "Hamidreza Almasi;Harsh Mishra;Balajee Vamanan;Sathya N. Ravi", "authorids": "~Hamidreza_Almasi1;~Harsh_Mishra1;~Balajee_Vamanan1;~Sathya_N._Ravi1", "gender": "M;M;M;M", "homepage": "https://hamidralmasi.github.io/;;https://www.cs.uic.edu/~balajee/;http://sathyaravi.com", "dblp": "241/0508;;;159/2123", "google_scholar": "OkUGKRAAAAAJ;;https://scholar.google.com.tw/citations?user=GKvAsQMAAAAJ;FW-0thoAAAAJ", "orcid": "0000-0002-4479-6464;;;0000-0003-3881-6323", "linkedin": "hamidralmasi/;harsh-mishra-515624144;;sathya-narayanan-ravi-74a5a128/", "or_profile": "~Hamidreza_Almasi1;~Harsh_Mishra1;~Balajee_Vamanan1;~Sathya_N._Ravi1", "aff": ";Rothamsted Research ;University of Illinois at Chicago;University of Illinois, Chicago", "aff_domain": ";rothamsted.ac.uk;uic.edu;uic.edu", "position": ";Researcher;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nalmasi2024flag,\ntitle={Flag Aggregator: Scalable Distributed Training under Failures and Augmented Losses using Convex Optimization},\nauthor={Hamidreza Almasi and Harsh Mishra and Balajee Vamanan and Sathya N. 
Ravi},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=7avlrpzWqo}\n}", "github": "", "project": "", "reviewers": "Es7u;PqPx;YEpb", "pdf_size": 1909941, "rating": "6;6;6", "confidence": "5;2;3", "soundness": "4;2;3", "contribution": "3;2;3", "presentation": "2;3;3", "wc_summary": "76;73;49", "wc_strengths": "99;34;28", "wc_weaknesses": "147;104;76", "wc_questions": "172;1;12", "wc_review": "494;212;165", "wc_reply_reviewers": "105;0;0", "wc_reply_authors": "1487;435;340", "reply_reviewers": "1;0;0", "reply_authors": "2;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 1.247219128924647 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 66.0, 12.083045973594572 ], "wc_strengths_avg": [ 53.666666666666664, 32.14895885647863 ], "wc_weaknesses_avg": [ 109.0, 29.20045661743437 ], "wc_questions_avg": [ 61.666666666666664, 78.14658590680011 ], "wc_review_avg": [ 290.3333333333333, 145.28668976276603 ], "wc_reply_reviewers_avg": [ 35.0, 49.49747468305833 ], "wc_reply_authors_avg": [ 754.0, 519.7582771507027 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17474371774670655893&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "openreview": "https://openreview.net/forum?id=7avlrpzWqo", "pdf": "https://openreview.net/pdf?id=7avlrpzWqo", "email": ";rothamsted.ac.uk;uic.edu;uic.edu", "author_num": 4, "aff_unique_index": "0;1;1", "aff_unique_norm": "Rothamsted Research;University of Illinois at Chicago", "aff_unique_dep": ";", "aff_unique_url": "https://www.rothamsted.ac.uk;https://www.uic.edu", "aff_unique_abbr": "Rothamsted;UIC", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Chicago", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United Kingdom;United States" }, { "id": "7b2itdrxMa", "title": "From Child's Play to AI: Insights into Automated Causal Curriculum Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "We study how reinforcement learning algorithms and children develop their causal curriculum to achieve a challenging goal that is not solvable at first. Adopting the Procgen environments that comprise various tasks as challenging goals, we found that 5- to 7-year-old children actively used their current level progress to determine their next step in the curriculum and made improvements to solving the goal during this process. To evaluate RL agents, we exposed them to the same demanding Procgen environments as children and employed several curriculum learning methodologies. Our results demonstrate that RL agents that emulate children by incorporating level progress as an intrinsic reward signal exhibit greater stability and are more likely to converge during training, compared to RL agents solely reliant on extrinsic reward signals for game-solving. Curriculum learning may also offer a significant reduction in the number of frames needed to solve a target environment. 
Taken together, our human-inspired findings suggest a potential path forward for addressing catastrophic forgetting or domain shift during curriculum learning in RL agents.", "keywords": "reinforcement learning;curriculum learning;cognitive science;cognitive development", "primary_area": "applications to neuroscience & cognitive science", "supplementary_material": "/attachment/c6239845071ec68cc8483ce302f108b8eb3ff372.pdf", "author": "Annya Dahmani;Eunice Yiu;Tabitha Edith Lee;Nan Rosemary Ke;Oliver Kroemer;Alison Gopnik", "authorids": "~Annya_Dahmani1;~Eunice_Yiu1;~Tabitha_Edith_Lee1;~Nan_Rosemary_Ke1;~Oliver_Kroemer1;~Alison_Gopnik1", "gender": "F;F;F;M;F;F", "homepage": ";https://ey242.github.io/;https://nke001.github.io/;https://www.ri.cmu.edu/ri-faculty/oliver-kroemer/;http://alisongopnik.com/;https://tabula-rosa.github.io/", "dblp": ";;120/5291;04/7743;49/2088;", "google_scholar": "75hAWYsAAAAJ;oqQDfCEAAAAJ;https://scholar.google.ca/citations?user=dxwPYhQAAAAJ;_tbXjP4AAAAJ;https://scholar.google.co.uk/citations?user=2tt6ZJ0AAAAJ;ZD6QUvYAAAAJ", "orcid": ";0000-0002-3505-5525;;;;", "linkedin": ";euniceyiu/;;;;tabithaedith", "or_profile": "~Annya_Dahmani1;~Eunice_Yiu1;~Nan_Rosemary_Ke1;~Oliver_Kroemer1;~Alison_Gopnik1;~Timothy_E_Lee1", "aff": "University of California, Berkeley;University of California, Berkeley;Google DeepMind;Carnegie Mellon University;;Carnegie Mellon University", "aff_domain": "berkeley.edu;berkeley.edu;deepmind.com;cmu.edu;;cmu.edu", "position": "PhD student;PhD student;Researcher;Assistant Professor;;PhD Student", "bibtex": "@misc{\ndahmani2024from,\ntitle={From Child's Play to {AI}: Insights into Automated Causal Curriculum Learning},\nauthor={Annya Dahmani and Eunice Yiu and Tabitha Edith Lee and Nan Rosemary Ke and Oliver Kroemer and Alison Gopnik},\nyear={2024},\nurl={https://openreview.net/forum?id=7b2itdrxMa}\n}", "github": "", "project": "", "reviewers": "FUEG;MFeg;tuxP;Rafo;5JBB", "site": "https://openreview.net/forum?id=7b2itdrxMa", "pdf_size": 8383889, "rating": "3;3;3;5;6", "confidence": "4;4;3;3;4", "soundness": "2;3;3;2;3", "contribution": "1;1;2;2;4", "presentation": "3;3;1;4;4", "wc_summary": "15;104;56;74;297", "wc_strengths": "32;28;32;15;130", "wc_weaknesses": "1249;113;135;6;133", "wc_questions": "2;65;130;162;65", "wc_review": "1298;310;353;257;625", "wc_reply_reviewers": "1014;310;171;0;0", "wc_reply_authors": "1862;681;455;849;312", "reply_reviewers": "3;2;1;0;0", "reply_authors": "4;2;2;2;1", "rating_avg": [ 4.0, 1.2649110640673518 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "contribution_avg": [ 2.0, 1.0954451150103321 ], "presentation_avg": [ 3.0, 1.0954451150103321 ], "wc_summary_avg": [ 109.2, 98.22301156042815 ], "wc_strengths_avg": [ 47.4, 41.76888794306116 ], "wc_weaknesses_avg": [ 327.2, 463.34022057231334 ], "wc_questions_avg": [ 84.8, 55.933531982166116 ], "wc_review_avg": [ 568.6, 386.1754005630084 ], "wc_reply_reviewers_avg": [ 299.0, 375.9074354146244 ], "wc_reply_authors_avg": [ 831.8, 547.0866110589803 ], "reply_reviewers_avg": [ 1.2, 1.1661903789690602 ], "reply_authors_avg": [ 2.2, 0.9797958971132712 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17615996309583098648&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;1;2;2", "aff_unique_norm": "University of California, Berkeley;Google;Carnegie Mellon University", 
"aff_unique_dep": ";Google DeepMind;", "aff_unique_url": "https://www.berkeley.edu;https://deepmind.com;https://www.cmu.edu", "aff_unique_abbr": "UC Berkeley;DeepMind;CMU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "7bIpWYhCdu", "title": "FILI: Syntax Repair By Learning From Own Mistakes", "track": "main", "status": "Reject", "tldr": "", "abstract": "Automatically fixing syntax errors in programs is a key challenge in Software Engineering community. Although, there are millions of programs on the web, both syntactically correct and incorrect, finding a large number of paired examples of programs is difficult. This makes training a program fixer using supervised learning difficult. Recently, BIFI, an unsupervised approach for learning a syntax fixer was proposed, in which an additional model (Breaker model) is used to augment data in each learning iteration to match real-world error distribution. In this paper, we propose a novel approach, FILI (Fix-It-Learn-It) for learning a syntax fixer without having to train any additional models for data augmentation. In each iteration, FILI carefully selects examples from the fixer's own predictions, both correct and incorrect, and uses those to fine-tune the fixer. We also show that gradually increasing the complexity of the examples during training leads to a more accurate fixer. Our evaluation on the Github-Python dataset shows that FILI outperforms BIFI by 1% while being significantly easier to train. Moreover, FILI avoids training the breaker model training a 13 million parameter breaker model in each iteration, which can take about 2 days on a modest DNN accelerator.", "keywords": "Automatic Program Repair;Software Engineering;Neural Syntax Fix", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/01b0c55cb4f8e8c344a1f03d0f02ed1f69f5cfdd.pdf", "author": "Sahil Bhatia;Navneet Potti;Rishabh Singh", "authorids": "~Sahil_Bhatia3;~Navneet_Potti1;~Rishabh_Singh1", "gender": ";;M", "homepage": ";;https://rishabhmit.bitbucket.io/", "dblp": ";;25/7056", "google_scholar": ";;https://scholar.google.com/citations?hl=en", "orcid": ";;", "linkedin": ";;", "or_profile": "~Sahil_Bhatia3;~Navneet_Potti1;~Rishabh_Singh1", "aff": ";;Meta", "aff_domain": ";;meta.com", "position": ";;Researcher", "bibtex": "@misc{\nbhatia2024fili,\ntitle={{FILI}: Syntax Repair By Learning From Own Mistakes},\nauthor={Sahil Bhatia and Navneet Potti and Rishabh Singh},\nyear={2024},\nurl={https://openreview.net/forum?id=7bIpWYhCdu}\n}", "github": "", "project": "", "reviewers": "9dQP;yQj5;5pCb;ydVV", "site": "https://openreview.net/forum?id=7bIpWYhCdu", "pdf_size": 281433, "rating": "3;5;5;6", "confidence": "5;3;4;4", "soundness": "3;3;3;3", "contribution": "2;2;3;2", "presentation": "3;3;3;4", "wc_summary": "113;49;171;138", "wc_strengths": "30;39;70;76", "wc_weaknesses": "58;137;83;163", "wc_questions": "117;16;104;50", "wc_review": "318;241;428;427", "wc_reply_reviewers": "0;28;0;0", "wc_reply_authors": "215;281;279;331", "reply_reviewers": "0;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 117.75, 44.70668294561787 ], 
"wc_strengths_avg": [ 53.75, 19.62619423117992 ], "wc_weaknesses_avg": [ 110.25, 41.74550874046213 ], "wc_questions_avg": [ 71.75, 40.831207427652686 ], "wc_review_avg": [ 353.5, 78.84954026498824 ], "wc_reply_reviewers_avg": [ 7.0, 12.12435565298214 ], "wc_reply_authors_avg": [ 276.5, 41.16734142496938 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.6488856845230502, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:BoC80p5zSlMJ:scholar.google.com/&scioq=FILI:+Syntax+Repair+By+Learning+From+Own+Mistakes&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Meta", "aff_unique_dep": "Meta Platforms, Inc.", "aff_unique_url": "https://meta.com", "aff_unique_abbr": "Meta", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "7c3ZOKGQ6s", "title": "YOLOV6: A SINGLE-STAGE OBJECT DETECTION FRAMEWORK FOR INDUSTRIAL APPLICATIONS", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "We inaugurate YOLOv6, shipped with hardware-friendly architectural designs and a composite of novel training schemes tailored for industrial scenarios, which marks a new state-of-the-art real-time object detector as of early 2023. For a glimpse of performance, our YOLOv6-N hits 37.5% AP on the COCO dataset at a throughput of 1187 FPS tested with an NVIDIA Tesla T4 GPU. YOLOv6-S strikes 45.0% AP at 484 FPS, outperforming other mainstream detectors at the same scale (YOLOv5-S, YOLOv8-S, YOLOX-S, and PPYOLOE-S). Meantime, YOLOv6-M and L achieve better accuracy performance (50.0%/52.8% respectively) than other detectors at a similar inference speed. Additionally, with an extended backbone and neck design, our YOLOv6-L6 achieves state-of-the-art accuracy in real-time object detection. 
We carefully conducted extensive experiments to validate the effectiveness of each proposed component.", "keywords": "object detection;single-stage", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Chuyi Li;Bo Zhang;Lulu Li;Liang Li;Yifei Geng;Meng Cheng;Xu Xiaoming;Xiangxiang Chu;Xiaoming Wei", "authorids": "~Chuyi_Li1;~Bo_Zhang7;~Lulu_Li1;~Liang_Li10;~Yifei_Geng2;~Meng_Cheng2;~Xu_Xiaoming1;~Xiangxiang_Chu1;~Xiaoming_Wei1", "gender": "F;M;;M;M;;M;M;M", "homepage": "https://www.linkedin.cn/incareer/in/ACoAAD3J2pgBMaevrBlIBbqscuJvvv037e8MRyA;;https://github.com/meituan/YOLOv6;https://myaccount.google.com/?hl=zh-CN;https://github.com/meituan/YOLOv6;;;https://cxxgtxy.github.io/;https://www.linkedin.com/in/%E6%99%93%E6%98%8E-%E9%AD%8F-a0571b1a1/", "dblp": ";36/2259-46;;;;;;207/8002;", "google_scholar": ";uUNQnu0AAAAJ;;z_fYeJoAAAAJ;;;fFjtYN8AAAAJ;jn21pUsAAAAJ;", "orcid": ";0000-0003-0564-617X;;;;0000-0003-1734-5550;;0000-0003-2548-0605;0000-0002-7471-8344", "linkedin": "https://www.linkedin.cn/incareer/in/ACoAAD3J2pgBMaevrBlIBbqscuJvvv037e8MRyA;bo-zhang-20a86588/;;;;;;;", "or_profile": "~Chuyi_Li1;~Bo_Zhang7;~Lulu_Li1;~Liang_Li10;~Yifei_Geng2;~Meng_Cheng2;~Xu_Xiaoming1;~Xiangxiang_Chu1;~Wei_Xiaoming1", "aff": ";Meituan Inc.;;Meituan;;;;MeiTuan;Meituan", "aff_domain": ";meituan.com;;meituan.com;;;;meituan.com;meituan.com", "position": ";Senior Software Engineer;;Researcher;;;;Senior Engineer;Researcher", "bibtex": "@misc{\nli2024yolov,\ntitle={{YOLOV}6: A {SINGLE}-{STAGE} {OBJECT} {DETECTION} {FRAMEWORK} {FOR} {INDUSTRIAL} {APPLICATIONS}},\nauthor={Chuyi Li and Bo Zhang and Lulu Li and Liang Li and Yifei Geng and Meng Cheng and Xu Xiaoming and Xiangxiang Chu and Xiaoming Wei},\nyear={2024},\nurl={https://openreview.net/forum?id=7c3ZOKGQ6s}\n}", "github": "", "project": "", "reviewers": "W4rd;jptY;3aeD", "site": "https://openreview.net/forum?id=7c3ZOKGQ6s", "pdf_size": 761950, "rating": "3;3;3", "confidence": "5;4;5", "soundness": "2;1;3", "contribution": "2;2;2", "presentation": "2;2;3", "wc_summary": "47;55;103", "wc_strengths": "35;24;85", "wc_weaknesses": "155;460;215", "wc_questions": "1;2;64", "wc_review": "238;541;467", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 3.0, 0.0 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "soundness_avg": [ 2.0, 0.816496580927726 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 68.33333333333333, 24.729649321321876 ], "wc_strengths_avg": [ 48.0, 26.54555832275273 ], "wc_weaknesses_avg": [ 276.6666666666667, 131.93011618108866 ], "wc_questions_avg": [ 22.333333333333332, 29.465610840812758 ], "wc_review_avg": [ 415.3333333333333, 128.98148015208315 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 3, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3106, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13702720529764835843&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Meituan Inc.;Meituan", "aff_unique_dep": ";", "aff_unique_url": "https://www.meituan.com;https://www.meituan.com", "aff_unique_abbr": "Meituan;Meituan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", 
"aff_country_unique": "China" }, { "id": "7d2sWFIIPF", "title": "Backdoor Attack for Federated Learning with Fake Clients", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Federated Learning (FL) is a popular distributed machine learning paradigm that enables joint model training without sharing clients\u2019 data. Recent studies show that federated learning can be vulnerable to potential backdoor attacks from malicious clients: such attacks aim to mislead the global model into a targeted misprediction when a specific trigger pattern is presented. Although various types of federated backdoor attacks are proposed, most of them rely on the malicious client's local data to inject the backdoor trigger into the model. In this paper, we consider a new and more challenging scenario that the attacker can only control the fake clients, who do not possess any real data at all. Such a threat model sets a higher standard for the attacker that the attack must be conducted without relying on any real client data (only knowing the target class label). Meanwhile, the resulting malicious update should not be easily detected by the potential defenses. Specifically, we first simulate the normal client updates via modeling the historical global model trajectory. Then we simultaneously optimize the backdoor trigger and manipulate the model parameters in a data-free manner to achieve our attacking goal. Extensive experiments on multiple benchmark datasets show the effectiveness of the proposed attack in the fake client setting under state-of-the-art defenses.", "keywords": "Backdoor Atttack;Federated Learning", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "", "author": "Pei Fang;Bochuan Cao;Jinyuan Jia;Jinghui Chen", "authorids": "~Pei_Fang1;~Bochuan_Cao1;~Jinyuan_Jia2;~Jinghui_Chen1", "gender": "M;;;M", "homepage": "https://greilfang.github.io/;https://aaaaaasuka.github.io/;https://jinyuan-jia.github.io/;https://jinghuichen.github.io/", "dblp": ";334/3881;24/5124-1.html;67/5633", "google_scholar": ";eOZCg2IAAAAJ;iyg4ytkAAAAJ;mKia7Y4AAAAJ", "orcid": ";;0000-0002-9785-7769;", "linkedin": ";;;", "or_profile": "~Pei_Fang1;~Bochuan_Cao1;~Jinyuan_Jia2;~Jinghui_Chen1", "aff": "Tongji University;Pennsylvania State University;Pennsylvania State University;Pennsylvania State University", "aff_domain": "tongji.edu.cn;psu.edu;psu.edu;psu.edu", "position": "MS student;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@misc{\nfang2024backdoor,\ntitle={Backdoor Attack for Federated Learning with Fake Clients},\nauthor={Pei Fang and Bochuan Cao and Jinyuan Jia and Jinghui Chen},\nyear={2024},\nurl={https://openreview.net/forum?id=7d2sWFIIPF}\n}", "github": "", "project": "", "reviewers": "9itP;JM2E;DwZd;Sevc", "site": "https://openreview.net/forum?id=7d2sWFIIPF", "pdf_size": 450129, "rating": "3;3;5;5", "confidence": "3;4;4;4", "soundness": "2;2;3;2", "contribution": "2;2;3;3", "presentation": "2;2;3;3", "wc_summary": "47;108;117;51", "wc_strengths": "35;44;58;28", "wc_weaknesses": "262;88;73;76", "wc_questions": "122;80;156;160", "wc_review": "466;320;404;315", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 80.75, 31.940374136819376 ], "wc_strengths_avg": 
[ 41.25, 11.211043662389331 ], "wc_weaknesses_avg": [ 124.75, 79.43983572490568 ], "wc_questions_avg": [ 129.5, 32.16753021293366 ], "wc_review_avg": [ 376.25, 62.73107284273082 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7138459469686333820&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Tongji University;Pennsylvania State University", "aff_unique_dep": ";", "aff_unique_url": "https://www.tongji.edu.cn;https://www.psu.edu", "aff_unique_abbr": "Tongji;PSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "China;United States" }, { "id": "7duh4Ml5rc", "title": "Based on What We Can Control Artificial Neural Networks", "track": "main", "status": "Reject", "tldr": "", "abstract": "How can the stability and efficiency of Artificial Neural Networks (ANNs) be ensured through a systematic analysis method? This paper seeks to address that query. While numerous factors can influence the learning process of ANNs, utilizing knowledge from control systems allows us to analyze its system function and simulate system responses. Although the complexity of most ANNs is extremely high, we still can analyze each factor (e.g., optimiser, hyperparameters) by simulating their system response. This new method also can potentially benefit the development of new optimiser and learning system, especially when discerning which components adversely affect ANNs. Controlling ANNs can benefit from the design of optimiser and learning system, as (1) all optimisers act as controllers, (2) all learning systems operate as control systems with inputs and outputs, and (3) the optimiser should match the learning system. 
We will share the source code of this work after the paper has been accepted for publication.", "keywords": "optimizer;controller;learning system;control system;fuzzy logic;filter", "primary_area": "optimization", "supplementary_material": "/attachment/4827c5e1144c8aadb33c88a7d92c1a0b65ba8a27.zip", "author": "Cheng Kang;Xujing Yao", "authorids": "~Cheng_Kang1;~Xujing_Yao1", "gender": "M;F", "homepage": "https://chengkang520.github.io/about/;", "dblp": ";", "google_scholar": "https://scholar.google.com/citations?hl=tr;", "orcid": "0000-0001-9546-4585;0000-0001-8735-5573", "linkedin": ";", "or_profile": "~Cheng_Kang1;~Xujing_Yao1", "aff": "Czech Technical Univeresity in Prague, Czech Technical University of Prague;University of Leicester", "aff_domain": "fel.cvut.cz;le.ac.uk", "position": "PhD student;PhD student", "bibtex": "@misc{\nkang2024based,\ntitle={Based on What We Can Control Artificial Neural Networks},\nauthor={Cheng Kang and Xujing Yao},\nyear={2024},\nurl={https://openreview.net/forum?id=7duh4Ml5rc}\n}", "github": "", "project": "", "reviewers": "jZKP;WG3Y;egTB", "site": "https://openreview.net/forum?id=7duh4Ml5rc", "pdf_size": 1102247, "rating": "1;1;3", "confidence": "4;2;3", "soundness": "2;1;1", "contribution": "1;2;2", "presentation": "2;1;1", "wc_summary": "30;60;64", "wc_strengths": "31;61;45", "wc_weaknesses": "125;103;130", "wc_questions": "24;38;208", "wc_review": "210;262;447", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 1.6666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 1.3333333333333333, 0.4714045207910317 ], "contribution_avg": [ 1.6666666666666667, 0.4714045207910317 ], "presentation_avg": [ 1.3333333333333333, 0.4714045207910317 ], "wc_summary_avg": [ 51.333333333333336, 15.173075568988056 ], "wc_strengths_avg": [ 45.666666666666664, 12.256517540566822 ], "wc_weaknesses_avg": [ 119.33333333333333, 11.728408057172787 ], "wc_questions_avg": [ 90.0, 83.63412381717565 ], "wc_review_avg": [ 306.3333333333333, 101.7065495542063 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:G0yUQEppL-4J:scholar.google.com/&scioq=Based+on+What+We+Can+Control+Artificial+Neural+Networks&hl=en&as_sdt=0,5", "gs_version_total": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Czech Technical University in Prague;University of Leicester", "aff_unique_dep": ";", "aff_unique_url": "https://www.ctu.cz;https://www.leicester.ac.uk", "aff_unique_abbr": "CTU;Leicester", "aff_campus_unique_index": "0", "aff_campus_unique": "Prague;", "aff_country_unique_index": "0;1", "aff_country_unique": "Czech Republic;United Kingdom" }, { "id": "7eYmijcuqO", "title": "On the Dynamics of Learning Time-Aware Behavior with RNNs", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recurrent Neural Networks (RNNs) have shown great success in modeling time-dependent patterns, but there is limited research on how they develop representations of temporal features during training. To address this gap, we use timed automata (TA) to introduce a family of supervised learning tasks modeling behavior dependent on hidden temporal variables whose complexity is directly controllable. 
Building upon past studies from the perspective of dynamical systems theory, we train RNNs to emulate a new class of TA called temporal flipflops, and we find they undergo *phase transitions during training* characterized by sudden and rapid discovery of the hidden time-dependent features. In the case of periodic \"time-of-day\" aware flipflop, we show that the RNNs learn stable periodic cycles that encode time modulo the period of the transition rules. We then use fixed point stability analysis to monitor changes in the RNN dynamics during training, and we observe that the phase transition coincides with a *bifurcation* from which stable periodic behavior emerges. We also show that these cycles initially lose stability if the RNN is later trained on the same TA task but with a different period, and we explain this result through analysis of a simple differential equation for learning oscillations via gradient flow. Through this work, we demonstrate how dynamical systems theory can provide insights into not only learned representations, but also the dynamics and pathologies of the learning process itself.", "keywords": "recurrent neural networks;latent temporal features;developmental interpretability;phase transitions;dynamical systems theory", "primary_area": "visualization or interpretation of learned representations", "supplementary_material": "", "author": "Peter DelMastro;Rushiv Arora;Edward Rietman;Hava T Siegelmann", "authorids": "~Peter_DelMastro1;~Rushiv_Arora1;~Edward_Rietman1;~Hava_T_Siegelmann1", "gender": "M;M;;F", "homepage": ";https://rushivarora.github.io;;https://www.cics.umass.edu/faculty/directory/siegelmann_hava", "dblp": ";;32/3819.html;s/HavaTSiegelmann.html", "google_scholar": ";LxFWdpgAAAAJ;https://scholar.google.com/scholar?hl=en;https://scholar.google.co.il/citations?user=A2fiOI0AAAAJ", "orcid": ";;;0000-0003-4938-8723", "linkedin": "pdelmastro;rushiv-arora/;erietman/?midToken=AQGMbfgM-nipWg&midSig=2tulkv3VchiGM1&trk=eml-email_next_best_action_digest_01-header-68-profile&trkEmail=eml-email_next_best_action_digest_01-header-68-profile-null-1i4vs%7Elhnyy0hm%7Ezo-null-neptune%2Fprofile%7Evanity%2Eview;hava-siegelmann-4b272a/", "or_profile": "~Peter_DelMastro1;~Rushiv_Arora1;~Edward_Rietman1;~Hava_T_Siegelmann1", "aff": "Virginia Polytechnic Institute and State University;Dell AI Research;University of Massachusetts at Amherst;University of Massachusetts at Amherst", "aff_domain": "vt.edu;dell.com;umass.edu;umass.edu", "position": "PhD student;Research Scientist;Principal Researcher;Full Professor", "bibtex": "@misc{\ndelmastro2024on,\ntitle={On the Dynamics of Learning Time-Aware Behavior with {RNN}s},\nauthor={Peter DelMastro and Rushiv Arora and Edward Rietman and Hava T Siegelmann},\nyear={2024},\nurl={https://openreview.net/forum?id=7eYmijcuqO}\n}", "github": "", "project": "", "reviewers": "oza4;HfNN;kx7S;cxSU", "site": "https://openreview.net/forum?id=7eYmijcuqO", "pdf_size": 2725167, "rating": "3;3;3;3", "confidence": "3;2;3;4", "soundness": "3;3;3;3", "contribution": "2;3;2;2", "presentation": "3;3;1;2", "wc_summary": "70;51;65;96", "wc_strengths": "83;28;94;36", "wc_weaknesses": "148;161;169;271", "wc_questions": "87;16;217;12", "wc_review": "388;256;545;415", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 3.0, 0.0 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 
2.25, 0.82915619758885 ], "wc_summary_avg": [ 70.5, 16.28649747490233 ], "wc_strengths_avg": [ 60.25, 28.656369274560934 ], "wc_weaknesses_avg": [ 187.25, 48.93043531382078 ], "wc_questions_avg": [ 83.0, 82.91863481751253 ], "wc_review_avg": [ 401.0, 102.62309681548301 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:CWhABIkI-XQJ:scholar.google.com/&scioq=On+the+Dynamics+of+Learning+Time-Aware+Behavior+with+RNNs&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Virginia Tech;Dell;University of Massachusetts Amherst", "aff_unique_dep": ";Dell AI Research;", "aff_unique_url": "https://www.vt.edu;https://www.dell.com;https://www.umass.edu", "aff_unique_abbr": "VT;Dell;UMass Amherst", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Amherst", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "7em7Jl0qMm", "title": "Fourier Ordinary Differential Equations", "track": "main", "status": "Reject", "tldr": "", "abstract": "Continuous models such as Neural Ordinary Differential Equations (NODEs) are powerful approaches for modeling time series data, known for their ability to capture underlying dynamics and generalization. Current continuous models focus on learning mappings within finite-dimensional Euclidean spaces, raising two critical questions for enhancing their effectiveness. First, Is Euclidean space the optimal representation for capturing the underlying patterns and features in time series data? Second, how can we maintain granularity while benefiting from the generalization capabilities of continuous models? To address the first question, we propose a novel approach for learning dynamics in the Fourier domain. In contrast to Euclidean space, each point in Fourier space summarizes the original signal at a specific frequency, enabling more comprehensive data representations. Additionally, time differentiation in the Fourier domain simplifies the modeling of dynamics as it becomes a multiplication operation. To answer the second question, we introduce element-wise filtering, a method designed to compensate for the bias of continuous models when fitting discrete data points. These techniques culminate in the introduction of a new approach\u2014Fourier Ordinary Differential Equations (FODEs). Our experiments provide compelling evidence of FODEs' superiority in terms of accuracy, efficiency, and generalization capabilities when compared to existing methods across various time series datasets. 
By offering a novel method for modeling time series data capable of capturing both short-term and long-term patterns, FODEs have the potential to significantly enhance the modeling and prediction of complex dynamic systems.", "keywords": "Neural Ordinary Differential Equations;Time Series;Fourier;FFT", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Muhao Guo;Yang Weng", "authorids": "~Muhao_Guo1;~Yang_Weng1", "gender": "M;", "homepage": ";", "dblp": "345/6430;", "google_scholar": "wIOmifAAAAAJ;", "orcid": "0000-0002-9890-8214;", "linkedin": "muhaoguo/;", "or_profile": "~Muhao_Guo1;~Yang_Weng1", "aff": "Arizona State University;", "aff_domain": "asu.edu;", "position": "PhD student;", "bibtex": "@misc{\nguo2024fourier,\ntitle={Fourier Ordinary Differential Equations},\nauthor={Muhao Guo and Yang Weng},\nyear={2024},\nurl={https://openreview.net/forum?id=7em7Jl0qMm}\n}", "github": "", "project": "", "reviewers": "GqMG;MSqR;hxik;cX5F", "site": "https://openreview.net/forum?id=7em7Jl0qMm", "pdf_size": 4609813, "rating": "3;5;5;6", "confidence": "4;4;4;4", "soundness": "2;3;2;3", "contribution": "2;2;2;3", "presentation": "2;3;2;3", "wc_summary": "71;79;194;51", "wc_strengths": "48;61;91;26", "wc_weaknesses": "265;172;259;102", "wc_questions": "158;319;2;1", "wc_review": "542;631;546;180", "wc_reply_reviewers": "10;111;21;11", "wc_reply_authors": "512;611;485;161", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 98.75, 55.930202037897196 ], "wc_strengths_avg": [ 56.5, 23.521266972678152 ], "wc_weaknesses_avg": [ 199.5, 67.2551113299205 ], "wc_questions_avg": [ 120.0, 131.4629225295102 ], "wc_review_avg": [ 474.75, 173.84673566104138 ], "wc_reply_reviewers_avg": [ 38.25, 42.22188413607332 ], "wc_reply_authors_avg": [ 442.25, 169.01978434491033 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "aff_unique_index": "0", "aff_unique_norm": "Arizona State University", "aff_unique_dep": "", "aff_unique_url": "https://www.asu.edu", "aff_unique_abbr": "ASU", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Can Sensitive Information Be Deleted From LLMs? Objectives for Defending Against Extraction Attacks", "status": "Spotlight", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19353", "id": "7erlRDoaV8", "author_site": "Vaidehi Ramesh Patil, Peter Hase, Mohit Bansal", "tldr": "", "abstract": "Pretrained language models sometimes possess knowledge that we do not wish them to, including memorized personal information and knowledge that could be used to harm people. They can also output toxic or harmful text. To mitigate these safety and informational issues, we propose an attack-and-defense framework for studying the task of deleting sensitive information directly from model weights. 
We study direct edits to model weights because (1) this approach should guarantee that particular deleted information is never extracted by future prompt attacks, and (2) it should protect against whitebox attacks, which is necessary for making claims about safety/privacy in a setting where publicly available model weights could be used to elicit sensitive information. Our threat model assumes that an attack succeeds if the answer to a sensitive question is located among a set of B generated candidates, based on scenarios where the information would be insecure if the answer is among B candidates. Experimentally, we show that even state-of-the-art model editing methods such as ROME struggle to truly delete factual information from models like GPT-J, as our whitebox and blackbox attacks can recover \u201cdeleted\u201d information from an edited model 38% of the time. These attacks leverage two key observations: (1) that traces of deleted information can be found in intermediate model hidden states, and (2) that applying an editing method for one question may not delete information across rephrased versions of the question. Finally, we provide new defense methods that protect against some extraction attacks, but we do not find a single universally effective defense method. Our results suggest that truly deleting sensitive information is a tractable but difficult problem, since even relatively low attack success rates have potentially severe implications for the deployment of language models in a world where individuals enjoy ownership of their personal data, a right to privacy, and safety from harmful model outputs.", "keywords": "Sensitive Information Deletion;Privacy Attacks;Model editing;Language Models", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/fb642952d68748c4c3da0a925fd0602ac68ae134.zip", "author": "Vaidehi Patil;Peter Hase;Mohit Bansal", "authorids": "~Vaidehi_Patil1;~Peter_Hase1;~Mohit_Bansal2", "gender": "F;;M", "homepage": "https://vaidehi99.github.io/;;https://www.cs.unc.edu/~mbansal/", "dblp": "294/5205;;32/5243.html", "google_scholar": "wCt6wSAAAAAJ;;DN8QtscAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Vaidehi_Patil1;~Peter_Hase1;~Mohit_Bansal2", "aff": "Department of Computer Science, University of North Carolina at Chapel Hill;;University of North Carolina at Chapel Hill", "aff_domain": "cs.unc.edu;;unc.edu", "position": "PhD student;;Full Professor", "bibtex": "@inproceedings{\npatil2024can,\ntitle={Can Sensitive Information Be Deleted From {LLM}s? 
Objectives for Defending Against Extraction Attacks},\nauthor={Vaidehi Patil and Peter Hase and Mohit Bansal},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=7erlRDoaV8}\n}", "github": "", "project": "", "reviewers": "X7HP;KWLr;GnJt;174w", "pdf_size": 644259, "rating": "6;8;8;8", "confidence": "3;4;3;3", "soundness": "3;4;4;2", "contribution": "2;3;3;3", "presentation": "3;3;4;3", "wc_summary": "164;127;67;118", "wc_strengths": "112;32;27;57", "wc_weaknesses": "202;110;30;150", "wc_questions": "84;91;84;151", "wc_review": "562;360;208;476", "wc_reply_reviewers": "0;21;0;0", "wc_reply_authors": "694;507;427;759", "reply_reviewers": "0;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 119.0, 34.61935874622752 ], "wc_strengths_avg": [ 57.0, 33.726843908080106 ], "wc_weaknesses_avg": [ 123.0, 62.82515419798029 ], "wc_questions_avg": [ 102.5, 28.146935890075138 ], "wc_review_avg": [ 401.5, 132.73563952458284 ], "wc_reply_reviewers_avg": [ 5.25, 9.093266739736606 ], "wc_reply_authors_avg": [ 596.75, 134.77087036893394 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 96, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17466616399348737261&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=7erlRDoaV8", "pdf": "https://openreview.net/pdf?id=7erlRDoaV8", "email": "cs.unc.edu;;unc.edu", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "University of North Carolina at Chapel Hill;University of North Carolina", "aff_unique_dep": "Department of Computer Science;", "aff_unique_url": "https://www.unc.edu;https://www.unc.edu", "aff_unique_abbr": "UNC Chapel Hill;UNC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Chapel Hill", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "7essnmWOK5", "title": "Graph Neural Networks for Multivariate Time-Series Forecasting via Learning Hierarchical Spatiotemporal Dependencies", "track": "main", "status": "Reject", "tldr": "", "abstract": "Multivariate time-series forecasting is one of the essential tasks to draw insights from sequential data. Spatiotemporal Graph Neural Networks (STGNNs) have attracted much attention in this field due to their capability to capture the underlying spatiotemporal dependencies. However, current STGNN solutions still fall short of providing trustworthy predictions due to insufficient modeling of the dependencies and dynamics at different levels. In this paper, we propose a graph neural network model for multivariate time-series forecasting via learning hierarchical spatiotemporal dependencies (HSDGNN). Specifically, we organize variables as nodes in a graph while each node serves as a subgraph consisting of the attributes of variables. Then we design two-level convolutions on the hierarchical graph to model the spatial dependencies with different granularities. The changes in graph topologies are also encoded for strengthening dependency modeling across time and spatial dimensions. 
We test the proposed model on real-world datasets from different domains. The experimental results demonstrate the superiority of HSDGNN over state-of-the-art baselines in terms of prediction accuracy.", "keywords": "Multivariate time-series forecasting;Spatiotemporal graph neural networks;Deep learning", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Zhou Zhou;Ronisha Basker;Dit-Yan Yeung", "authorids": "~Zhou_Zhou6;~Ronisha_Basker1;~Dit-Yan_Yeung2", "gender": "Not Specified;F;M", "homepage": ";;https://cse.hkust.edu.hk/faculty/dyyeung/", "dblp": ";;41/5668", "google_scholar": ";;nEsOOx8AAAAJ", "orcid": "0000-0002-6490-8945;;0000-0003-3716-8125", "linkedin": ";ronisha-basker;", "or_profile": "~Zhou_Zhou6;~Ronisha_Basker1;~Dit-Yan_Yeung2", "aff": "Hong Kong University of Science and Technology;;Hong Kong University of Science and Technology", "aff_domain": "ust.hk;;ust.hk", "position": "Postdoc;;Chair Professor", "bibtex": "@misc{\nzhou2024graph,\ntitle={Graph Neural Networks for Multivariate Time-Series Forecasting via Learning Hierarchical Spatiotemporal Dependencies},\nauthor={Zhou Zhou and Ronisha Basker and Dit-Yan Yeung},\nyear={2024},\nurl={https://openreview.net/forum?id=7essnmWOK5}\n}", "github": "", "project": "", "reviewers": "MDiL;MTEy;kfob;7UwB", "site": "https://openreview.net/forum?id=7essnmWOK5", "pdf_size": 13118596, "rating": "3;3;3;5", "confidence": "4;4;4;4", "soundness": "2;3;2;3", "contribution": "1;2;1;2", "presentation": "2;3;1;3", "wc_summary": "64;99;58;100", "wc_strengths": "22;47;26;233", "wc_weaknesses": "356;170;61;103", "wc_questions": "14;4;126;152", "wc_review": "456;320;271;588", "wc_reply_reviewers": "32;16;28;23", "wc_reply_authors": "800;653;658;687", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 1.5, 0.5 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 80.25, 19.369757355217438 ], "wc_strengths_avg": [ 82.0, 87.6954958934608 ], "wc_weaknesses_avg": [ 172.5, 112.85056490775754 ], "wc_questions_avg": [ 74.0, 65.7419196555744 ], "wc_review_avg": [ 408.75, 123.70807370580144 ], "wc_reply_reviewers_avg": [ 24.75, 5.973901572674261 ], "wc_reply_authors_avg": [ 699.5, 59.457968347396466 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:oHwDBhFEwyMJ:scholar.google.com/&scioq=Graph+Neural+Networks+for+Multivariate+Time-Series+Forecasting+via+Learning+Hierarchical+Spatiotemporal+Dependencies&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Hong Kong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.ust.hk", "aff_unique_abbr": "HKUST", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "SpikePoint: An Efficient Point-based Spiking Neural Network for Event Cameras Action Recognition", "status": "Spotlight", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19352", "id": "7etoNfU9uF", "author_site": "Hongwei Ren, Yue ZHOU, Xiaopeng LIN, Yulong Huang, Haotian FU, Jie Song, Bojun Cheng", "tldr": "", "abstract": "Event cameras are 
bio-inspired sensors that respond to local changes in light intensity and feature low latency, high energy efficiency, and high dynamic range. Meanwhile, Spiking Neural Networks (SNNs) have gained significant attention due to their remarkable efficiency and fault tolerance. By synergistically harnessing the energy efficiency inherent in event cameras and the spike-based processing capabilities of SNNs, their integration could enable ultra-low-power application scenarios, such as action recognition tasks. However, existing approaches often entail converting asynchronous events into conventional frames, leading to additional data mapping efforts and a loss of sparsity, contradicting the design concept of SNNs and event cameras. To address this challenge, we propose SpikePoint, a novel end-to-end point-based SNN architecture. SpikePoint excels at processing sparse event cloud data, effectively extracting both global and local features through a singular-stage structure. Leveraging the surrogate training method, SpikePoint achieves high accuracy with few parameters and maintains low power consumption, specifically employing the identity mapping feature extractor on diverse datasets. SpikePoint achieves state-of-the-art (SOTA) performance on four event-based action recognition datasets using only 16 timesteps, surpassing other SNN methods. Moreover, it also achieves SOTA performance across all methods on three datasets, utilizing approximately 0.3 % of the parameters and 0.5 % of power consumption employed by artificial neural networks (ANNs). These results emphasize the significance of Point Cloud and pave the way for many ultra-low-power event-based data processing applications.", "keywords": "Spiking Neural Network;Point Cloud;Event Camera;Action Recognition", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Hongwei Ren;Yue Zhou;Xiaopeng LIN;Yulong Huang;Haotian FU;Jie Song;Bojun Cheng", "authorids": "~Hongwei_Ren2;~Yue_Zhou8;~Xiaopeng_LIN1;~Yulong_Huang2;~Haotian_FU4;~Jie_Song1;~Bojun_Cheng1", "gender": "M;F;;;M;M;M", "homepage": "http://rhwdmx.github.io;;https://github.com/xplin13;;;https://ait.ethz.ch/people/song/;https://personal.hkust-gz.edu.cn/bojuncheng/index.html", "dblp": ";;;;;09/4756-6;285/0564", "google_scholar": "https://scholar.google.com.hk/citations?user=eD60q1YAAAAJ;;;;;https://scholar.google.com/citations?hl=en;https://scholar.google.ch/citations?user=Zisp-_IAAAAJ", "orcid": ";0000-0001-9323-4524;;;0000-0001-5445-4487;0009-0003-7484-1937;", "linkedin": ";;;;;;", "or_profile": "~Hongwei_Ren2;~Yue_Zhou8;~Xiaopeng_LIN1;~Yulong_Huang2;~Haotian_FU4;~Jie_Song1;~Bojun_Cheng1", "aff": "Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;;Hong Kong University of Science and Technology;ETHZ - ETH Zurich;The Hong Kong University of Science and Technology (Guangzhou)", "aff_domain": "connect.hkust-gz.edu.cn;hkust.edu;hkust.edu;;hkust.edu;ethz.ch;hkust-gz.edu.cn", "position": "PhD student;PhD student;PhD student;;PhD student;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nren2024spikepoint,\ntitle={SpikePoint: An Efficient Point-based Spiking Neural Network for Event Cameras Action Recognition},\nauthor={Hongwei Ren and Yue Zhou and Xiaopeng LIN and Yulong Huang and Haotian FU and Jie Song and Bojun Cheng},\nbooktitle={The Twelfth International Conference on Learning 
Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=7etoNfU9uF}\n}", "github": "", "project": "", "reviewers": "NnWo;tiMk;qCNh;XXP9", "pdf_size": 11954613, "rating": "3;6;6;8", "confidence": "5;4;4;3", "soundness": "2;3;3;4", "contribution": "2;3;3;4", "presentation": "3;2;3;4", "wc_summary": "106;45;47;145", "wc_strengths": "47;69;19;87", "wc_weaknesses": "59;133;122;36", "wc_questions": "33;284;4;1", "wc_review": "245;531;192;269", "wc_reply_reviewers": "49;0;15;0", "wc_reply_authors": "809;872;731;106", "reply_reviewers": "1;0;1;0", "reply_authors": "2;2;1;1", "rating_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 85.75, 42.07953778263255 ], "wc_strengths_avg": [ 55.5, 25.391927851189244 ], "wc_weaknesses_avg": [ 87.5, 41.00304866714181 ], "wc_questions_avg": [ 80.5, 118.1535018524631 ], "wc_review_avg": [ 309.25, 131.02361428383816 ], "wc_reply_reviewers_avg": [ 16.0, 20.0124960961895 ], "wc_reply_authors_avg": [ 629.5, 306.34172095880115 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.9901475429766743, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2749373382167214942&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=7etoNfU9uF", "pdf": "https://openreview.net/pdf?id=7etoNfU9uF", "email": "connect.hkust-gz.edu.cn;hkust.edu;hkust.edu;;hkust.edu;ethz.ch;hkust-gz.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Hong Kong University of Science and Technology;ETH Zurich", "aff_unique_dep": ";", "aff_unique_url": "https://www.ust.hk;https://www.ethz.ch", "aff_unique_abbr": "HKUST;ETHZ", "aff_campus_unique_index": "0;0;0;0;2", "aff_campus_unique": "Hong Kong SAR;;Guangzhou", "aff_country_unique_index": "0;0;0;0;1;0", "aff_country_unique": "China;Switzerland" }, { "id": "7ezBaMwOqY", "title": "Trading-off Multiple Properties for Molecular Optimization", "track": "main", "status": "Reject", "tldr": "", "abstract": "Molecular optimization, a critical research area in drug discovery, aims to enhance the properties or performance of molecules through systematic modifications of their chemical structures. Recently, existing Multi-Objective Molecular Optimization (MOMO) methods are extended from Single-Objective Molecular Optimization (SOMO) approaches by employing techniques such as Linear Scalarization, Evolutionary Algorithms, and Multi-Objective Bayesian Optimization. In Multi-Objective Optimization, the ideal goal is to find Pareto optimal solutions over different preferences, which indicate the importance of different objectives. However, these straightforward extensions often struggle with trading off multiple properties due to the conflicting or correlated nature of certain properties. More specifically, current MOMO methods derived from SOMO are still challenged in finding preference-conditioned Pareto solutions and exhibit low efficiency in Pareto search. 
To address the aforementioned problems, we propose the \\textbf{P}reference-\\textbf{C}onditioned \\textbf{I}nversion (PCI) framework, efficiently ``inverting'' a pre-trained surrogate oracle under the guidance of a non-dominated gradient, to generate candidate Pareto optimal molecules over preference-conditioned distributions. Additionally, we provide theoretical guarantees for PCI's capability in converging to preference-conditioned solutions. This unique characteristic enables PCI to search the full Pareto front approximately, thereby assisting in the discovery of diverse molecules with varying ratios of properties. Comprehensive experimental evaluations show that our model significantly outperforms state-of-the-art baselines in multi-objective molecular optimization settings.", "keywords": "Molecular Optimization;Multiple Properties", "primary_area": "applications to physical sciences (physics, chemistry, biology, etc.)", "supplementary_material": "", "author": "Yifan Niu;Ziqi Gao;Tingyang Xu;Yatao Bian;Yu Rong;Jia Li", "authorids": "~Yifan_Niu1;~Ziqi_Gao1;~Tingyang_Xu1;~Yatao_Bian1;~Yu_Rong1;~Jia_Li4", "gender": "Non-Binary;;M;M;M;M", "homepage": "https://nyf0808.github.io/;;;https://royrong.me/;https://sites.google.com/view/lijia;https://yataobian.com", "dblp": ";;157/0940;24/10036-1;23/6950-9;222/2694", "google_scholar": ";https://scholar.google.com.hk/citations?user=UHwNFy8AAAAJ;6gIs5YMAAAAJ;https://scholar.google.com.hk/citations?user=itezhEMAAAAJ;1gSbcYoAAAAJ;oZBTlBkAAAAJ", "orcid": ";;0009-0002-0106-8376;0000-0001-7387-302X;0000-0002-6362-4385;0000-0002-2368-4084", "linkedin": ";;;;;", "or_profile": "~Yifan_Niu1;~Ziqi_Gao1;~Tingyang_Xu1;~Yu_Rong1;~Jia_Li4;~An_Bian1", "aff": "Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Tencent AI Lab;Tencent AI Lab;Hong Kong University of Science and Technology (Guangzhou);Tencent AI Lab", "aff_domain": "connect.hkust-gz.edu.cn;ust.hk;tencent.com;tencent.com;ust.hk;tencent.com", "position": "PhD student;PhD student;Researcher;Principal Researcher;Assistant Professor;Senior researcher ", "bibtex": "@misc{\nniu2024tradingoff,\ntitle={Trading-off Multiple Properties for Molecular Optimization},\nauthor={Yifan Niu and Ziqi Gao and Tingyang Xu and Yatao Bian and Yu Rong and Jia Li},\nyear={2024},\nurl={https://openreview.net/forum?id=7ezBaMwOqY}\n}", "github": "", "project": "", "reviewers": "8aLW;dF44;xqpc;xC4n", "site": "https://openreview.net/forum?id=7ezBaMwOqY", "pdf_size": 1983659, "rating": "3;5;5;6", "confidence": "4;4;4;2", "soundness": "2;3;3;3", "contribution": "2;3;3;3", "presentation": "3;3;3;2", "wc_summary": "114;221;132;50", "wc_strengths": "31;202;165;23", "wc_weaknesses": "269;249;372;88", "wc_questions": "31;182;10;53", "wc_review": "445;854;679;214", "wc_reply_reviewers": "363;308;85;0", "wc_reply_authors": "653;1056;863;579", "reply_reviewers": "1;1;1;0", "reply_authors": "1;3;2;2", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 129.25, 61.11208963863042 ], "wc_strengths_avg": [ 105.25, 79.38631809071384 ], "wc_weaknesses_avg": [ 244.5, 101.69685344198217 ], "wc_questions_avg": [ 69.0, 66.9888050348713 ], "wc_review_avg": [ 548.0, 241.33068598916302 ], "wc_reply_reviewers_avg": [ 189.0, 150.8094824604872 ], "wc_reply_authors_avg": [ 787.75, 186.65124564277625 ], 
"reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.6622661785325219, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:__VbIQAApc8J:scholar.google.com/&scioq=Trading-off+Multiple+Properties+for+Molecular+Optimization&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;1;1;0;1", "aff_unique_norm": "Hong Kong University of Science and Technology;Tencent", "aff_unique_dep": ";Tencent AI Lab", "aff_unique_url": "https://www.ust.hk;https://ai.tencent.com", "aff_unique_abbr": "HKUST;Tencent AI Lab", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "7ffJo4vtTY", "title": "Robust multimodal models have outlier features and encode more concepts", "track": "main", "status": "Reject", "tldr": "", "abstract": "What distinguishes robust models from non-robust ones? This question has gained traction with the appearance of large-scale multimodal models, such as CLIP. These models have demonstrated unprecedented robustness with respect to natural distribution shifts. While it has been shown that such differences in robustness can be traced back to differences in training data, so far it is not known what that translates to in terms of what the model has learned. In this work, we bridge this gap by probing the representation spaces of 12 robust multimodal models with various backbones (ResNets and ViTs) and pretraining sets (OpenAI, LAION-400M, LAION-2B, YFCC15M, CC12M and DataComp). We find two signatures of robustness in the representation spaces of these models: (1) Robust models exhibit outlier features characterized by their activations, with some being several orders of magnitude above average. These outlier features induce privileged directions in the model's representation space. We demonstrate that these privileged directions explain most of the predictive power of the model by pruning up to $80 \\\\%$ of the least important representation space directions without negative impacts on model accuracy and robustness; (2) Robust models encode substantially more concepts in their representation space. While this superposition of concepts allows robust models to store much information, it also results in highly polysemantic features, which makes their interpretation challenging. 
We discuss how these insights pave the way for future research in various fields, such as model pruning and mechanistic interpretability.", "keywords": "interpretability;explainability;robustness", "primary_area": "visualization or interpretation of learned representations", "supplementary_material": "", "author": "Jonathan Crabb\u00e9;Pau Rodriguez;Vaishaal Shankar;Luca Zappella;Arno Blaas", "authorids": "~Jonathan_Crabb\u00e91;~Pau_Rodriguez2;~Vaishaal_Shankar1;~Luca_Zappella1;~Arno_Blaas1", "gender": "M;M;;;M", "homepage": "https://jonathancrabbe.github.io/;http://www.cis.jhu.edu/~luca/;https://github.com/arblox/;https://prlz77.github.io;http://vaishaal.com", "dblp": "278/8353.html;38/2520;;190/7735;159/3628", "google_scholar": "Y_Nmd2sAAAAJ;bmh6mxAAAAAJ;;https://scholar.google.es/citations?user=IwBx73wAAAAJ;", "orcid": "0000-0002-0341-7712;;;0000-0002-1689-8084;", "linkedin": "jonathan-crabb%C3%A9-4ab5701a5/;zappella?trk=people-guest_profile-result-card_result-card_full-click;;;", "or_profile": "~Jonathan_Crabb\u00e91;~Luca_Zappella1;~Arno_Blaas1;~Pau_Rodriguez_Lopez1;~vaishaal_naanny_shankar1", "aff": "University of Cambridge;Apple;Apple;Apple;Apple", "aff_domain": "cam.ac.uk;apple.com;apple.com;apple.com;apple.com", "position": "PhD student;Principal Researcher;Researcher;Researcher;Researcher", "bibtex": "@misc{\ncrabb{\\'e}2024robust,\ntitle={Robust multimodal models have outlier features and encode more concepts},\nauthor={Jonathan Crabb{\\'e} and Pau Rodriguez and Vaishaal Shankar and Luca Zappella and Arno Blaas},\nyear={2024},\nurl={https://openreview.net/forum?id=7ffJo4vtTY}\n}", "github": "", "project": "", "reviewers": "8h4M;KW5N;qBSE;YHoY", "site": "https://openreview.net/forum?id=7ffJo4vtTY", "pdf_size": 2238262, "rating": "5;5;6;8", "confidence": "3;1;3;4", "soundness": "1;2;3;4", "contribution": "2;2;2;3", "presentation": "3;2;3;3", "wc_summary": "103;25;163;63", "wc_strengths": "161;30;297;94", "wc_weaknesses": "649;90;245;64", "wc_questions": "105;2;143;90", "wc_review": "1018;147;848;311", "wc_reply_reviewers": "187;0;66;0", "wc_reply_authors": "2177;959;2389;940", "reply_reviewers": "1;0;2;0", "reply_authors": "5;3;6;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 2.75, 1.0897247358851685 ], "soundness_avg": [ 2.5, 1.118033988749895 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 88.5, 51.09549882328188 ], "wc_strengths_avg": [ 145.5, 98.97600719366285 ], "wc_weaknesses_avg": [ 262.0, 233.9048952031573 ], "wc_questions_avg": [ 85.0, 51.66720429827803 ], "wc_review_avg": [ 581.0, 361.7713366202469 ], "wc_reply_reviewers_avg": [ 63.25, 76.35893857303151 ], "wc_reply_authors_avg": [ 1616.25, 670.9833734899845 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 4.0, 1.5811388300841898 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.7492686492653551, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1179695393162082103&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "University of Cambridge;Apple", "aff_unique_dep": ";Apple Inc.", "aff_unique_url": "https://www.cam.ac.uk;https://www.apple.com", "aff_unique_abbr": "Cambridge;Apple", "aff_campus_unique_index": "0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "United Kingdom;United States" }, { "id": "7fwzPsn1lJ", "title": "LLark: A 
Multimodal Foundation Model for Music", "track": "main", "status": "Reject", "tldr": "", "abstract": "Music has a unique and complex structure which is challenging for both expert humans and existing AI systems to understand, and presents unique challenges relative to other forms of audio. \nWe present LLark, an instruction-tuned multimodal model for music understanding. We detail our process for dataset creation, which involves augmenting the annotations of diverse open-source music datasets and converting them to a unified instruction-tuning format. We propose a multimodal architecture for LLark, integrating a pretrained generative model for music with a pretrained language model. \nIn evaluations on three types of tasks (music understanding, captioning, and reasoning), we show that our model outperforms existing baselines in zero-shot generalization for music understanding, and that humans show a high degree of agreement with the model's responses in captioning and reasoning tasks. LLark is trained entirely from open-source music data and models, and we make our training code available along with the release of this paper.\nAdditional results and audio examples are at https://bit.ly/3ZyzbGG .", "keywords": "multimodal;music;MIR;music captioning", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/7e2f04752908ad969e6f6b23cea8a9708cfdfe76.zip", "author": "Joshua P Gardner;Simon Durand;Daniel Stoller;Rachel M Bittner", "authorids": "~Joshua_P_Gardner1;~Simon_Durand1;~Daniel_Stoller1;~Rachel_M_Bittner1", "gender": ";M;;", "homepage": ";https://scholar.google.com/citations?user=N2oBCKkAAAAJ&hl=en&oi=ao;;", "dblp": ";;;", "google_scholar": ";;;pXn1kQEAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Joshua_P_Gardner1;~Simon_Durand1;~Daniel_Stoller1;~Rachel_M_Bittner1", "aff": ";;Queen Mary University London;Spotify", "aff_domain": ";;qmul.ac.uk;spotify.com", "position": ";;PhD student;Researcher", "bibtex": "@misc{\ngardner2024llark,\ntitle={{LL}ark: A Multimodal Foundation Model for Music},\nauthor={Joshua P Gardner and Simon Durand and Daniel Stoller and Rachel M Bittner},\nyear={2024},\nurl={https://openreview.net/forum?id=7fwzPsn1lJ}\n}", "github": "", "project": "", "reviewers": "p6wE;JrBS;zBmL;gHco", "site": "https://openreview.net/forum?id=7fwzPsn1lJ", "pdf_size": 1290670, "rating": "5;6;6;6", "confidence": "5;4;4;4", "soundness": "2;3;3;3", "contribution": "2;3;4;2", "presentation": "2;4;3;3", "wc_summary": "138;47;72;105", "wc_strengths": "88;50;88;30", "wc_weaknesses": "1082;352;709;24", "wc_questions": "180;38;209;28", "wc_review": "1488;487;1078;187", "wc_reply_reviewers": "554;49;0;0", "wc_reply_authors": "3677;1229;3454;1624", "reply_reviewers": "1;1;0;0", "reply_authors": "8;4;7;5", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 90.5, 34.2819194328439 ], "wc_strengths_avg": [ 64.0, 25.019992006393608 ], "wc_weaknesses_avg": [ 541.75, 394.94073922551974 ], "wc_questions_avg": [ 113.75, 81.47507287508247 ], "wc_review_avg": [ 810.0, 505.9560257571798 ], "wc_reply_reviewers_avg": [ 150.75, 233.67431929931882 ], "wc_reply_authors_avg": [ 2496.0, 1081.4571188909897 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 6.0, 1.5811388300841898 ], "replies_avg": [ 33, 0 ], 
"authors#_avg": [ 4, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10782095578294234543&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Queen Mary University of London;Spotify", "aff_unique_dep": ";", "aff_unique_url": "https://www.qmul.ac.uk;https://www.spotify.com", "aff_unique_abbr": "QMUL;Spotify", "aff_campus_unique_index": "0", "aff_campus_unique": "London;", "aff_country_unique_index": "0;1", "aff_country_unique": "United Kingdom;Sweden" }, { "id": "7fxzVTSgZC", "title": "Offline Imitation Learning without Auxiliary High-quality Behavior Data", "track": "main", "status": "Reject", "tldr": "", "abstract": "In this work, we study the problem of Offline Imitation Learning (OIL), where an agent aims to learn from the demonstrations composed of expert behaviors and sub-optimal behaviors without additional online environment interactions. Previous studies typically assume that there is high-quality behavioral data mixed in the auxiliary offline data and seriously degrades when only low-quality data from an off-policy distribution is available. In this work, we break through the bottleneck of OIL relying on auxiliary high-quality behavior data and make the first attempt to demonstrate that low-quality data is also helpful for OIL. Specifically, we utilize the transition information from offline data to maximize the policy transition probability towards expert-observed states. This guidance can improve long-term returns on states that are not observed by experts when reward signals are not available, ultimately enabling imitation learning to benefit from low-quality data. We instantiate our proposition in a simple but effective algorithm, Behavioral Cloning with Dynamic Programming (BCDP), which involves executing behavioral cloning on the expert data and dynamic programming on the unlabeled offline data respectively. 
In the experiments on benchmark tasks, unlike most existing offline imitation learning methods that do not utilize low-quality data sufficiently, our BCDP algorithm can still achieve an average performance gain of more than 40\\% even when the offline data is purely random exploration.", "keywords": "imitation learning;offline imitation learning;offline reinforcement learning", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/3d5912eba6101f92028da8bad15680f502afd76a.zip", "author": "Jie-Jing Shao;Hao-Sen Shi;Tian Xu;Lan-Zhe Guo;Yang Yu;Yu-Feng Li", "authorids": "~Jie-Jing_Shao1;~Hao-Sen_Shi1;~Tian_Xu2;~Lan-Zhe_Guo2;~Yang_Yu5;~Yu-Feng_Li1", "gender": "M;M;M;M;M;M", "homepage": "http://www.lamda.nju.edu.cn/shaojj/;http://www.lamda.nju.edu.cn/xut/;http://www.lamda.nju.edu.cn/guolz;https://www.lamda.nju.edu.cn/shihs/;http://www.lamda.nju.edu.cn/yuy;https://cs.nju.edu.cn/liyf/index.htm", "dblp": "299/4982;07/2985-3;216/4845;;46/2181-1;57/413", "google_scholar": "k1tEDpQAAAAJ;e5mnk1wAAAAJ;dpunvqgAAAAJ;;PG2lDSwAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0001-8107-114X;;;;;0000-0002-2220-5248", "linkedin": ";;;;;", "or_profile": "~Jie-Jing_Shao1;~Tian_Xu2;~Lan-Zhe_Guo2;~Haosen_Shi2;~Yang_Yu2;~Yu-feng_Li2", "aff": "Nanjing University;Nanjing University;Nanjing University;Nanjing University;Nanjing University;Nanjing University", "aff_domain": "nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn", "position": "PhD student;PhD student;Assistant Professor;MS student;Professor;Assistant Professor", "bibtex": "@misc{\nshao2024offline,\ntitle={Offline Imitation Learning without Auxiliary High-quality Behavior Data},\nauthor={Jie-Jing Shao and Hao-Sen Shi and Tian Xu and Lan-Zhe Guo and Yang Yu and Yu-Feng Li},\nyear={2024},\nurl={https://openreview.net/forum?id=7fxzVTSgZC}\n}", "github": "", "project": "", "reviewers": "K5Tt;uGnS;Yds8;Xzkb", "site": "https://openreview.net/forum?id=7fxzVTSgZC", "pdf_size": 4346384, "rating": "5;6;6;8", "confidence": "4;3;4;4", "soundness": "2;3;3;3", "contribution": "2;3;3;3", "presentation": "2;3;3;4", "wc_summary": "90;105;133;89", "wc_strengths": "14;61;238;106", "wc_weaknesses": "479;316;27;168", "wc_questions": "124;129;59;13", "wc_review": "707;611;457;376", "wc_reply_reviewers": "386;56;28;0", "wc_reply_authors": "2060;946;234;91", "reply_reviewers": "2;1;1;0", "reply_authors": "4;2;1;1", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 104.25, 17.76759691123141 ], "wc_strengths_avg": [ 104.75, 83.5265676297069 ], "wc_weaknesses_avg": [ 247.5, 168.2446135839124 ], "wc_questions_avg": [ 81.25, 48.11639533464659 ], "wc_review_avg": [ 537.75, 129.1266335811478 ], "wc_reply_reviewers_avg": [ 117.5, 156.2777975273519 ], "wc_reply_authors_avg": [ 832.75, 779.0479365867033 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.13245323570650439, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10838255184797337773&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": "https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Belief-Enriched Pessimistic Q-Learning against Adversarial State Perturbations", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19351", "id": "7gDENzTzw1", "author_site": "Xiaolin Sun, Zizhan Zheng", "tldr": "", "abstract": "Reinforcement learning (RL) has achieved phenomenal success in various domains. However, its data-driven nature also introduces new vulnerabilities that can be exploited by malicious opponents. Recent work shows that a well-trained RL agent can be easily manipulated by strategically perturbing its state observations at the test stage. Existing solutions either introduce a regularization term to improve the smoothness of the trained policy against perturbations or alternatively train the agent's policy and the attacker's policy. However, the former does not provide sufficient protection against strong attacks, while the latter is computationally prohibitive for large environments. In this work, we propose a new robust RL algorithm for deriving a pessimistic policy to safeguard against an agent's uncertainty about true states. This approach is further enhanced with belief state inference and diffusion-based state purification to reduce uncertainty. Empirical results show that our approach obtains superb performance under strong attacks and has a comparable training overhead with regularization-based methods. Our code is available at https://github.com/SliencerX/Belief-enriched-robust-Q-learning.", "keywords": "Reinforcement Learning;Robustness;Adversarial Attack;Adversarial Defense", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/fc5a3b1becca02090173a605a76086e275a0e612.zip", "author": "Xiaolin Sun;Zizhan Zheng", "authorids": "~Xiaolin_Sun1;~Zizhan_Zheng1", "gender": "M;M", "homepage": "https://xsun01.wixsite.com/mysite-1;https://www.cs.tulane.edu/~zzheng3/", "dblp": "26/;23/286", "google_scholar": "6VM5rAYAAAAJ;B1v2AUYAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Xiaolin_Sun1;~Zizhan_Zheng1", "aff": "Tulane University;Tulane University", "aff_domain": "tulane.edu;tulane.edu", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nsun2024beliefenriched,\ntitle={Belief-Enriched Pessimistic Q-Learning against Adversarial State Perturbations},\nauthor={Xiaolin Sun and Zizhan Zheng},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=7gDENzTzw1}\n}", "github": "", "project": "", "reviewers": "achV;cDHy;GsDy;QHWx", "pdf_size": 663907, "rating": "5;6;6;8", "confidence": "4;2;2;4", "soundness": "2;3;3;3", "contribution": "3;3;2;4", "presentation": "3;3;3;3", "wc_summary": "64;46;76;64", "wc_strengths": "55;19;47;28", "wc_weaknesses": "112;12;40;57", "wc_questions": "39;130;66;2", "wc_review": "270;207;229;151", "wc_reply_reviewers": "131;0;21;21", "wc_reply_authors": "1224;989;480;406", "reply_reviewers": "1;0;1;1", "reply_authors": "3;3;2;1", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.0, 1.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 62.5, 10.712142642814275 ], "wc_strengths_avg": [ 37.25, 14.394009170484782 ], "wc_weaknesses_avg": [ 55.25, 36.49229370702806 ], "wc_questions_avg": [ 59.25, 46.74064077438391 ], "wc_review_avg": [ 
214.25, 42.949825377991935 ], "wc_reply_reviewers_avg": [ 43.25, 51.38275488916491 ], "wc_reply_authors_avg": [ 774.75, 342.9951712488093 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.2294157338705618, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:PeTPByT4qxQJ:scholar.google.com/&scioq=Belief-Enriched+Pessimistic+Q-Learning+against+Adversarial+State+Perturbations&hl=en&as_sdt=0,33", "gs_version_total": 6, "openreview": "https://openreview.net/forum?id=7gDENzTzw1", "pdf": "https://openreview.net/pdf?id=7gDENzTzw1", "email": "tulane.edu;tulane.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Tulane University", "aff_unique_dep": "", "aff_unique_url": "https://www.tulane.edu", "aff_unique_abbr": "Tulane", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Proper Laplacian Representation Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19350", "id": "7gLfQT52Nn", "author_site": "Diego Gomez, Michael Bowling, Marlos C. Machado", "tldr": "", "abstract": "The ability to learn good representations of states is essential for solving large reinforcement learning problems, where exploration, generalization, and transfer are particularly challenging. The _Laplacian representation_ is a promising approach to address these problems by inducing informative state encoding and intrinsic rewards for temporally-extended action discovery and reward shaping. To obtain the Laplacian representation one needs to compute the eigensystem of the graph Laplacian, which is often approximated through optimization objectives compatible with deep learning approaches. These approximations, however, depend on hyperparameters that are impossible to tune efficiently, converge to arbitrary rotations of the desired eigenvectors, and are unable to accurately recover the corresponding eigenvalues. In this paper we introduce a theoretically sound objective and corresponding optimization algorithm for approximating the Laplacian representation. Our approach naturally recovers both the true eigenvectors and eigenvalues while eliminating the hyperparameter dependence of previous approximations. We provide theoretical guarantees for our method and we show that those results translate empirically into robust learning across multiple environments.", "keywords": "Reinforcement learning;Graph Laplacian;Representation learning;Augmented Lagrangian optimization;Hyperparameter robustness", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Diego Gomez;Michael Bowling;Marlos C. 
Machado", "authorids": "~Diego_Gomez1;~Michael_Bowling1;~Marlos_C._Machado1", "gender": "M;M;M", "homepage": ";https://webdocs.cs.ualberta.ca/~bowling/;https://webdocs.cs.ualberta.ca/~machado/", "dblp": ";71/5161;21/10949", "google_scholar": "wj8Edy4AAAAJ;https://scholar.google.ca/citations?user=PYtPCHoAAAAJ;https://scholar.google.ca/citations?user=xf_n4xUAAAAJ", "orcid": "0000-0002-4625-233X;;", "linkedin": ";;cholodovskis/", "or_profile": "~Diego_Gomez1;~Michael_Bowling1;~Marlos_C._Machado1", "aff": "University of Alberta;Department of Computing Science, University of Alberta;University of Alberta", "aff_domain": "ualberta.ca;cs.ualberta.ca;ualberta.ca", "position": "Researcher;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\ngomez2024proper,\ntitle={Proper Laplacian Representation Learning},\nauthor={Diego Gomez and Michael Bowling and Marlos C. Machado},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=7gLfQT52Nn}\n}", "github": "", "project": "", "reviewers": "DES2;tVNo;Fii6;nARE", "pdf_size": 5740877, "rating": "5;6;6;6", "confidence": "3;4;1;3", "soundness": "3;4;3;3", "contribution": "2;3;3;3", "presentation": "3;4;3;3", "wc_summary": "66;78;85;56", "wc_strengths": "70;112;11;57", "wc_weaknesses": "143;197;12;21", "wc_questions": "3;43;41;91", "wc_review": "282;430;149;225", "wc_reply_reviewers": "0;284;0;109", "wc_reply_authors": "18;743;172;710", "reply_reviewers": "0;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 2.75, 1.0897247358851685 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 71.25, 11.121488209767612 ], "wc_strengths_avg": [ 62.5, 36.01735692690401 ], "wc_weaknesses_avg": [ 93.25, 79.15293740601166 ], "wc_questions_avg": [ 44.5, 31.22098653149833 ], "wc_review_avg": [ 271.5, 102.95751551003939 ], "wc_reply_reviewers_avg": [ 98.25, 116.10851605287185 ], "wc_reply_authors_avg": [ 410.75, 320.62234404358037 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.13245323570650439, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15160893217759249782&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=7gLfQT52Nn", "pdf": "https://openreview.net/pdf?id=7gLfQT52Nn", "email": "ualberta.ca;cs.ualberta.ca;ualberta.ca", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Alberta", "aff_unique_dep": "", "aff_unique_url": "https://www.ualberta.ca", "aff_unique_abbr": "UAlberta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "id": "7gUmlgc9q0", "title": "Towards More Accurate Diffusion Model Acceleration with A Timestep Aligner", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "A diffusion model, which is formulated to produce an image using thousands of denoising steps, usually suffers from a slow inference speed. Existing acceleration algorithms simplify the sampling by skipping most steps yet observe considerable performance degradation. 
By viewing the generation of diffusion models as a discretized integrating process, we argue that the quality drop is partly caused by applying an inaccurate integral direction to a timestep interval. To rectify such inaccuracy, we propose a \\textbf{timestep aligner} that helps find a more accurate integral direction for a particular interval at the minimum cost. Specifically, at each denoising step, we replace the original parameterization by conditioning the network on a new timestep, which is obtained by aligning the sampling distribution to the real distribution. Extensive experiments show that our plug-in design can be trained efficiently and boost the inference performance of various state-of-the-art acceleration methods, especially for the one with few denoising steps. For example, when using 10 denoising steps on the popular LSUN Bedroom dataset, we improve the FID of DDIM from 9.65 to 6.07, simply by adopting our method for a more appropriate set of timesteps. Code will be made publicly available.", "keywords": "Generative model;diffusion model", "primary_area": "generative models", "supplementary_material": "", "author": "Mengfei Xia;Yujun Shen;Changsong Lei;Yu Zhou;Ran Yi;Deli Zhao;Wenping Wang;Yong-jin Liu", "authorids": "~Mengfei_Xia1;~Yujun_Shen1;~Changsong_Lei2;~Yu_Zhou17;~Ran_Yi1;~Deli_Zhao1;~Wenping_Wang1;~Yong-jin_Liu1", "gender": "M;;M;M;F;M;M;M", "homepage": "https://thuxmf.github.io/;;https://github.com/lcshhh;https://scholar.google.com/citations?hl=zh-CN&user=pMXjhxkAAAAJ;https://yiranran.github.io/;https://zhaodeli.github.io;https://engineering.tamu.edu/cse/profiles/Wang-Wenping.html;https://cg.cs.tsinghua.edu.cn/people/~Yongjin/Yongjin.htm", "dblp": "301/3569;;;;136/5469;77/1992;;27/2098", "google_scholar": "jmOlxQ0AAAAJ;;;;https://scholar.google.com.hk/citations?user=y68DLo4AAAAJ;https://scholar.google.com/citations?hl=en;28shvv0AAAAJ;https://scholar.google.com.tw/citations?user=GNDtwWQAAAAJ", "orcid": ";;;;0000-0003-1858-3358;0000-0002-8838-578X;0000-0002-2284-3952;0000-0001-5774-1916", "linkedin": ";;;;;;;", "or_profile": "~Mengfei_Xia1;~Yujun_Shen1;~Changsong_Lei2;~Yu_Zhou17;~Ran_Yi1;~Deli_Zhao1;~Wenping_Wang1;~Yong-jin_Liu1", "aff": "Tsinghua University;;Tsinghua University;Tsinghua University;Shanghai Jiaotong University;Alibaba Group;Texas A&M University - College Station;Tsinghua University", "aff_domain": "tsinghua.edu.cn;;tsinghua.edu.cn;tsinghua.edu.cn;sjtu.edu.cn;alibaba-inc.com;tamu.edu;tsinghua.edu.cn", "position": "PhD student;;PhD student;Undergrad student;Assistant Professor;Director;Full Professor;Full Professor", "bibtex": "@misc{\nxia2024towards,\ntitle={Towards More Accurate Diffusion Model Acceleration with A Timestep Aligner},\nauthor={Mengfei Xia and Yujun Shen and Changsong Lei and Yu Zhou and Ran Yi and Deli Zhao and Wenping Wang and Yong-jin Liu},\nyear={2024},\nurl={https://openreview.net/forum?id=7gUmlgc9q0}\n}", "github": "", "project": "", "reviewers": "koqD;n1ad;j67R", "site": "https://openreview.net/forum?id=7gUmlgc9q0", "pdf_size": 3979255, "rating": "3;3;5", "confidence": "4;4;2", "soundness": "2;2;3", "contribution": "1;2;2", "presentation": "2;3;3", "wc_summary": "75;67;49", "wc_strengths": "20;59;25", "wc_weaknesses": "424;251;114", "wc_questions": "2;39;3", "wc_review": "521;416;191", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], 
"soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 1.6666666666666667, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 63.666666666666664, 10.873004286866726 ], "wc_strengths_avg": [ 34.666666666666664, 17.326921891156033 ], "wc_weaknesses_avg": [ 263.0, 126.8411079526928 ], "wc_questions_avg": [ 14.666666666666666, 17.21110752456745 ], "wc_review_avg": [ 376.0, 137.65899897936205 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 3, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10405448026401765017&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;1;2;3;0", "aff_unique_norm": "Tsinghua University;Shanghai Jiao Tong University;Alibaba Group;Texas A&M University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.sjtu.edu.cn;https://www.alibaba.com;https://www.tamu.edu", "aff_unique_abbr": "THU;SJTU;Alibaba;TAMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";College Station", "aff_country_unique_index": "0;0;0;0;0;1;0", "aff_country_unique": "China;United States" }, { "title": "EQA-MX: Embodied Question Answering using Multimodal Expression", "status": "Spotlight", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19349", "id": "7gUrYE50Rb", "author_site": "Md Mofijul Islam, Alexi Gladstone, Riashat Islam, Tariq Iqbal", "tldr": "", "abstract": "Humans predominantly use verbal utterances and nonverbal gestures (e.g., eye gaze and pointing gestures) in their natural interactions. For instance, pointing gestures and verbal information is often required to comprehend questions such as \"what object is that?\" Thus, this question-answering (QA) task involves complex reasoning of multimodal expressions (verbal utterances and nonverbal gestures). However, prior works have explored QA tasks in non-embodied settings, where questions solely contain verbal utterances from a single verbal and visual perspective. In this paper, we have introduced 8 novel embodied question answering (EQA) tasks to develop learning models to comprehend embodied questions with multimodal expressions. We have developed a novel large-scale dataset, EQA-MX, with over 8 million diverse embodied QA data samples involving multimodal expressions from multiple visual and verbal perspectives. To learn salient multimodal representations from discrete verbal embeddings and continuous wrapping of multiview visual representations, we propose a vector-quantization (VQ) based multimodal representation learning model, VQ-Fusion, for the EQA tasks. 
Our extensive experimental results suggest that VQ-Fusion can improve the performance of existing state-of-the-art visual-language models up to 13% across EQA tasks.", "keywords": "multimodal representation learning;visual-language models;embodied question answering", "primary_area": "datasets and benchmarks", "supplementary_material": "", "author": "Md Mofijul Islam;Alexi Gladstone;Riashat Islam;Tariq Iqbal", "authorids": "~Md_Mofijul_Islam1;~Alexi_Gladstone1;~Riashat_Islam1;~Tariq_Iqbal1", "gender": "M;M;M;", "homepage": "http://mmiakashs.github.io;https://alexiglad.github.io/;https://riashat.github.io/;http://www.tiqbal.com", "dblp": "271/8379;346/0923;198/0459;159/0463", "google_scholar": "FYy4ZxYAAAAJ;j9Cx6PcAAAAJ;https://scholar.google.ca/citations?user=2_4Rs44AAAAJ;t_ndTI4AAAAJ", "orcid": ";;;", "linkedin": "beingmiakashs;alexiglad/;;", "or_profile": "~Md_Mofijul_Islam1;~Alexi_Gladstone1;~Riashat_Islam1;~Tariq_Iqbal1", "aff": "Amazon;University of Virginia, Charlottesville;Saudi Data and AI Authority, Saudi Data and AI Authority;University of Virginia", "aff_domain": "amazon.com;virginia.edu;sdaia.gov.sa;virginia.edu", "position": "Applied Scientist;Undergrad student;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nislam2024eqamx,\ntitle={{EQA}-{MX}: Embodied Question Answering using Multimodal Expression},\nauthor={Md Mofijul Islam and Alexi Gladstone and Riashat Islam and Tariq Iqbal},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=7gUrYE50Rb}\n}", "github": "", "project": "", "reviewers": "BG1y;4CbJ;9x3E;7hsY", "pdf_size": 23982438, "rating": "8;8;8;8", "confidence": "3;4;4;4", "soundness": "4;4;2;3", "contribution": "4;3;3;3", "presentation": "4;2;2;4", "wc_summary": "82;56;83;125", "wc_strengths": "99;99;96;127", "wc_weaknesses": "121;142;627;154", "wc_questions": "77;2;255;21", "wc_review": "379;299;1061;427", "wc_reply_reviewers": "36;72;419;30", "wc_reply_authors": "1135;960;4566;1050", "reply_reviewers": "1;1;2;1", "reply_authors": "7;6;14;5", "rating_avg": [ 8.0, 0.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "contribution_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 1.0 ], "wc_summary_avg": [ 86.5, 24.72347063015223 ], "wc_strengths_avg": [ 105.25, 12.616952880945542 ], "wc_weaknesses_avg": [ 261.0, 211.64002457002314 ], "wc_questions_avg": [ 88.75, 99.86584751555459 ], "wc_review_avg": [ 541.5, 303.39866512560667 ], "wc_reply_reviewers_avg": [ 139.25, 162.31046639080304 ], "wc_reply_authors_avg": [ 1927.75, 1524.450782249135 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 8.0, 3.5355339059327378 ], "replies_avg": [ 43, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16387022272988085046&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "openreview": "https://openreview.net/forum?id=7gUrYE50Rb", "pdf": "https://openreview.net/pdf?id=7gUrYE50Rb", "email": "amazon.com;virginia.edu;sdaia.gov.sa;virginia.edu", "author_num": 4, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Amazon;University of Virginia;Saudi Data and AI Authority", "aff_unique_dep": "Amazon.com, Inc.;;", "aff_unique_url": "https://www.amazon.com;https://www.virginia.edu;https://sdaia.gov.sa", "aff_unique_abbr": "Amazon;UVA;SDAIA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Charlottesville", 
"aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;Saudi Arabia" }, { "id": "7gVX2LxE7A", "title": "SpecAR-Net: Spectrogram Analysis and Representation Network for Time Series", "track": "main", "status": "Reject", "tldr": "", "abstract": "Time series analysis involves modeling time series to extract valuable information, which finds broad applications in domains such as device malfunction diagnosis, human activity recognition, and medical-assisted diagnosis. Representing temporal-structured samples is crucial for time series analysis tasks. Recently, several advanced deep learning models, i.e., recurrent neural networks, convolutional neural networks, and transformer-style models, have been successively applied in the field of temporal data representation, yielding notable results. Those existing methods primarily model and represent the variation patterns within time series solely in time domain. However, as a highly abstracted information entity, time series data is formed by the coupling of various patterns such as trends, seasonality, and dramatic changes (instantaneous high dynamic), it is difficult to exploit these highly coupled properties only by means of analysis in the time domain. Consequently, it would be insufficient for time-domain dependent only methods to overcome the semantic representation bottleneck or construct comprehensive feature representations of 1D time series. To this end, we present Spectrum Analysis and Representation Network (SpecAR-Net). SpecAR-Net aims at learning more comprehensive representations by modeling raw time series in time-frequency domain, where an efficient joint extraction of time-frequency features is achieved through a group of learnable 2D multi-scale parallel complex convolution blocks. 
Experimental results show that the SpecAR-Net achieves excellent performance in five major downstream tasks of time series analysis i.e., classification, anomaly detection, imputation, long- and short-term series forecasting.", "keywords": "Time series analysis;time series representation;time-frequency transformation;complex convolution", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/e39cdbb7be7beb449f1098b31f8d10e2b35451ae.pdf", "author": "Y Dong;Liwen Zhang;Youcheng Zhang;Shi Peng;Wen Chen;Zhe Ma;Xuhui Huang", "authorids": "~Y_Dong1;~Liwen_Zhang5;~Youcheng_Zhang1;~Shi_Peng2;~Wen_Chen7;~Zhe_Ma2;~Xuhui_Huang1", "gender": "M;M;F;;;M;M", "homepage": ";;;;https://scholar.google.com/citations?user=wZWfbLUAAAAJ&hl=zh-CN;https://dblp.org/pid/22/6672;", "dblp": ";94/905;;;;22/6672-1;", "google_scholar": "mb6jJaQAAAAJ;;;Z5UHCdUAAAAJ;;;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0001-8457-2943;0000-0001-9762-7966;;;;", "linkedin": ";;;;;;", "or_profile": "~Y_Dong1;~Liwen_Zhang5;~Youcheng_Zhang1;~Shi_Peng2;~Wen_Chen7;~Zhe_Ma2;~Xuhui_Huang1", "aff": ";Harbin Institute of Technology;Intelligent Science and Technology Academy of CASIC;;;Intelligent science and technology academy limited of CASIC;Intelligent Science and Technology Academy of CASIC", "aff_domain": ";hit.edu.cn;casic.com.cn;;;casic.com;casic.com.cn", "position": ";Researcher;Engineer;;;Full Professor;Full Professor", "bibtex": "@misc{\ndong2024specarnet,\ntitle={Spec{AR}-Net: Spectrogram Analysis and Representation Network for Time Series},\nauthor={Y Dong and Liwen Zhang and Youcheng Zhang and Shi Peng and Wen Chen and Zhe Ma and Xuhui Huang},\nyear={2024},\nurl={https://openreview.net/forum?id=7gVX2LxE7A}\n}", "github": "", "project": "", "reviewers": "13Yj;forZ;hAhz", "site": "https://openreview.net/forum?id=7gVX2LxE7A", "pdf_size": 543364, "rating": "3;3;5", "confidence": "3;4;3", "soundness": "1;2;2", "contribution": "2;2;1", "presentation": "2;2;2", "wc_summary": "37;67;56", "wc_strengths": "18;31;45", "wc_weaknesses": "160;179;196", "wc_questions": "179;53;42", "wc_review": "394;330;339", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 1.6666666666666667, 0.4714045207910317 ], "contribution_avg": [ 1.6666666666666667, 0.4714045207910317 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 53.333333333333336, 12.39175353029407 ], "wc_strengths_avg": [ 31.333333333333332, 11.025223605694151 ], "wc_weaknesses_avg": [ 178.33333333333334, 14.704496666741854 ], "wc_questions_avg": [ 91.33333333333333, 62.15214307559224 ], "wc_review_avg": [ 354.3333333333333, 28.288199345702832 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:R06JdPgvNxkJ:scholar.google.com/&scioq=SpecAR-Net:+Spectrogram+Analysis+and+Representation+Network+for+Time+Series&hl=en&as_sdt=0,33", "gs_version_total": 3, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Harbin Institute of Technology;China Aerospace Science and Industry Corporation", "aff_unique_dep": ";Intelligent Science and Technology 
Academy", "aff_unique_url": "http://www.hit.edu.cn/;http://www.casic.com.cn/", "aff_unique_abbr": "HIT;CASIC", "aff_campus_unique_index": "0", "aff_campus_unique": "Harbin;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "7gg2PcT4HJ", "title": "Hybrid Representation Learning Via Epistemic Graph", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "In recent years, deep models have achieved remarkable success in many vision tasks. Unfortunately, their performance largely depends on intensive training samples. In contrast, human beings typically perform hybrid learning, e.g., spontaneously integrating structured knowledge for cross-domain recognition or on a much smaller amount of data samples for few-shot learning. Thus it is very attractive to extend hybrid learning for the computer vision tasks by seamlessly integrating structured knowledge with data samples to achieve more effective representation learning. However, such a hybrid learning approach remains a great challenge due to the huge gap between the structured knowledge and the deep features (learned from data samples) on both dimensions and knowledge granularity. In this paper, a novel Epistemic Graph Layer (EGLayer) is developed to enable hybrid learning, such that the information can be exchanged more effectively between the deep features and a structured knowledge graph. Our EGLayer is composed of three major parts: (a) a local graph module to establish a local prototypical graph through the learned deep features, i.e., aligning the deep features with the structured knowledge graph at the same granularity; (b) a query aggregation model to aggregate useful information from the local graphs, and using such representations to compute their similarity with global node embeddings for final prediction; and (c) a novel correlation alignment loss function to constrain the linear consistency between the local and global adjacency matrices from both cosine similarity and Euclidean space. EGLayer is a plug-and-play module that can replace the standard linear classifier, significantly improving the performance of deep models. 
Extensive experiments have demonstrated that EGLayer can greatly enhance representation learning for the tasks of cross-domain recognition and few-shot learning, and the visualization of knowledge graphs can aid in model interpretation.", "keywords": "Representation Learning;Graph Network;Hybrid Learning", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/3c40121da7316595f08288db1e08056fe8e1939f.zip", "author": "Jin Yuan;Yang Zhang;Yangzhou Du;zhongchao shi;Xin Geng;Jianping Fan;Yong Rui", "authorids": "~Jin_Yuan2;~Yang_Zhang23;~Yangzhou_Du2;~zhongchao_shi1;~Xin_Geng1;~Jianping_Fan4;~Yong_Rui2", "gender": "M;M;M;M;M;M;M", "homepage": "https://www.researchgate.net/profile/Jin-Yuan-19;;;;http://palm.seu.edu.cn/xgeng/index.htm;;", "dblp": "98/609.html;06/6785-2;40/951.html;45/5323;;69/2360.html;r/YongRui", "google_scholar": "https://scholar.google.com.hk/citations?user=S1JGPCMAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;GASgQxEAAAAJ;ZOCxkIcAAAAJ;;rCGsLtcAAAAJ", "orcid": "0000-0002-9954-0693;;;;;;", "linkedin": ";;;;;;", "or_profile": "~Jin_Yuan2;~Yang_Zhang23;~Yangzhou_Du2;~zhongchao_shi1;~Xin_Geng1;~Jianping_Fan4;~Yong_Rui2", "aff": "Southeast University, Tsinghua University;Lenovo Research, AI Lab;Lenovo;Lenovo Research;Southeast University, China;Northwest University;Lenovo", "aff_domain": "seu.edu.cn;lenovo.com;lenovo.com;lenovo.com;seu.edu.cn;nwu.edu.cn;lenovo.com", "position": "PhD student;Researcher;Researcher;Research Scientist;Professor;Full Professor;Full Professor", "bibtex": "@misc{\nyuan2024hybrid,\ntitle={Hybrid Representation Learning Via Epistemic Graph},\nauthor={Jin Yuan and Yang Zhang and Yangzhou Du and zhongchao shi and Xin Geng and Jianping Fan and Yong Rui},\nyear={2024},\nurl={https://openreview.net/forum?id=7gg2PcT4HJ}\n}", "github": "", "project": "", "reviewers": "EhZr;rxaN;kGHn", "site": "https://openreview.net/forum?id=7gg2PcT4HJ", "pdf_size": 664169, "rating": "3;3;5", "confidence": "5;3;4", "soundness": "2;2;3", "contribution": "1;1;2", "presentation": "2;1;1", "wc_summary": "61;64;50", "wc_strengths": "48;44;32", "wc_weaknesses": "121;177;177", "wc_questions": "5;148;127", "wc_review": "235;433;386", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 1.3333333333333333, 0.4714045207910317 ], "presentation_avg": [ 1.3333333333333333, 0.4714045207910317 ], "wc_summary_avg": [ 58.333333333333336, 6.018490028422596 ], "wc_strengths_avg": [ 41.333333333333336, 6.79869268479038 ], "wc_weaknesses_avg": [ 158.33333333333334, 26.398653164297777 ], "wc_questions_avg": [ 93.33333333333333, 63.04671989000609 ], "wc_review_avg": [ 351.3333333333333, 84.46827149225257 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 3, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:PZfZqTSX1KcJ:scholar.google.com/&scioq=Hybrid+Representation+Learning+Via+Epistemic+Graph&hl=en&as_sdt=0,33", "gs_version_total": 2, "aff_unique_index": "0;1;2;3;0;4;2", "aff_unique_norm": "Southeast University;Lenovo Research;Lenovo Group Limited;Lenovo;Northwest 
University", "aff_unique_dep": ";AI Lab;;Research;", "aff_unique_url": "https://www.seu.edu.cn/;https://www.lenovo.com;https://www.lenovo.com;https://www.lenovo.com;https://www.nwu.edu.cn", "aff_unique_abbr": "SEU;Lenovo;Lenovo;Lenovo;NWU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "7hqNl9nP81", "title": "On Memorization and Privacy Risks of Sharpness Aware Minimization", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "In many recent works, there is an increased focus on designing algorithms that seek flatter optima for neural network loss optimization as there is empirical evidence that it leads to better generalization performance in many datasets. In this work, we dissect these performance gains through the lens of data memorization in overparameterized models. We define a new metric that helps us identify which data points specifically do algorithms seeking flatter optima do better when compared to vanilla SGD. We find that the generalization gains achieved by Sharpness Aware Minimization (SAM) are particularly pronounced for atypical data points, which necessitate memorization. This insight helps us unearth higher privacy risks associated with SAM, which we verify through exhaustive empirical evaluations. Finally, we propose mitigation strategies to achieve a more desirable accuracy vs privacy tradeoff.", "keywords": "SAM;privacy;memorization;generalization;sharpness aware minimization;flat minima;wider minima;sharper minima;membership inference attack", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/8936d8989ca8947a90720a34402c720b5d68dd5c.pdf", "author": "Young In Kim;Pratiksha Agrawal;Johannes Royset;Rajiv Khanna", "authorids": "~Young_In_Kim1;~Pratiksha_Agrawal2;~Johannes_Royset1;~Rajiv_Khanna1", "gender": "M;;;M", "homepage": ";;https://faculty.nps.edu/joroyset/;http://rjvak7.github.io/", "dblp": ";;;31/4624", "google_scholar": ";;vdegcWUAAAAJ;523w4w8AAAAJ", "orcid": ";;;0000-0003-1314-3126", "linkedin": "https://linkedin.com/in/kim-young-in-8034a3276/;pratiksha-agrawal-9696b5127/;;", "or_profile": "~Young_In_Kim1;~Pratiksha_Agrawal2;~Johannes_Royset1;~Rajiv_Khanna1", "aff": "Vanderbilt University;;Naval Postgraduate School;Purdue University", "aff_domain": "vanderbilt.edu;;nps.edu;purdue.edu", "position": "Undergrad student;;Full Professor;Assistant Professor", "bibtex": "@misc{\nkim2024on,\ntitle={On Memorization and Privacy Risks of Sharpness Aware Minimization},\nauthor={Young In Kim and Pratiksha Agrawal and Johannes Royset and Rajiv Khanna},\nyear={2024},\nurl={https://openreview.net/forum?id=7hqNl9nP81}\n}", "github": "", "project": "", "reviewers": "3uQd;gjgb;Dp9T;AV1Z;mP5k", "site": "https://openreview.net/forum?id=7hqNl9nP81", "pdf_size": 2034846, "rating": "3;3;5;5;5", "confidence": "4;4;3;2;3", "soundness": "2;2;2;2;2", "contribution": "3;2;2;2;2", "presentation": "1;2;3;2;3", "wc_summary": "46;135;53;104;96", "wc_strengths": "72;51;51;61;19", "wc_weaknesses": "586;448;191;113;129", "wc_questions": "2;128;2;115;16", "wc_review": "706;762;297;393;260", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "0;0;0;0;0", "rating_avg": [ 4.2, 0.9797958971132712 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 2.0, 0.0 ], "contribution_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 2.2, 0.7483314773547882 
], "wc_summary_avg": [ 86.8, 33.19879515886081 ], "wc_strengths_avg": [ 50.8, 17.690675509996783 ], "wc_weaknesses_avg": [ 293.4, 189.51158275947145 ], "wc_questions_avg": [ 52.6, 56.63779656731007 ], "wc_review_avg": [ 483.6, 209.75852783617643 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8728715609439696, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11624328370537611340&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Vanderbilt University;Naval Postgraduate School;Purdue University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.vanderbilt.edu;https://www.nps.edu;https://www.purdue.edu", "aff_unique_abbr": "Vanderbilt;NPS;Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Continuous-Multiple Image Outpainting in One-Step via Positional Query and A Diffusion-based Approach", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19348", "id": "7hxoYxKDTV", "author_site": "Shaofeng Zhang, Jinfa Huang, Qiang Zhou, zhibin wang, Fan Wang, Jiebo Luo, Junchi Yan", "tldr": "", "abstract": "Image outpainting aims to generate the content of an input sub-image beyond its original boundaries. It is an important task in content generation yet remains an open problem for generative models. This paper pushes the technical frontier of image outpainting in two directions that have not been resolved in literature: 1) outpainting with arbitrary and continuous multiples (without restriction), and 2) outpainting in a single step (even for large expansion multiples). Moreover, we develop a method that does not depend on a pre-trained backbone network, which is in contrast commonly required by the previous SOTA outpainting methods. The arbitrary multiple outpainting is achieved by utilizing randomly cropped views from the same image during training to capture arbitrary relative positional information. Specifically, by feeding one view and positional embeddings as queries, we can reconstruct another view. At inference, we generate images with arbitrary expansion multiples by inputting an anchor image and its corresponding positional embeddings. The one-step outpainting ability here is particularly noteworthy in contrast to previous methods that need to be performed for $N$ times to obtain a final multiple which is $N$ times of its basic and fixed multiple. We evaluate the proposed approach (called PQDiff as we adopt a diffusion-based generator as our embodiment, under our proposed \\textbf{P}ositional \\textbf{Q}uery scheme) on public benchmarks, demonstrating its superior performance over state-of-the-art approaches. Specifically, PQDiff achieves state-of-the-art FID scores on the Scenery (\\textbf{21.512}), Building Facades (\\textbf{25.310}), and WikiArts (\\textbf{36.212}) datasets. 
Furthermore, under the 2.25x, 5x and 11.7x outpainting settings, PQDiff only takes \\textbf{40.6\\%}, \\textbf{20.3\\%} and \\textbf{10.2\\%} of the time of the benchmark state-of-the-art (SOTA) method.", "keywords": "Diffusion models;image outpainting", "primary_area": "generative models", "supplementary_material": "", "author": "Shaofeng Zhang;Jinfa Huang;Qiang Zhou;zhibin wang;Fan Wang;Jiebo Luo;Junchi Yan", "authorids": "~Shaofeng_Zhang1;~Jinfa_Huang2;~Qiang_Zhou8;~zhibin_wang2;~Fan_Wang6;~Jiebo_Luo1;~Junchi_Yan2", "gender": "M;M;M;F;M;M;M", "homepage": "https://sherrylone.github.io;https://github.com/inFaaa;https://mightyzau.github.io/;;https://www.cs.rochester.edu/u/jluo/;http://thinklab.sjtu.edu.cn/;", "dblp": "132/2540;39/9426;;;25/5545;60/7949.html;", "google_scholar": "VoVVJIgAAAAJ;https://scholar.google.com/citations?hl=en;;WCRGTHsAAAAJ;CcbnBvgAAAAJ;ga230VoAAAAJ;YHzKee8AAAAJ", "orcid": ";;0000-0003-3697-9348;0000-0001-7320-1119;0000-0002-4516-9729;0000-0001-9639-7679;0000-0001-7618-7973", "linkedin": ";;;;jieboluo/;;", "or_profile": "~Shaofeng_Zhang1;~Jinfa_Huang2;~Qiang_Zhou8;~Fan_Wang6;~Jiebo_Luo3;~Junchi_Yan1;~Zhibin_Wang1", "aff": "Shanghai Jiaotong University;University of Rochester;Alibaba Group;Alibaba Group;University of Rochester;Shanghai Jiaotong University;INF Tech", "aff_domain": "sjtu.edu.cn;rochester.edu;alibaba-inc.com;alibaba-inc.com;rochester.edu;sjtu.edu.cn;inftech.ai", "position": "PhD student;PhD student;Researcher;Senior Staff Algorithm Engineer;Full Professor;Full Professor;Researcher", "bibtex": "@inproceedings{\nzhang2024continuousmultiple,\ntitle={Continuous-Multiple Image Outpainting in One-Step via Positional Query and A Diffusion-based Approach},\nauthor={Shaofeng Zhang and Jinfa Huang and Qiang Zhou and zhibin wang and Fan Wang and Jiebo Luo and Junchi Yan},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=7hxoYxKDTV}\n}", "github": "", "project": "", "reviewers": "xgEg;HK74;urTZ", "pdf_size": 4340389, "rating": "6;6;8", "confidence": "5;4;4", "soundness": "3;3;3", "contribution": "3;3;4", "presentation": "2;3;3", "wc_summary": "44;63;47", "wc_strengths": "36;118;52", "wc_weaknesses": "340;27;30", "wc_questions": "31;21;178", "wc_review": "451;229;307", "wc_reply_reviewers": "61;0;58", "wc_reply_authors": "974;230;439", "reply_reviewers": "1;0;1", "reply_authors": "3;1;2", "rating_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 51.333333333333336, 8.339997335464536 ], "wc_strengths_avg": [ 68.66666666666667, 35.490217744549774 ], "wc_weaknesses_avg": [ 132.33333333333334, 146.84761565044977 ], "wc_questions_avg": [ 76.66666666666667, 71.76969338717345 ], "wc_review_avg": [ 329.0, 91.9565114605812 ], "wc_reply_reviewers_avg": [ 39.666666666666664, 28.075295585660754 ], "wc_reply_authors_avg": [ 547.6666666666666, 313.3053178964925 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8223303092029418712&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "openreview": 
"https://openreview.net/forum?id=7hxoYxKDTV", "pdf": "https://openreview.net/pdf?id=7hxoYxKDTV", "email": "sjtu.edu.cn;rochester.edu;alibaba-inc.com;alibaba-inc.com;rochester.edu;sjtu.edu.cn;inftech.ai", "author_num": 7, "aff_unique_index": "0;1;2;2;1;0;3", "aff_unique_norm": "Shanghai Jiao Tong University;University of Rochester;Alibaba Group;INF Tech", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.rochester.edu;https://www.alibaba.com;", "aff_unique_abbr": "SJTU;U of R;Alibaba;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;1;0", "aff_country_unique": "China;United States;" }, { "id": "7iCUSBlOgh", "title": "Toward Generalizability of Graph-based Imputation on Bio-Medical Missing Data", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recent work on graph-based imputation methods for missing features has garnered significant attention, largely due to the effectiveness of their ability to aggregate and propagate information through graph structures. However, these methods generally assume that the graph structure is readily available and manually mask the original features to simulate the scenario of missing features. This set of assumptions narrows the applicability of such techniques to real-world tabular data, where graph structure is not readily available and missing data is a prevalent issue, such as in cases involving confidential patient information. In light of this situation, and with the aim of enhancing generalizability, we propose GRASS that bridges the gap between recent graph-based imputation methods and real-world scenarios involving missing data in their initial states. Specifically, our approach begins with tabular data and employs a simple Multi-Layer Perceptron (MLP) layer to extract feature gradient, which serves as an additional resource for generating graph structures. Leveraging these gradients, we construct a graph from a feature (i.e., column) perspective and carry out column-wise feature propagation to impute missing values based on their similarity to other features. Once the feature matrix is imputed, we generate a second graph, but this time from a sample-oriented (i.e., row) perspective, which serves as the input for existing graph-based imputation models. 
We evaluate GRASS using real-world medical and bio-domain datasets, demonstrating their effectiveness and generalizability in handling versatile missing scenarios.", "keywords": "Missing Features;Graph-based Imputation;Tabular data", "primary_area": "applications to physical sciences (physics, chemistry, biology, etc.)", "supplementary_material": "", "author": "Sukwon Yun;Yunhak Oh;Junseok Lee;Xin Liu;Tsuyoshi Murata;Dongmin Hyun;Sein Kim;Tianlong Chen;Chanyoung Park", "authorids": "~Sukwon_Yun1;~Yunhak_Oh1;~Junseok_Lee1;~Xin_Liu24;~Tsuyoshi_Murata1;~Dongmin_Hyun2;~Sein_Kim1;~Tianlong_Chen1;~Chanyoung_Park1", "gender": "M;M;M;;M;;;M;M", "homepage": "https://sukwonyun.github.io/;https://yunhak0.github.io;;;https://www.net.comp.isct.ac.jp/murata.html;;http://dsail.kaist.ac.kr;https://tianlong-chen.github.io;https://dsail.kaist.ac.kr/", "dblp": "327/3464;317/6968;77/3729-2;;77/1703;;334/3935;;170/5430.html", "google_scholar": "AgqvtZkAAAAJ;SYoXjKMAAAAJ;Vn0aynYAAAAJ;;https://scholar.google.co.jp/citations?user=ws2fHhsAAAAJ;;https://scholar.google.co.kr/citations?user=qD5z_WkAAAAJ;LE3ctn0AAAAJ;lWk2LtQAAAAJ", "orcid": "0000-0002-5186-6563;0000-0002-9110-3042;0000-0003-3874-1667;;0000-0002-3818-7830;;0009-0009-9088-9491;0000-0001-7774-8197;0000-0002-5957-5816", "linkedin": ";;;;;;;tianlong-chen-783862167/;", "or_profile": "~Sukwon_Yun1;~Yunhak_Oh1;~Junseok_Lee1;~Xin_Liu24;~Tsuyoshi_Murata1;~Dongmin_Hyun2;~Sein_Kim1;~Tianlong_Chen1;~Chanyoung_Park1", "aff": "University of North Carolina at Chapel Hill;Korea Advanced Institute of Science & Technology;University of Texas Health Center at Houson;;Tokyo Institute of Technology;;Korea Advanced Institute of Science & Technology;Harvard University;Korea Advanced Institute of Science & Technology", "aff_domain": "cs.unc.edu;kaist.ac.kr;uth.tmc.edu;;titech.ac.jp;;kaist.ac.kr;harvard.edu;kaist.ac.kr", "position": "PhD student;PhD student;Researcher;;Full Professor;;PhD student;Postdoc;Assistant Professor", "bibtex": "@misc{\nyun2024toward,\ntitle={Toward Generalizability of Graph-based Imputation on Bio-Medical Missing Data},\nauthor={Sukwon Yun and Yunhak Oh and Junseok Lee and Xin Liu and Tsuyoshi Murata and Dongmin Hyun and Sein Kim and Tianlong Chen and Chanyoung Park},\nyear={2024},\nurl={https://openreview.net/forum?id=7iCUSBlOgh}\n}", "github": "", "project": "", "reviewers": "4SJf;r66i;T1VM;JSNu;Cqu6", "site": "https://openreview.net/forum?id=7iCUSBlOgh", "pdf_size": 2312914, "rating": "3;5;6;6;6", "confidence": "3;5;5;4;3", "soundness": "1;2;3;3;3", "contribution": "2;2;3;3;3", "presentation": "1;1;3;4;2", "wc_summary": "50;117;81;86;71", "wc_strengths": "15;96;13;42;76", "wc_weaknesses": "160;94;1092;147;133", "wc_questions": "15;164;40;74;3", "wc_review": "240;471;1226;349;283", "wc_reply_reviewers": "0;28;165;0;0", "wc_reply_authors": "949;3064;4788;1978;616", "reply_reviewers": "0;1;2;0;0", "reply_authors": "3;6;10;4;2", "rating_avg": [ 5.2, 1.16619037896906 ], "confidence_avg": [ 4.0, 0.8944271909999159 ], "soundness_avg": [ 2.4, 0.8 ], "contribution_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.2, 1.16619037896906 ], "wc_summary_avg": [ 81.0, 21.82658928921328 ], "wc_strengths_avg": [ 48.4, 32.97635516548183 ], "wc_weaknesses_avg": [ 325.2, 384.0371856995101 ], "wc_questions_avg": [ 59.2, 57.762963912874135 ], "wc_review_avg": [ 513.8, 364.5421237662392 ], "wc_reply_reviewers_avg": [ 38.6, 64.12363058966639 ], "wc_reply_authors_avg": [ 2279.0, 1518.971757472798 ], "reply_reviewers_avg": [ 0.6, 0.7999999999999999 ], 
"reply_authors_avg": [ 5.0, 2.8284271247461903 ], "replies_avg": [ 36, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.3834824944236852, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:6FqY728RwdUJ:scholar.google.com/&scioq=Toward+Generalizability+of+Graph-based+Imputation+on+Bio-Medical+Missing+Data&hl=en&as_sdt=0,5", "gs_version_total": 2, "aff_unique_index": "0;1;2;3;1;4;1", "aff_unique_norm": "University of North Carolina;Korea Advanced Institute of Science and Technology;University of Texas Health Science Center at Houston;Tokyo Institute of Technology;Harvard University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.unc.edu;https://www.kaist.ac.kr;https://www.uth.edu;https://www.titech.ac.jp;https://www.harvard.edu", "aff_unique_abbr": "UNC;KAIST;UTHealth;Titech;Harvard", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Chapel Hill;;Houston", "aff_country_unique_index": "0;1;0;2;1;0;1", "aff_country_unique": "United States;South Korea;Japan" }, { "id": "7ipjMIHVJt", "title": "DASFormer: Self-supervised Pretraining for Earthquake Monitoring", "track": "main", "status": "Reject", "tldr": "", "abstract": "Earthquake monitoring is a fundamental task to unravel the underlying physics of earthquakes and mitigate associated hazards for public safety. Distributed acoustic sensing, or DAS, which transforms pre-existing telecommunication cables into ultra-dense seismic networks, offers a cost-effective and scalable solution for next-generation earthquake monitoring. However, current approaches for earthquake monitoring primarily rely on supervised learning, while manually labeled DAS data is quite limited and it is difficult to obtain more annotated datasets. In this paper, we present DASFormer, a novel self-supervised pretraining technique on DAS data with a coarse-to-fine framework that models spatial-temporal signal correlation. Given the pretrained DASFormer, we treat earthquake monitoring as an anomaly detection task and demonstrate that the pretrained DASFormer can be successfully utilized as a seismic phase detector. 
Experimental results demonstrate that DASFormer is effective in terms of several evaluation metrics and outperforms state-of-the-art time-series forecasting, anomaly detection, and foundation models on several datasets in the seismic detection tasks.", "keywords": "deep learning;self-supervised learning;time series analysis;anomaly detection;earthquake monitoring", "primary_area": "applications to physical sciences (physics, chemistry, biology, etc.)", "supplementary_material": "", "author": "Qianggang Ding;Zhichao Shen;Weiqiang Zhu;Bang Liu", "authorids": "~Qianggang_Ding1;~Zhichao_Shen1;~Weiqiang_Zhu1;~Bang_Liu1", "gender": "M;M;;M", "homepage": "http://www.mrdqg.com/;https://www.whoi.edu/profile/zhichao.shen/;;http://www-labs.iro.umontreal.ca/~liubang/", "dblp": "247/1295;;;", "google_scholar": "ga3j4_oAAAAJ;oWmPQzUAAAAJ;ApsNeMkAAAAJ;lmfAnP4AAAAJ", "orcid": ";0000-0003-0458-5264;;0000-0002-9483-8984", "linkedin": ";;;bang-liu-12b66789/?originalSubdomain=ca", "or_profile": "~Qianggang_Ding1;~Zhichao_Shen1;~Weiqiang_Zhu1;~Bang_Liu1", "aff": "Universit\u00e9 de Montr\u00e9al;Woods Hole Oceanographic Institution;University of California, Berkeley;University of Montreal", "aff_domain": "umontreal.ca;whoi.edu;berkeley.edu;umontreal.ca", "position": "PhD student;Postdoc;Assistant Professor;Assistant Professor", "bibtex": "@misc{\nding2024dasformer,\ntitle={{DASF}ormer: Self-supervised Pretraining for Earthquake Monitoring},\nauthor={Qianggang Ding and Zhichao Shen and Weiqiang Zhu and Bang Liu},\nyear={2024},\nurl={https://openreview.net/forum?id=7ipjMIHVJt}\n}", "github": "", "project": "", "reviewers": "Hht1;RSrX;8ntH;gsBc", "site": "https://openreview.net/forum?id=7ipjMIHVJt", "pdf_size": 9282097, "rating": "5;5;5;6", "confidence": "4;3;5;3", "soundness": "3;3;3;2", "contribution": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "65;20;112;79", "wc_strengths": "31;50;73;71", "wc_weaknesses": "283;83;538;459", "wc_questions": "192;75;5;98", "wc_review": "571;228;728;707", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1160;732;1829;1037", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;3;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 69.0, 33.03785707336358 ], "wc_strengths_avg": [ 56.25, 17.137313091613866 ], "wc_weaknesses_avg": [ 340.75, 175.11478378480786 ], "wc_questions_avg": [ 92.5, 66.88235940814289 ], "wc_review_avg": [ 558.5, 200.10559712311897 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1189.5, 400.74711477439234 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:34S76So_WW0J:scholar.google.com/&scioq=DASFormer:+Self-supervised+Pretraining+for+Earthquake+Monitoring&hl=en&as_sdt=0,47", "gs_version_total": 0, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Universit\u00e9 de Montr\u00e9al;Woods Hole Oceanographic Institution;University of California, Berkeley;University of Montreal", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.umontreal.ca;https://www.whoi.edu;https://www.berkeley.edu;https://www.umontreal.ca", "aff_unique_abbr": "UdeM;WHOI;UC Berkeley;UM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley",
"aff_country_unique_index": "0;1;1;0", "aff_country_unique": "Canada;United States" }, { "id": "7iuFxx9Ccx", "title": "Resource Efficient Test-Time Training with Slimmable Network", "track": "main", "status": "Reject", "tldr": "", "abstract": "Test-Time Training (TTT), an innovative paradigm for enhancing a model's generalization in a specific future scenario, commonly leverages self-supervised learning to adapt the model to the unlabeled test data under distribution shifts. However, previous TTT methods tend to disregard resource constraints during the deployment phase in real-world scenarios and have two fundamental shortcomings. Firstly, they are obligated to retrain adapted models when deploying across multiple devices with diverse resource limitations, causing considerable resource inefficiency. Secondly, they are incapable of coping with computational budget variations during the testing stage. To tackle these issues, we propose a resource-adaptive test-time training framework called SlimTTT, which allows for the seamless switching of different sub-networks for adaptive inference. Furthermore, we discover that different width of sub-networks can capture different views of images and these views are complementary and beneficial to the ones created by data augmentation, which is widely used in TTT. To utilize these views, we introduce Width-enhance Contrastive Learning (WCL), Logits Consistency Regularization (LCR) and Global Feature Alignment (GFA) to promote representation consistency at both feature and prediction space in a self-supervised manner, enabling networks of different widths to excel in TTT tasks. Our proposed method, SlimTTT, has achieved state-of-the-art (SOTA) results across a variety of adaptation methods and four different datasets with varying backbones. 
Remarkably, despite a significant reduction in computational complexity - over 70% less than the current SOTA method - SlimTTT continues to deliver competitive performance, rendering it highly conducive for adoption in practice.", "keywords": "Test-Time Training;Resource Efficient;Slimmable Neural Network", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/0decffc758dac0536859f1845c9bc589cdf1d853.zip", "author": "Lincan Cai;Shuang Li;Wenxuan Ma;Ling Liu", "authorids": "~Lincan_Cai1;~Shuang_Li6;~Wenxuan_Ma2;~Ling_Liu8", "gender": ";M;M;F", "homepage": "https://github.com/cailincan0129;https://shuangli.xyz;;https://github.com/llada60", "dblp": ";43/6294-8;289/0784-1;", "google_scholar": "wH-dNbAAAAAJ;VXCiAc4AAAAJ;u7aJOt8AAAAJ;", "orcid": ";0000-0001-6807-9905;0000-0001-5402-6028;", "linkedin": ";;;", "or_profile": "~Lincan_Cai1;~Shuang_Li6;~Wenxuan_Ma2;~Ling_Liu8", "aff": "Beijing Institute of Technology;Beijing Institute of Technology;Beijing Institute of Technology;Beijing Institute of Technology", "aff_domain": "bit.edu.cn;bit.edu.cn;bit.edu.cn;bit.edu.cn", "position": "MS student;Associate Professor;MS student;Undergrad student", "bibtex": "@misc{\ncai2024resource,\ntitle={Resource Efficient Test-Time Training with Slimmable Network},\nauthor={Lincan Cai and Shuang Li and Wenxuan Ma and Ling Liu},\nyear={2024},\nurl={https://openreview.net/forum?id=7iuFxx9Ccx}\n}", "github": "", "project": "", "reviewers": "yJHx;by2E;1QhS;aCQv", "site": "https://openreview.net/forum?id=7iuFxx9Ccx", "pdf_size": 765386, "rating": "5;5;6;8", "confidence": "4;4;4;3", "soundness": "1;2;3;4", "contribution": "2;2;3;3", "presentation": "3;2;3;2", "wc_summary": "92;58;332;110", "wc_strengths": "61;29;163;55", "wc_weaknesses": "152;180;169;94", "wc_questions": "12;5;67;183", "wc_review": "317;272;731;442", "wc_reply_reviewers": "0;138;222;22", "wc_reply_authors": "1404;3521;2910;1669", "reply_reviewers": "0;1;2;1", "reply_authors": "2;6;4;4", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 1.118033988749895 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 148.0, 107.86102168995063 ], "wc_strengths_avg": [ 77.0, 51.088159097779204 ], "wc_weaknesses_avg": [ 148.75, 33.14645531576491 ], "wc_questions_avg": [ 66.75, 71.28244875142829 ], "wc_review_avg": [ 440.5, 178.9112908678488 ], "wc_reply_reviewers_avg": [ 95.5, 89.90411558988832 ], "wc_reply_authors_avg": [ 2376.0, 871.8964961507759 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 4.0, 1.4142135623730951 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9428090415820632, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:MVLhjDRX0kMJ:scholar.google.com/&scioq=Resource+Efficient+Test-Time+Training+with+Slimmable+Network&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Beijing Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.bit.edu.cn/", "aff_unique_abbr": "BIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "7j5KWl7VtF", "title": "Exploring the Edge of Stability: Insights from a Fine-Grained Analysis of Gradient Descent in Shallow ReLU Networks", "track": "main", "status": "Withdraw", "tldr": "", "abstract": 
"Gradient descent (GD) in modern neural networks initially sharpens the loss landscape by increasing the top Hessian eigenvalues until the step size becomes unstable. Subsequently, it enters the ``Edge of Stability'' (EoS) regime, characterized by unstable step size and non-monotonic loss reduction. EoS regime challenges conventional step size wisdom, sparking recent intensive research. However, a detailed characterization of EoS within the fine-grained GD neural network training dynamics remains under-explored. This paper provides a comprehensive analysis of both the sharpening phase and the EoS regime throughout the entire GD dynamics, focusing on shallow ReLU networks with squared loss on orthogonal inputs. Our theory characterizes the evolution of the top Hessian eigenvalues and elucidates the mechanisms behind EoS training. Leveraging this analysis, we present empirical validations of our predictions regarding sharpening and EoS dynamics, contributing to a deeper understanding of neural network training processes.", "keywords": "gradient descent;edge of stability", "primary_area": "optimization", "supplementary_material": "", "author": "Junghwan Kim;Michelle YoungJin Kim;Barzan Mozafari", "authorids": "~Junghwan_Kim1;~Michelle_YoungJin_Kim1;~Barzan_Mozafari1", "gender": "M;M;F", "homepage": ";https://web.eecs.umich.edu/~mozafari/;https://cozymichelle.github.io/", "dblp": ";;", "google_scholar": "msafJ3UAAAAJ;;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Junghwan_Kim1;~Barzan_Mozafari1;~Michelle_Kim2", "aff": "University of Michigan - Ann Arbor;University of Michigan;Michigan State University", "aff_domain": "umich.edu;umich.edu;msu.edu", "position": "PhD student;Associate Professor;PhD student", "bibtex": "@misc{\nkim2024exploring,\ntitle={Exploring the Edge of Stability: Insights from a Fine-Grained Analysis of Gradient Descent in Shallow Re{LU} Networks},\nauthor={Junghwan Kim and Michelle YoungJin Kim and Barzan Mozafari},\nyear={2024},\nurl={https://openreview.net/forum?id=7j5KWl7VtF}\n}", "github": "", "project": "", "reviewers": "QK5F;CLYU;xPNL;5wmW", "site": "https://openreview.net/forum?id=7j5KWl7VtF", "pdf_size": 608370, "rating": "1;3;3;3", "confidence": "3;4;5;4", "soundness": "1;1;2;1", "contribution": "2;1;1;1", "presentation": "2;2;2;1", "wc_summary": "55;56;36;188", "wc_strengths": "36;37;43;2", "wc_weaknesses": "121;319;202;2", "wc_questions": "166;8;1;7", "wc_review": "378;420;282;199", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 2.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 1.25, 0.4330127018922193 ], "contribution_avg": [ 1.25, 0.4330127018922193 ], "presentation_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_avg": [ 83.75, 60.71398109167278 ], "wc_strengths_avg": [ 29.5, 16.101242188104617 ], "wc_weaknesses_avg": [ 161.0, 115.67843359935334 ], "wc_questions_avg": [ 45.5, 69.62219473702334 ], "wc_review_avg": [ 319.75, 85.80319050012068 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:mERti5tom8oJ:scholar.google.com/&scioq=Exploring+the+Edge+of+Stability:+Insights+from+a+Fine-Grained+Analysis+of+Gradient+Descent+in+Shallow+ReLU+Networks&hl=en&as_sdt=0,44", 
"gs_version_total": 0, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Michigan;Michigan State University", "aff_unique_dep": ";", "aff_unique_url": "https://www.umich.edu;https://www.msu.edu", "aff_unique_abbr": "UM;MSU", "aff_campus_unique_index": "0", "aff_campus_unique": "Ann Arbor;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "7jUQHmz4Tq", "title": "D3AD: DYNAMIC DENOISING DIFFUSION PROBABILISTIC MODEL FOR ANOMALY DETECTION", "track": "main", "status": "Reject", "tldr": "", "abstract": "Diffusion models have found valuable applications in anomaly detection by capturing the nominal data distribution and identifying anomalies via reconstruction. Despite their merits, they struggle to localize anomalies of varying scales, especially larger anomalies like entire missing components. Addressing this, we present a novel framework that enhances the capability of diffusion models, by extending the previous introduced implicit conditioning approach \\cite{DBLP:conf/iclr/MengHSSWZE22} in three significant ways. First, we incorporate a dynamic step size computation that allows for variable noising steps in the forward process guided by an initial anomaly prediction. Second, we demonstrate that denoising an only scaled input, without any added noise, outperforms conventional denoising process. Third, we project images in a latent space to abstract away from fine details that interfere with reconstruction of large missing components. Additionally, we propose a fine-tuning mechanism that facilitates the model to effectively grasp the nuances of the target domain. Our method undergoes rigorous evaluation on two prominent anomaly detection datasets VISA and BTAD, yielding state-of-the-art performance. Importantly, our framework effectively localizes anomalies regardless of their scale, marking a pivotal advancement in diffusion-based anomaly detection. 
All code will be made public upon acceptance.", "keywords": "Anomaly Detection;Diffusion Models;Domain Adaptation;Score Based Models", "primary_area": "generative models", "supplementary_material": "/attachment/8d398527c3c4d820a8e622da38507b989203354b.zip", "author": "Justin Tebbe;Jawad Tayyub", "authorids": "~Justin_Tebbe1;~Jawad_Tayyub1", "gender": ";M", "homepage": ";", "dblp": ";161/2637", "google_scholar": ";BtS3MQsAAAAJ", "orcid": ";", "linkedin": "justin-tebbe-18a056283;jawad-tayyub-b8899a23/", "or_profile": "~Justin_Tebbe1;~Jawad_Tayyub1", "aff": "Otto-von-Guericke Universit\u00e4t Magdeburg;Endress + Hauser", "aff_domain": "ovgu.de;endress.com", "position": "MS student;AI Research Scientist", "bibtex": "@misc{\ntebbe2024dad,\ntitle={D3{AD}: {DYNAMIC} {DENOISING} {DIFFUSION} {PROBABILISTIC} {MODEL} {FOR} {ANOMALY} {DETECTION}},\nauthor={Justin Tebbe and Jawad Tayyub},\nyear={2024},\nurl={https://openreview.net/forum?id=7jUQHmz4Tq}\n}", "github": "", "project": "", "reviewers": "vjSv;Emgj;xkrq", "site": "https://openreview.net/forum?id=7jUQHmz4Tq", "pdf_size": 25497277, "rating": "3;3;3", "confidence": "5;5;5", "soundness": "1;2;2", "contribution": "1;2;2", "presentation": "2;1;2", "wc_summary": "75;72;66", "wc_strengths": "11;47;26", "wc_weaknesses": "490;122;169", "wc_questions": "25;97;49", "wc_review": "601;338;310", "wc_reply_reviewers": "503;0;180", "wc_reply_authors": "1097;381;794", "reply_reviewers": "1;0;1", "reply_authors": "2;1;2", "rating_avg": [ 3.0, 0.0 ], "confidence_avg": [ 5.0, 0.0 ], "soundness_avg": [ 1.6666666666666667, 0.4714045207910317 ], "contribution_avg": [ 1.6666666666666667, 0.4714045207910317 ], "presentation_avg": [ 1.6666666666666667, 0.4714045207910317 ], "wc_summary_avg": [ 71.0, 3.7416573867739413 ], "wc_strengths_avg": [ 28.0, 14.7648230602334 ], "wc_weaknesses_avg": [ 260.3333333333333, 163.5284548803527 ], "wc_questions_avg": [ 57.0, 29.93325909419153 ], "wc_review_avg": [ 416.3333333333333, 131.07843283910424 ], "wc_reply_reviewers_avg": [ 227.66666666666666, 208.0966655720899 ], "wc_reply_authors_avg": [ 757.3333333333334, 293.45338452450824 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5215605692183936052&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1+2", "aff_unique_norm": "Otto-von-Guericke University Magdeburg;Endress;Hauser", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ovgu.de;;", "aff_unique_abbr": "OVGU;;", "aff_campus_unique_index": "0;", "aff_campus_unique": "Magdeburg;", "aff_country_unique_index": "0;", "aff_country_unique": "Germany;" }, { "id": "7jWiBAWG0b", "title": "Learning Guarantees for Non-convex Pairwise SGD with Heavy Tails", "track": "main", "status": "Reject", "tldr": "", "abstract": "In recent years, there have been a growing number of works studying the generalization properties of pairwise stochastic gradient descent (SGD) from the perspective of algorithmic stability. However, few of them devote to simultaneously studying the generalization and optimization for the non-convex setting, especially the ones with heavy-tailed gradient noise. This paper establishes the stability-based learning guarantees for non-convex, heavy-tailed pairwise SGD by investigating its generalization and optimization jointly. 
Firstly, we bound the generalization error of pairwise SGD in the general non-convex setting, after bridging the quantitative relationships between $\\ell_1$ on-average model stability and generalization error. Secondly, a refined generalization bound is established for non-convex pairwise SGD by introducing the heavy-tailed gradient noise to remove the bounded gradient assumption. Finally, the sharper error bounds for generalization and optimization are provided under the gradient dominance condition. In addition, we extend our analysis to the corresponding pairwise minibatch SGD and derive the first stability-based near-optimal generalization and optimization bounds which are consistent with many empirical observations. These theoretical results fill the learning theory gap for non-convex pairwise SGD with heavy tails.", "keywords": "Stability;generalization bound;stochastic gradient descent;pairwise learning;heavy tail", "primary_area": "learning theory", "supplementary_material": "", "author": "Jun Chen;Hong Chen;Bin Gu;Tieliang Gong;Guodong Liu;Yingjie Wang;Weifu Li", "authorids": "~Jun_Chen12;~Hong_Chen1;~Bin_Gu1;~Tieliang_Gong2;~Guodong_Liu2;~Yingjie_Wang1;~Weifu_Li1", "gender": "M;;M;;M;M;M", "homepage": "https://www.researchgate.net/profile/Jun-Chen-256;https://chenhongml.github.io/;https://mbzuai.ac.ae/study/faculty/bin-gu/;;;https://www.researchgate.net/profile/Yingjie-Wang-37;https://www.researchgate.net/profile/Weifu-Li", "dblp": ";https://dblp.uni-trier.de/pers/hd/c/Chen_0004:Hong;29/1758-1;;;33/6297-7;198/9625", "google_scholar": "GkZkdRYAAAAJ;;Vo8OgCgAAAAJ;;Xgwse5AAAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": "0000-0002-9810-5171;;0000-0001-6049-1815;;;;", "linkedin": "%E5%90%9B-%E9%99%88-4a8823276/;;;;guodong-liu-56a671107/;;", "or_profile": "~Jun_Chen12;~Hong_Chen1;~Bin_Gu1;~Tieliang_Gong2;~Guodong_Liu2;~Yingjie_Wang1;~Weifu_Li1", "aff": "Huazhong Agricultural University;Huazhong Agricultural University;Mohamed bin Zayed University of Artificial Intelligence;;University of Maryland, College Park;Nanyang Technological University;Huazhong Agricultural University", "aff_domain": "hzau.edu.cn;hzau.edu.cn;mbzuai.ac.ae;;umd.edu;ntu.edu.sg;hzau.edu.cn", "position": "PhD student;Full Professor;Assistant Professor;;Postdoc;Postdoc;Associate Professor", "bibtex": "@misc{\nchen2024learning,\ntitle={Learning Guarantees for Non-convex Pairwise {SGD} with Heavy Tails},\nauthor={Jun Chen and Hong Chen and Bin Gu and Tieliang Gong and Guodong Liu and Yingjie Wang and Weifu Li},\nyear={2024},\nurl={https://openreview.net/forum?id=7jWiBAWG0b}\n}", "github": "", "project": "", "reviewers": "dGQ5;6TFb;dBNi", "site": "https://openreview.net/forum?id=7jWiBAWG0b", "pdf_size": 589245, "rating": "5;5;6", "confidence": "3;3;3", "soundness": "3;2;3", "contribution": "2;3;2", "presentation": "3;2;3", "wc_summary": "50;93;121", "wc_strengths": "41;39;76", "wc_weaknesses": "464;255;86", "wc_questions": "235;66;55", "wc_review": "790;453;338", "wc_reply_reviewers": "0;116;30", "wc_reply_authors": "1633;1746;641", "reply_reviewers": "0;1;1", "reply_authors": "4;4;3", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 88.0, 29.20045661743437 ], "wc_strengths_avg": [ 52.0, 16.990193249832878 ], "wc_weaknesses_avg": [ 268.3333333333333, 
154.60559138947363 ], "wc_questions_avg": [ 118.66666666666667, 82.3825763671475 ], "wc_review_avg": [ 527.0, 191.8037191158364 ], "wc_reply_reviewers_avg": [ 48.666666666666664, 49.16186417223099 ], "wc_reply_authors_avg": [ 1340.0, 496.41582032270753 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 3.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:OA79FH0x53cJ:scholar.google.com/&scioq=Learning+Guarantees+for+Non-convex+Pairwise+SGD+with+Heavy+Tails&hl=en&as_sdt=0,14", "gs_version_total": 0, "aff_unique_index": "0;0;1;2;3;0", "aff_unique_norm": "Huazhong Agricultural University;Mohamed bin Zayed University of Artificial Intelligence;University of Maryland;Nanyang Technological University", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.hzau.edu.cn/;https://mbzuai.ac.ae;https://www.umd.edu;https://www.ntu.edu.sg", "aff_unique_abbr": "HAU;MBZUAI;UMD;NTU", "aff_campus_unique_index": "1", "aff_campus_unique": ";College Park", "aff_country_unique_index": "0;0;1;2;3;0", "aff_country_unique": "China;United Arab Emirates;United States;Singapore" }, { "id": "7kKyELnAhn", "title": "Efficient Offline Preference-Based Reinforcement Learning with Transition-Dependent Discounting", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Offline preference-based reinforcement learning (OPBRL) tackles two major limitations of traditional reinforcement learning: the need for online interaction and the requirement for carefully designed reward labels. Despite recent progress, solving complex tasks with a small number of preference labels remains challenging, as the learned reward function is inaccurate when preference labels are scarce. To tackle this challenge, we first demonstrate that the inaccurate reward model predicts low-preference regions much more precisely than high-preference regions, as the former suffers less from generalization errors. By incorporating this insight with offline RL's pessimism property, we propose a novel OPBRL framework, Transition-dEpendent Discounting (TED), that excels in complex OPBRL tasks with only a small number of preference queries. TED assigns low transition-dependent discount factors to the predicted low-preference regions, which discourages the offline agent from visiting these regions and achieves higher performance.
On the challenging Meta-World MT1 tasks, TED significantly outperforms current OPBRL baselines.", "keywords": "preference-based reinforcement learning;offline reinforcement learning;RLHF;transition-dependent discounting", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/d499e23ae564a989731714cd81c6163b70aaf9ad.zip", "author": "Jin Zhang;Hao Hu;Yiqin Yang;Bo Liu;Yang Gao;Chongjie Zhang", "authorids": "~Jin_Zhang6;~Hao_Hu3;~Yiqin_Yang1;~Bo_Liu2;~Yang_Gao1;~Chongjie_Zhang1", "gender": "M;M;M;M;M;", "homepage": "http://group.iiis.tsinghua.edu.cn/~milab/person-zhangjin.html;https://mousehu.github.io;https://www.researchgate.net/profile/Yiqin-Yang-2;https://liubo-cs.github.io/;http://yang-gao.weebly.com;", "dblp": "43/6657-16;67/6924-6;180/7725;58/2670-6.html;89/4402-29;29/6693", "google_scholar": ";https://scholar.google.com/citations?hl=en;aHTi5IEAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;LjxqXycAAAAJ", "orcid": ";;;0000-0003-2519-6196;;", "linkedin": ";hao-hu-tsinghua;;bo-liu-8b2b8118/;yang-gao-45245348/;", "or_profile": "~Jin_Zhang6;~Hao_Hu3;~Yiqin_Yang1;~Bo_Liu2;~Yang_Gao1;~Chongjie_Zhang1", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Auburn University;Tsinghua University;Washington University, Saint Louis", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;auburn.edu;tsinghua.edu.cn;wustl.edu", "position": "PhD student;PhD student;PhD student;Assistant Professor;Assistant Professor;Associate Professor", "bibtex": "@misc{\nzhang2024efficient,\ntitle={Efficient Offline Preference-Based Reinforcement Learning with Transition-Dependent Discounting},\nauthor={Jin Zhang and Hao Hu and Yiqin Yang and Bo Liu and Yang Gao and Chongjie Zhang},\nyear={2024},\nurl={https://openreview.net/forum?id=7kKyELnAhn}\n}", "github": "", "project": "", "reviewers": "AQAk;CqDq;KeDu;wPHc", "site": "https://openreview.net/forum?id=7kKyELnAhn", "pdf_size": 744407, "rating": "1;3;3;3", "confidence": "4;4;4;4", "soundness": "2;1;2;2", "contribution": "2;3;2;2", "presentation": "2;2;3;1", "wc_summary": "86;148;59;26", "wc_strengths": "26;72;37;38", "wc_weaknesses": "138;257;104;630", "wc_questions": "27;137;68;2", "wc_review": "277;614;268;696", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 2.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 1.75, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 79.75, 44.76815274277017 ], "wc_strengths_avg": [ 43.25, 17.25362280797862 ], "wc_weaknesses_avg": [ 282.25, 208.65566730860678 ], "wc_questions_avg": [ 58.5, 51.08081831764248 ], "wc_review_avg": [ 463.75, 193.4610748962178 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:YrOpRukvZLEJ:scholar.google.com/&scioq=Efficient+Offline+Preference-Based+Reinforcement+Learning+with+Transition-Dependent+Discounting&hl=en&as_sdt=0,14", "gs_version_total": 0, "aff_unique_index": "0;0;0;1;0;2", "aff_unique_norm": "Tsinghua University;Auburn University;Washington University in St. 
Louis", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.auburn.edu;https://wustl.edu", "aff_unique_abbr": "THU;Auburn;WUSTL", "aff_campus_unique_index": "1", "aff_campus_unique": ";Saint Louis", "aff_country_unique_index": "0;0;0;1;0;1", "aff_country_unique": "China;United States" }, { "id": "7kubdPrlRY", "title": "A PERSPECTIVE OF IMPROPER DYNAMICS ON OFFLINE MODEL-BASED PLANNING", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "By learning the dynamics model, estimating, and planning on the latent state, MuZero and its variants perform well in complex environments. However, the performance of these algorithms require an accurate dynamics model and prediction model, which may be difficult in offline reinforcement learning since the lack of interactions with the environment. Recent works attempt to use one-step rollouts to reduce the cumulative error of rollout caused by an inaccurate dynamics model. We argue that the planning issues of MuZero-type methods are mainly caused by inaccurate models. To address this issue, we propose a robust method, Constrained Offline Model-based Planning (COMP), for training dynamics or prediction models more smoothly. COMP introduces a kind of specifically designed noise to the latent state, aiming to align the value and dynamics of these states with those of states not perturbed. Our method can be combined with MuZero and its derived algorithms to improve planning performance in offline settings. Experiments show that our proposed method achieved notable performance in most Atari game tasks on RL Unplugged benchmark.", "keywords": "Offline RL;model-based RL;deep RL;planning", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Chen Zhao;Yazhe Niu;Kaixin Huang;Yu Liu;Chun Yuan", "authorids": "~Chen_Zhao8;~Yazhe_Niu1;~Kaixin_Huang1;~Yu_Liu2;~Chun_Yuan1", "gender": "M;M;M;M;M", "homepage": "https://github.com/PaParaZz1;;http://liuyu.us;https://www.sigs.tsinghua.edu.cn/fg3/105064.jhtml;", "dblp": "252/5570.html;;97/2274-15;;", "google_scholar": "P3BUrBQAAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN;;https://scholar.google.com.hk/citations?user=fYdxi2sAAAAJ;https://scholar.google.com/citations?view_op=new_profile", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Yazhe_Niu1;~Kaixin_Huang1;~Yu_Liu2;~Chun_Yuan1;~Zhao_Chen7", "aff": "The Chinese University of Hong Kong;Electronic Engineering, Tsinghua University, Tsinghua University;SenseTime;Tsinghua University;Tsinghua University", "aff_domain": "cuhk.edu.hk;mails.tsinghua.edu.cn;sensetime.com;tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;MS student;Principal Researcher;Full Professor;MS student", "bibtex": "@misc{\nzhao2024a,\ntitle={A {PERSPECTIVE} {OF} {IMPROPER} {DYNAMICS} {ON} {OFFLINE} {MODEL}-{BASED} {PLANNING}},\nauthor={Chen Zhao and Yazhe Niu and Kaixin Huang and Yu Liu and Chun Yuan},\nyear={2024},\nurl={https://openreview.net/forum?id=7kubdPrlRY}\n}", "github": "", "project": "", "reviewers": "pj73;LURo;dRm8;NRcG", "site": "https://openreview.net/forum?id=7kubdPrlRY", "pdf_size": 2305251, "rating": "1;3;3;3", "confidence": "4;4;4;4", "soundness": "2;2;2;2", "contribution": "1;2;2;1", "presentation": "1;2;2;2", "wc_summary": "66;38;48;43", "wc_strengths": "30;17;68;11", "wc_weaknesses": "839;266;176;104", "wc_questions": "14;1;60;29", "wc_review": "949;322;352;187", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", 
"rating_avg": [ 2.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.0, 0.0 ], "contribution_avg": [ 1.5, 0.5 ], "presentation_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_avg": [ 48.75, 10.568230693924125 ], "wc_strengths_avg": [ 31.5, 22.1641602593015 ], "wc_weaknesses_avg": [ 346.25, 290.2209287766821 ], "wc_questions_avg": [ 26.0, 21.988633427296023 ], "wc_review_avg": [ 452.5, 293.31425127327174 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:DQYVGxuANnIJ:scholar.google.com/&scioq=A+PERSPECTIVE+OF+IMPROPER+DYNAMICS+ON+OFFLINE+MODEL-BASED+PLANNING&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;2;1;1", "aff_unique_norm": "Chinese University of Hong Kong;Tsinghua University;SenseTime", "aff_unique_dep": ";Electronic Engineering;", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.tsinghua.edu.cn;https://www.sensetime.com", "aff_unique_abbr": "CUHK;THU;SenseTime", "aff_campus_unique_index": "0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "7lmvCdD6va", "title": "P4Q: Learning to Prompt for Quantization in Visual-language Models", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Large-scale pre-trained Vision-Language Models (VLMs) have gained prominence in various visual and multimodal tasks, yet the deployment of VLMs on resource-constrained platforms remains challenging due to their prohibitive computational and memory overhead. Quantization of VLMs can substantially reduce the computational and memory costs, which are in urgent need. There are two prevailing paradigms, Quantization-Aware Training (QAT) can effectively quantize large-scale VLMs but incur a huge training cost, while low-bit Post-Training Quantization (PTQ) suffers from a notable performance drop. We propose a `Prompt for Quantization'' (P4Q) method, in which we design a lightweight architecture to leverage contrastive loss supervision to enhance the recognition performance of a PTQ model. Our method can effectively reduce the gap between image features and text features caused by low-bit quantization, based on learnable prompts to reorganize textual representations and a low-bit adapter to realign the distributions of image and text features. We also introduce a distillation loss based on cosine similarity predictions to distill the quantized model using a full-precision teacher. Extensive experimental results demonstrate that our P4Q method outperforms prior arts, even achieving comparable results to its full-precision counterparts. For instance, our 8-bit P4Q can theoretically compress the CLIP-ViT/B-32 by 4 $\\times$ while achieving 79.42\\% Top-1 accuracy, outperforming the learnable prompt fine-tuned full-precision model by 2.91\\% with negligible additional parameters on the CIFAR100 dataset. 
Test code and checkpoints are available at \\url{https://anonymous.4open.science/r/ICLR2024-P4Q-1255}", "keywords": "Quantization;Vision-Language Models (VLMs)", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Huixin Sun;Runqi Wang;Xianbin Cao;Yanjing Li;Xiaolong Jiang;Yao Hu;Baochang Zhang", "authorids": "~Huixin_Sun1;~Runqi_Wang1;~Xianbin_Cao2;~Yanjing_Li2;~Xiaolong_Jiang2;~Yao_Hu4;~Baochang_Zhang1", "gender": "F;M;M;;M;M;M", "homepage": "https://huixinsun.github.io/;;http://www.ee.buaa.edu.cn/info/1205/22851.htm;;https://dblp.org/pid/56/5097;https://dblp.uni-trier.de/pid/80/3887-1.html;", "dblp": "329/3654;266/9915;22/3485;62/201;;https://dblp.uni-trier.de/pid/80/3887-1.html;", "google_scholar": ";https://scholar.google.com.hk/citations?hl=zh-CN;;2rE-GM8AAAAJ;G0Ow8j8AAAAJ;;LIu7k7wAAAAJ", "orcid": ";;;0000-0003-3745-8755;;;0009-0006-1274-7111", "linkedin": "https://www.linkedin.cn/incareer/in/huixin-sun-52a5701a2;;;;;;", "or_profile": "~Huixin_Sun1;~Runqi_Wang1;~Xianbin_Cao2;~Yanjing_Li2;~Xiaolong_Jiang2;~Baochang_Zhang1;~Yao_Hu1", "aff": "Beihang University;Beihang University;Beihang University;Beihang University;Alibaba Group;Beihang University;Zhejiang University of Technology", "aff_domain": "buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;alibaba-inc.com;buaa.edu.cn;zjut.edu.cn", "position": "PhD student;PhD student;Full Professor;PhD student;Research Engineer;Professor;Researcher", "bibtex": "@misc{\nsun2024pq,\ntitle={P4Q: Learning to Prompt for Quantization in Visual-language Models},\nauthor={Huixin Sun and Runqi Wang and Xianbin Cao and Yanjing Li and Xiaolong Jiang and Yao Hu and Baochang Zhang},\nyear={2024},\nurl={https://openreview.net/forum?id=7lmvCdD6va}\n}", "github": "", "project": "", "reviewers": "xyBt;X2dx;iH7g;5VdB", "site": "https://openreview.net/forum?id=7lmvCdD6va", "pdf_size": 2874160, "rating": "3;3;5;5", "confidence": "4;5;4;4", "soundness": "2;2;3;3", "contribution": "2;1;2;2", "presentation": "2;2;3;3", "wc_summary": "112;54;130;70", "wc_strengths": "53;34;72;62", "wc_weaknesses": "251;184;295;163", "wc_questions": "46;2;265;31", "wc_review": "462;274;762;326", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 91.5, 30.70423423568808 ], "wc_strengths_avg": [ 55.25, 13.988834833537782 ], "wc_weaknesses_avg": [ 223.25, 52.651566928250105 ], "wc_questions_avg": [ 86.0, 104.54903155935975 ], "wc_review_avg": [ 456.0, 189.53627621117812 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14843857089839360972&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0;0;1;0;2", "aff_unique_norm": "Beihang University;Alibaba Group;Zhejiang University of Technology", "aff_unique_dep": ";;", "aff_unique_url": "http://www.buaa.edu.cn/;https://www.alibaba.com;https://www.zjut.edu.cn", "aff_unique_abbr": "BUAA;Alibaba;ZJUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": 
"0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "7m5jhNXklB", "title": "VTruST : Controllable value function based subset selection for Data-Centric Trustworthy AI", "track": "main", "status": "Reject", "tldr": "", "abstract": "Trustworthy AI is crucial to the widespread adoption of AI in high-stakes applications with explainability, fairness, and robustness being some of the key trustworthiness metrics. Data-Centric AI (DCAI) aims to construct high-quality datasets for efficient training of trustworthy models. In this work, we propose a controllable framework for data-centric trustworthy AI (DCTAI)- VTruST, that allows users to control the trade-offs between the different trustworthiness metrics of the constructed training datasets. A key challenge in implementing an efficient DCTAI framework is to design an online value-function-based training data subset selection algorithm. We pose the training data valuation and subset selection problem as an online sparse approximation formulation, where the $\\textit{features}$ for each training datapoint is obtained in an online manner through an iterative training algorithm. We propose a novel online version of the OMP algorithm for solving this problem. We also derive conditions on the data matrix, that guarantee the exact recovery of the sparse solution. We demonstrate the generality and effectiveness of our approach by designing data-driven value functions for the above trustworthiness metrics. Experimental results show that VTruST outperforms the state-of-the-art baselines for fair learning as well as robust training, on standard fair and robust datasets. We also demonstrate that VTruST can provide effective tradeoffs between different trustworthiness metrics through pareto optimal fronts. Finally, we show that the data valuation generated by VTruST can provide effective data-centric explanations for different trustworthiness metrics.", "keywords": "Data centric trustworthy AI;value function;data valuation;online sparse approximation;fairness;robustness;explainability", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/d8c70fa936124b80998e8c95b94b8ae1cc8e561d.zip", "author": "Soumi Das;Shubhadip Nag;Shreyyash Sharma;Suparna Bhattacharya;Sourangshu Bhattacharya", "authorids": "~Soumi_Das1;~Shubhadip_Nag1;~Shreyyash_Sharma1;~Suparna_Bhattacharya1;~Sourangshu_Bhattacharya1", "gender": "F;M;M;F;M", "homepage": "https://soumidas.github.io/;https://nagshubhadip.github.io/;http://cse.iitkgp.ac.in/~shreyyashs/;https://www.hpe.com/psnow/doc/a00130899enw;http://cse.iitkgp.ac.in/~sourangshu/", "dblp": "252/5452;;;10/2594;http://dblp.uni-trier.de/pers/hd/b/Bhattacharya:Sourangshu", "google_scholar": "1tJnMkoAAAAJ;https://scholar.google.com/citations?hl=en;;https://scholar.google.co.in/citations?user=ubLpnh4AAAAJ;https://scholar.google.co.in/citations?user=IixRsP0AAAAJ", "orcid": ";;;0000-0001-9541-4027;", "linkedin": "soumi-das;shubhadip-nag;;suparna-bhattacharya-5a7798b/;sourangshubhattacharya", "or_profile": "~Soumi_Das1;~Shubhadip_Nag1;~Shreyyash_Sharma1;~Suparna_Bhattacharya1;~Sourangshu_Bhattacharya1", "aff": "MPI-SWS;Indian Institute of Technology, Kharagpur;;Hewlett Packard Enterprise;Indian Institute of Technology Kharagpur", "aff_domain": "mpi-sws.org;iitkgp.ac.in;;hpe.com;iitkgp.ac.in", "position": "Postdoc;MS student;;Researcher;Associate Professor", "bibtex": "@misc{\ndas2024vtrust,\ntitle={{VT}ru{ST} : Controllable value function based subset selection for Data-Centric 
Trustworthy {AI}},\nauthor={Soumi Das and Shubhadip Nag and Shreyyash Sharma and Suparna Bhattacharya and Sourangshu Bhattacharya},\nyear={2024},\nurl={https://openreview.net/forum?id=7m5jhNXklB}\n}", "github": "", "project": "", "reviewers": "FQcd;7pAq;ohGL", "site": "https://openreview.net/forum?id=7m5jhNXklB", "pdf_size": 9471762, "rating": "5;5;6", "confidence": "3;3;3", "soundness": "2;2;3", "contribution": "3;2;3", "presentation": "3;3;2", "wc_summary": "69;78;161", "wc_strengths": "41;62;30", "wc_weaknesses": "183;103;99", "wc_questions": "3;2;87", "wc_review": "296;245;377", "wc_reply_reviewers": "0;0;151", "wc_reply_authors": "383;406;1004", "reply_reviewers": "0;0;1", "reply_authors": "1;1;2", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 102.66666666666667, 41.41121694527811 ], "wc_strengths_avg": [ 44.333333333333336, 13.274871834493252 ], "wc_weaknesses_avg": [ 128.33333333333334, 38.689648342791756 ], "wc_questions_avg": [ 30.666666666666668, 39.83577398380617 ], "wc_review_avg": [ 306.0, 54.35071296680477 ], "wc_reply_reviewers_avg": [ 50.333333333333336, 71.18208263944578 ], "wc_reply_authors_avg": [ 597.6666666666666, 287.47444330854097 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:vU38P9fvb6QJ:scholar.google.com/&scioq=VTruST+:+Controllable+value+function+based+subset+selection+for+Data-Centric+Trustworthy+AI&hl=en&as_sdt=0,5", "gs_version_total": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Max Planck Institute for Software Systems;Indian Institute of Technology;Hewlett Packard Enterprise;Indian Institute of Technology Kharagpur", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.mpi-sws.org;https://www.iitkgp.ac.in;https://www.hpe.com;https://www.iitkgp.ac.in", "aff_unique_abbr": "MPI-SWS;IIT Kharagpur;HPE;IIT Kharagpur", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Kharagpur", "aff_country_unique_index": "0;1;2;1", "aff_country_unique": "Germany;India;United States" }, { "id": "7mR83Q12cJ", "title": "Counterfactual Data Augmentation with Contrastive Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Statistical disparity between distinct treatment groups is one of the most significant challenges for estimating Conditional Average Treatment Effects (CATE). To address this, we introduce a model-agnostic data augmentation method that imputes the counterfactual outcomes for a selected subset of individuals. Specifically, we utilize contrastive learning to learn a representation space and a similarity measure such that in the learned representation space \\textit{close} individuals identified by the learned similarity measure have \\textit{similar} potential outcomes. This property ensures reliable imputation of counterfactual outcomes for the individuals with close neighbors from the alternative treatment group. By augmenting the original dataset with these reliable imputations, we can effectively reduce the discrepancy between different treatment groups, while inducing minimal imputation error. 
The augmented dataset is subsequently employed to train CATE estimation models. Theoretical analysis and experimental studies on synthetic and semi-synthetic benchmarks demonstrate that our method achieves significant improvements in both performance and robustness to overfitting across state-of-the-art models.", "keywords": "Data Augmentation;Contrastive Learning;Treatment Effect;Causal Inference", "primary_area": "causal reasoning", "supplementary_material": "/attachment/fd5e7459888b42e76d8e91a42b239b8b056762cb.zip", "author": "Ahmed Aloui;Juncheng Dong;Cat Phuoc Le;Vahid Tarokh", "authorids": "~Ahmed_Aloui1;~Juncheng_Dong1;~Cat_Phuoc_Le1;~Vahid_Tarokh1", "gender": "M;;M;", "homepage": ";;https://scholars.duke.edu/person/cat.le;", "dblp": "116/6738;;251/5583;", "google_scholar": ";;gSzKGdQAAAAJ;", "orcid": ";;0000-0002-9121-9395;", "linkedin": "ahmed-aloui-b06547153/;;catphuocle/;", "or_profile": "~Ahmed_Aloui1;~Juncheng_Dong1;~Cat_Phuoc_Le1;~Vahid_Tarokh1", "aff": "Duke University, Duke University;;Duke University;", "aff_domain": "ece.duke.edu;;duke.edu;", "position": "PhD student;;Postdoc;", "bibtex": "@misc{\naloui2024counterfactual,\ntitle={Counterfactual Data Augmentation with Contrastive Learning},\nauthor={Ahmed Aloui and Juncheng Dong and Cat Phuoc Le and Vahid Tarokh},\nyear={2024},\nurl={https://openreview.net/forum?id=7mR83Q12cJ}\n}", "github": "", "project": "", "reviewers": "AQb9;NLwi;Lde6;FUqM", "site": "https://openreview.net/forum?id=7mR83Q12cJ", "pdf_size": 2896223, "rating": "3;5;5;6", "confidence": "5;3;5;3", "soundness": "2;2;3;3", "contribution": "2;2;3;3", "presentation": "2;2;3;3", "wc_summary": "52;50;35;75", "wc_strengths": "6;13;29;126", "wc_weaknesses": "21;145;287;142", "wc_questions": "1024;37;31;42", "wc_review": "1103;245;382;385", "wc_reply_reviewers": "0;156;0;0", "wc_reply_authors": "1555;676;463;241", "reply_reviewers": "0;1;0;0", "reply_authors": "2;2;1;1", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 4.0, 1.0 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 53.0, 14.300349646075091 ], "wc_strengths_avg": [ 43.5, 48.355454707819675 ], "wc_weaknesses_avg": [ 148.75, 94.19759816470906 ], "wc_questions_avg": [ 283.5, 427.54561160185006 ], "wc_review_avg": [ 528.75, 336.33196027139616 ], "wc_reply_reviewers_avg": [ 39.0, 67.54998149518622 ], "wc_reply_authors_avg": [ 733.75, 498.4713507314136 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6882472016116854, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10401963946042605033&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Duke University", "aff_unique_dep": "", "aff_unique_url": "https://www.duke.edu", "aff_unique_abbr": "Duke", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "7n360rsYAq", "title": "Towards Dynamic Trend Filtering through Trend Points Detection with Reinforcement Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Trend filtering simplifies complex time series data by prioritizing proximity to the original data while applying smoothness to filter out noise. 
However, the inherent smoothness of trend filtering filters out the tail distribution of time series data, characterized as extreme values, thereby failing to reflect abrupt changes in the trend. In this paper, we introduce Trend Point Detection, a novel approach to trend filtering that directly identifies essential points that should be reflected in the trend including abrupt changes. We refer to these essential points as Dynamic Trend Points (DTPs) and extract trends from connecting these points. To identify DTPs, we formalize the Trend Point Detection problem as a Markov Decision Process (MDP). We solve the Trend Point Detection problem using Reinforcement Learning (RL) algorithms operating within a discrete action space, referred to as the Dynamic Trend Filtering network (DTF-net). DTF-net incorporates flexible noise filtering, preserving important original sub-sequences while removing noise as needed for other sub-sequences. We demonstrate that DTF-net excels at capturing abrupt changes compared to other trend filtering algorithms, using synthetic data and the Nasdaq intraday dataset. Furthermore, when we utilize DTF-net's trend as an additional feature for Time Series Forecasting (TSF) in non-stationary data, we demonstrate performance improvements, as abrupt changes are captured rather than smoothed out.", "keywords": "time series analysis;trend filtering;reinforcement learning;time series forecasting", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/d1a85b8573e87c6b89a8e6a5586dd8464abdcfb9.zip", "author": "Jihyeon Seong;Sekwang Oh;Jaesik Choi", "authorids": "~Jihyeon_Seong1;~Sekwang_Oh1;~Jaesik_Choi1", "gender": "F;M;M", "homepage": "http://sailab.kaist.ac.kr/members/#GraduateStudents;;https://sailab.kaist.ac.kr/jaesik", "dblp": ";;13/1402", "google_scholar": "DdSo9q4AAAAJ;;RqMLVzUAAAAJ", "orcid": "0000-0002-3591-131X;;", "linkedin": "jihyeon-seong-302571267/;oskoskosk;", "or_profile": "~Jihyeon_Seong1;~Sekwang_Oh1;~Jaesik_Choi1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "position": "PhD student;MS student;Associate Professor", "bibtex": "@misc{\nseong2024towards,\ntitle={Towards Dynamic Trend Filtering through Trend Points Detection with Reinforcement Learning},\nauthor={Jihyeon Seong and Sekwang Oh and Jaesik Choi},\nyear={2024},\nurl={https://openreview.net/forum?id=7n360rsYAq}\n}", "github": "", "project": "", "reviewers": "ALFQ;Fg2U;Q1zH;NMhu", "site": "https://openreview.net/forum?id=7n360rsYAq", "pdf_size": 1115341, "rating": "5;5;6;8", "confidence": "3;3;3;3", "soundness": "2;2;2;3", "contribution": "2;2;2;3", "presentation": "2;3;3;3", "wc_summary": "57;84;100;119", "wc_strengths": "49;67;118;102", "wc_weaknesses": "173;212;40;89", "wc_questions": "4;77;53;16", "wc_review": "283;440;311;326", "wc_reply_reviewers": "76;28;0;0", "wc_reply_authors": "918;1133;486;394", "reply_reviewers": "1;1;0;0", "reply_authors": "3;4;2;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 90.0, 22.726636354727024 ], "wc_strengths_avg": [ 84.0, 27.358728040608906 ], "wc_weaknesses_avg": [ 128.5, 67.72185762366534 ], "wc_questions_avg": [ 37.5, 29.090376415577712 ], 
"wc_review_avg": [ 340.0, 59.76202807803631 ], "wc_reply_reviewers_avg": [ 26.0, 31.04834939252005 ], "wc_reply_authors_avg": [ 732.75, 304.2017217242532 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:nl0k2DsgulMJ:scholar.google.com/&scioq=Towards+Dynamic+Trend+Filtering+through+Trend+Points+Detection+with+Reinforcement+Learning&hl=en&as_sdt=0,5", "gs_version_total": 6, "aff_unique_index": "0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "id": "7n8RzGQKnR", "title": "A Symbolic Framework for Evaluating Mathematical Reasoning with Transformers", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "This paper proposes a methodology for generating synthetic mathematical derivations via a computer algebra system to evaluate the generalisability of Transformers in symbolic and quantitative reasoning problems, and provides a general framework for building large-scale and high-quality benchmarks in the mathematical domain. In the context of classification tasks involving multi-step annotated derivations (spanning 18 mathematical operators), we leverage the framework to compare the mathematical capabilities of GPT-4, GPT-3.5, and a canon of fine-tuned BERT models, exploring the relationship between specific operators and generalisation failure. Surprisingly, the average in-distribution performance of BERT models surpasses GPT-3.5, and rivals GPT-4, yet simple data perturbations reduce BERT scores by up to 80 F1 points. The results suggest that the in-distribution performance and generalisability of smaller open-source models may potentially rival GPT in narrow mathematical domains by incorporating appropriately structured discourse-level relations during training, and highlight a shared weakness between BERT and GPT involving a relative inability to decode dependency relations involving indirect references to mathematical entities. 
We release the data generation framework along with all the resulting datasets and fine-tuned models\\footnote{\\url{https://github.com/anonymous/TBA}}.", "keywords": "mathematical reasoning;generalisation;gpt;bert;sequence classification;synthetic data;fine-tuning;few-shot learning", "primary_area": "datasets and benchmarks", "supplementary_material": "", "author": "Jordan Meadows;Marco Valentino;Damien Teney;Andre Freitas", "authorids": "~Jordan_Meadows1;~Marco_Valentino1;~Damien_Teney1;~Andre_Freitas1", "gender": "M;M;M;", "homepage": ";https://www.marcovalentino.net/;https://www.damienteney.info;http://andrefreitas.org", "dblp": ";212/3533;62/10068;47/9409.html", "google_scholar": "https://scholar.google.com/citations?hl=en;nnaBYcIAAAAJ;https://scholar.google.com.au/citations?user=iS_jP_3dpD8J;ExmHmMoAAAAJ", "orcid": ";;;", "linkedin": ";marco-valentino-844a5ab1/;;andrefreitas/", "or_profile": "~Jordan_Meadows1;~Marco_Valentino1;~Damien_Teney1;~Andre_Freitas1", "aff": "University of Manchester;Idiap Research Institute;Idiap Research Institute;University of Manchester", "aff_domain": "cs.manchester.ac.uk;idiap.ch;idiap.ch;manchester.ac.uk", "position": "PhD student;Postdoc;Researcher;Associate Professor", "bibtex": "@misc{\nmeadows2024a,\ntitle={A Symbolic Framework for Evaluating Mathematical Reasoning with Transformers},\nauthor={Jordan Meadows and Marco Valentino and Damien Teney and Andre Freitas},\nyear={2024},\nurl={https://openreview.net/forum?id=7n8RzGQKnR}\n}", "github": "", "project": "", "reviewers": "o3bz;Y9EH;RYtb;5bKm", "site": "https://openreview.net/forum?id=7n8RzGQKnR", "pdf_size": 948481, "rating": "3;3;6;6", "confidence": "3;3;3;4", "soundness": "3;2;4;3", "contribution": "2;1;3;3", "presentation": "2;1;3;4", "wc_summary": "42;75;77;164", "wc_strengths": "31;131;69;45", "wc_weaknesses": "92;482;6;51", "wc_questions": "166;102;23;1", "wc_review": "331;790;175;261", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "11;17;18;14", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 4.5, 1.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 89.5, 45.202322949158265 ], "wc_strengths_avg": [ 69.0, 38.28837943815329 ], "wc_weaknesses_avg": [ 157.75, 189.66071680767212 ], "wc_questions_avg": [ 73.0, 65.52480446365331 ], "wc_review_avg": [ 389.25, 237.8785141621664 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 15.0, 2.7386127875258306 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896258, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Frt3JHs3X-0J:scholar.google.com/&scioq=A+Symbolic+Framework+for+Evaluating+Mathematical+Reasoning+with+Transformers&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "University of Manchester;Idiap Research Institute", "aff_unique_dep": ";", "aff_unique_url": "https://www.manchester.ac.uk;https://www.idiap.ch", "aff_unique_abbr": "UoM;Idiap", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "United Kingdom;Switzerland" }, { "title": "TimeMixer: Decomposable Multiscale Mixing for Time Series Forecasting", "status": "Poster", "track": "main", "site": 
"https://iclr.cc/virtual/2024/poster/19347", "id": "7oLshfEIC2", "author_site": "Shiyu Wang, Haixu Wu, Xiaoming Shi, Tengge Hu, Huakun Luo, Lintao Ma, James Zhang, JUN ZHOU", "tldr": "", "abstract": "Time series forecasting is widely used in extensive applications, such as traffic planning and weather forecasting. However, real-world time series usually present intricate temporal variations, making forecasting extremely challenging. Going beyond the mainstream paradigms of plain decomposition and multiperiodicity analysis, we analyze temporal variations in a novel view of multiscale-mixing, where time series present distinct patterns in different sampling scales. Specifically, the microscopic and the macroscopic information are reflected in fine and coarse scales, respectively, and thereby complex variations are inherently disentangled. Based on this observation, we propose TimeMixer as a fully MLP-based architecture with Past-Decomposable-Mixing (PDM) and Future-Multipredictor-Mixing (FMM) blocks to take full advantage of disentangled multiscale series in both past extraction and future prediction phases. Concretely, PDM applies the decomposition to multiscale series and further mixes the decomposed seasonal and trend components in fine-to-coarse and coarse-to-fine directions separately, which successively aggregates the microscopic seasonal and macroscopic trend information. FMM further ensembles multiple predictors to utilize complementary forecasting capabilities in multiscale observations. Consequently, our proposed TimeMixer is able to achieve consistent state-of-the-art performances in both long-term and short-term forecasting tasks with favorable run-time efficiency.", "keywords": "Time Series Forecasting;Mixing Networks", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/92b4b480bed2da6400329929685bd647f09c0bf8.zip", "author": "Shiyu Wang;Haixu Wu;Xiaoming Shi;Tengge Hu;Huakun Luo;Lintao Ma;James Y. Zhang;JUN ZHOU", "authorids": "~Shiyu_Wang3;~Haixu_Wu1;~Xiaoming_Shi2;~Tengge_Hu1;~Huakun_Luo1;~Lintao_Ma1;~James_Y._Zhang1;~JUN_ZHOU6", "gender": ";M;M;;;;M;M", "homepage": ";;;http://ise.thss.tsinghua.edu.cn/~mlong/;;;https://scholar.google.com/citations?user=Ywakh_sAAAAJ;https://scholar.google.com/citations?user=mCVvloEAAAAJ&hl=en", "dblp": ";286/8115;;330/4778;;;151/3086;99/3847-11", "google_scholar": ";oLL_x0wAAAAJ;0WMTWacAAAAJ;;;;Ywakh_sAAAAJ;mCVvloEAAAAJ", "orcid": ";;0000-0003-0764-8961;;;;0000-0001-6519-676X;0000-0001-6033-6102", "linkedin": ";;;;;;jamesymzhang/;", "or_profile": "~Shiyu_Wang3;~Haixu_Wu1;~Xiaoming_Shi2;~Tengge_Hu1;~Huakun_Luo1;~Lintao_Ma1;~James_Y._Zhang1;~JUN_ZHOU6", "aff": ";Tsinghua University;Ant Group;Tsinghua University;;;Ant Group;Ant Group", "aff_domain": ";tsinghua.edu.cn;antgroup.com;tsinghua.edu.cn;;;alipay.com;antgroup.com", "position": ";PhD student;Researcher;MS student;;;managing director;Researcher", "bibtex": "@inproceedings{\nwang2024timemixer,\ntitle={TimeMixer: Decomposable Multiscale Mixing for Time Series Forecasting},\nauthor={Shiyu Wang and Haixu Wu and Xiaoming Shi and Tengge Hu and Huakun Luo and Lintao Ma and James Y. 
Zhang and JUN ZHOU},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=7oLshfEIC2}\n}", "github": "", "project": "", "reviewers": "ixvB;P2SD;ansv", "pdf_size": 5651407, "rating": "3;6;8", "confidence": "3;4;3", "soundness": "3;2;3", "contribution": "3;2;3", "presentation": "4;3;4", "wc_summary": "99;46;94", "wc_strengths": "161;50;205", "wc_weaknesses": "265;181;193", "wc_questions": "117;5;74", "wc_review": "642;282;566", "wc_reply_reviewers": "569;60;44", "wc_reply_authors": "3469;1390;714", "reply_reviewers": "2;1;1", "reply_authors": "11;5;2", "rating_avg": [ 5.666666666666667, 2.0548046676563256 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 79.66666666666667, 23.893281249943232 ], "wc_strengths_avg": [ 138.66666666666666, 65.21928821717971 ], "wc_weaknesses_avg": [ 213.0, 37.094473981982816 ], "wc_questions_avg": [ 65.33333333333333, 46.13265895460852 ], "wc_review_avg": [ 496.6666666666667, 154.9308089295204 ], "wc_reply_reviewers_avg": [ 224.33333333333334, 243.8036550632952 ], "wc_reply_authors_avg": [ 1857.6666666666667, 1172.3311060541823 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 6.0, 3.7416573867739413 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.1147078669352809, "gs_citation": 210, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7179608277779096511&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=7oLshfEIC2", "pdf": "https://openreview.net/pdf?id=7oLshfEIC2", "email": ";tsinghua.edu.cn;antgroup.com;tsinghua.edu.cn;;;alipay.com;antgroup.com", "author_num": 8, "aff_unique_index": "0;1;0;1;1", "aff_unique_norm": "Tsinghua University;Ant Group", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.antgroup.com", "aff_unique_abbr": "THU;Ant Group", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "7oYpj8BOLW", "title": "BackBench: Are Vision Language Models Resilient to Object-to-Background Context?", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "In this paper, we evaluate the resilience of modern vision and multimodal foundational models against object-to-background context variations. The majority of robustness evaluation methods have introduced synthetic datasets to induce changes to object characteristics (viewpoints, scale, color) or utilized image transformation techniques (adversarial changes, common corruptions) on real images to simulate shifts in distributions. Our approach, on the other hand, can change the background of real images using text prompts thus allowing diverse changes to the background. We achieve this while preserving the original appearance and semantics of the object of interest. This allows us to quantify the role of background context in understanding the robustness and generalization of deep neural networks. To achieve this goal, we harness the generative capabilities of text-to-image, image-to-text, and image-to-segment models to automatically generate a broad spectrum of object-to-background changes. 
By using textual guidance for control, we produce various versions of standard vision datasets (ImageNet, COCO), incorporating either diverse and realistic backgrounds into the images or introducing variations in the color and texture of the background. Additionally, we craft adversarial backgrounds by optimizing the latent variables and text embeddings within text-to-image models. We conduct thorough experimentation and provide an in-depth analysis of the robustness of vision and language models against object-to-background context variations across different tasks. Our code and evaluation benchmark along with the datasets will be publicly released.", "keywords": "Robustness;Real Image Editing;Foundational models;Adversarial Examples;Counterfactual images", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/621bb75b2ca1bab96f2b2602621aee16ab2bcfbe.pdf", "author": "Muhammad Huzaifa;Hashmat Shadab Malik;Muzammal Naseer;Salman Khan;Fahad Khan", "authorids": "~Muhammad_Huzaifa1;~Hashmat_Shadab_Malik1;~Muzammal_Naseer1;~Salman_Khan4;~Fahad_Khan1", "gender": "M;M;M;M;M", "homepage": "https://muhammad-huzaifaa.github.io/;;https://muzammal-naseer.com/;https://salman-h-khan.github.io/;https://sites.google.com/view/fahadkhans/home", "dblp": ";312/4561;;32/11535-1;05/8618", "google_scholar": "V7hTDxQAAAAJ;2Ft7r4AAAAAJ;https://scholar.google.ch/citations?user=tM9xKA8AAAAJ;https://scholar.google.es/citations?user=M59O9lkAAAAJ;zvaeYnUAAAAJ", "orcid": ";;0000-0001-7663-7161;0000-0002-9502-1749;", "linkedin": "muhammad-huzaifa--/;hashmat-shadab-malik-7a578b17b?originalSubdomain=ae;muzammalnaseer/;;", "or_profile": "~Muhammad_Huzaifa1;~Hashmat_Shadab_Malik1;~Muzammal_Naseer1;~Salman_Khan4;~Fahad_Khan1", "aff": "Mohamed bin Zayed University of Artificial Intelligence;Mohamed bin Zayed University of Artificial Intelligence;Mohamed bin Zayed University of Artificial Intelligence;Australian National University;Link\u00f6ping University", "aff_domain": "mbzuai.ac.ae;mbzuai.ac.ae;mbzuai.ac.ae;anu.edu.au;liu.se", "position": "MS student;PhD student;Researcher;Lecturer;Associate Professor", "bibtex": "@misc{\nanonymous2024backbench,\ntitle={BackBench: Are Vision Language Models Resilient to Object-to-Background Context?},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=7oYpj8BOLW}\n}", "github": "", "project": "", "reviewers": "SPdM;ATH5;B18W;RPSV", "site": "https://openreview.net/forum?id=7oYpj8BOLW", "pdf_size": 10114163, "rating": "5;5;6;8", "confidence": "4;3;3;4", "soundness": "2;2;2;4", "contribution": "2;3;2;3", "presentation": "2;3;2;4", "wc_summary": "52;58;149;72", "wc_strengths": "57;8;81;84", "wc_weaknesses": "202;55;96;42", "wc_questions": "34;2;90;56", "wc_review": "345;123;416;254", "wc_reply_reviewers": "55;0;0;0", "wc_reply_authors": "1904;1168;768;1489", "reply_reviewers": "1;0;0;0", "reply_authors": "3;2;2;3", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 82.75, 38.931831449342326 ], "wc_strengths_avg": [ 57.5, 30.434355587066403 ], "wc_weaknesses_avg": [ 98.75, 62.85449466824151 ], "wc_questions_avg": [ 45.5, 32.07413287993925 ], "wc_review_avg": [ 284.5, 109.5045661148429 ], "wc_reply_reviewers_avg": [ 13.75, 23.81569860407206 ], "wc_reply_authors_avg": [ 1332.25, 417.3801474675095 ], "reply_reviewers_avg": 
[ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.40824829046386296, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:PBSU1a9SdPIJ:scholar.google.com/&scioq=BackBench:+Are+Vision+Language+Models+Resilient+to+Object-to-Background+Context%3F&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;0;0;1;2", "aff_unique_norm": "Mohamed bin Zayed University of Artificial Intelligence;Australian National University;Link\u00f6ping University", "aff_unique_dep": ";;", "aff_unique_url": "https://mbzuai.ac.ae;https://www.anu.edu.au;https://www.liu.se", "aff_unique_abbr": "MBZUAI;ANU;LiU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;2", "aff_country_unique": "United Arab Emirates;Australia;Sweden" }, { "id": "7pVIFJW2Hp", "title": "FigCaps-HF: A Figure-to-Caption Generative Framework and Benchmark with Human Feedback", "track": "main", "status": "Reject", "tldr": "", "abstract": "Captions are crucial for understanding scientific visualizations and documents. Existing captioning methods for scientific figures rely on figure-caption pairs extracted from documents for training, many of which fall short with respect to metrics like helpfulness, explainability, and visual-descriptiveness leading to generated captions being misaligned with reader preferences. To enable the generation of high-quality figure captions, we introduce FigCaps-HF a new framework for figure-caption generation that can incorporate domain expert feedback in generating captions optimized for reader preferences. Our framework comprises of 1) an automatic method for evaluating quality of figure-caption pairs, 2) a novel reinforcement learning with human feedback (RLHF) method to optimize a generative figure-to-caption model for reader preferences. We demonstrate the effectiveness of our simple learning framework by improving performance over standard fine-tuning across different types of models. In particular, when using BLIP as the base model, our RLHF framework achieves a mean gain of 35.7%, 16.9%, and 9% in ROUGE, BLEU, and Meteor, respectively. Finally, we release a large-scale benchmark dataset with human feedback on figure-caption pairs to enable further evaluation and development of RLHF techniques for this problem", "keywords": "Figure Caption Generation;Image-to-Text Generation;Reinforcement Learning using Human Feedback;Figure-Caption Benchmark;Human Feedback", "primary_area": "datasets and benchmarks", "supplementary_material": "", "author": "Ashish Singh;Prateek Agarwal;Zixuan Huang;Arpita Singh;Tong Yu;Sungchul Kim;Victor Bursztyn;Nikos Vlassis;Ryan A. 
Rossi", "authorids": "~Ashish_Singh2;~Prateek_Agarwal1;~Zixuan_Huang5;~Arpita_Singh1;~Tong_Yu3;~Sungchul_Kim1;~Victor_Bursztyn1;~Nikos_Vlassis1;~Ryan_A._Rossi2", "gender": "M;M;M;F;;M;M;;M", "homepage": "https://people.cs.umass.edu/~ashishsingh/;;https://github.com/rayt98;;https://www.linkedin.com/in/tong-yu-42790744;https://sites.google.com/site/subright;https://vbursztyn.github.io/;;http://ryanrossi.com", "dblp": ";;;;32/1593-1;61/1573;154/7800.html;v/NikosAVlassis;17/5085", "google_scholar": "5lJCCzMAAAAJ;;;;https://scholar.google.com/citations?hl=en;v8ISLgIAAAAJ;HRx3epUAAAAJ;JJWWPjsAAAAJ;_Dc6lbQAAAAJ", "orcid": ";;;;0000-0002-5991-2050;0000-0003-3580-5290;;;0000-0001-9758-0635", "linkedin": "ashish-singh-a750b7b6/;prateekagarw/;;arpita505/;tong-yu-42790744;;;;", "or_profile": "~Ashish_Singh2;~Prateek_Agarwal1;~Zixuan_Huang5;~Arpita_Singh1;~Tong_Yu3;~Sungchul_Kim1;~Victor_Bursztyn1;~Nikos_Vlassis1;~Ryan_Rossi1", "aff": "Department of Computer Science, University of Massachusetts, Amherst;;University of Massachusetts at Amherst;LinkedIn;Adobe Research;Adobe Systems;Adobe Systems;Adobe Systems;Adobe Research", "aff_domain": "cs.umass.edu;;umass.edu;linkedin.com;adobe.com;adobe.com;adobe.com;adobe.com;adobe.com", "position": "PhD student;;MS student;Software Engineer;Senior Research Scientist;Researcher;Researcher;Principal Researcher;Senior Research Scientist", "bibtex": "@misc{\nsingh2024figcapshf,\ntitle={FigCaps-{HF}: A Figure-to-Caption Generative Framework and Benchmark with Human Feedback},\nauthor={Ashish Singh and Prateek Agarwal and Zixuan Huang and Arpita Singh and Tong Yu and Sungchul Kim and Victor Bursztyn and Nikos Vlassis and Ryan A. Rossi},\nyear={2024},\nurl={https://openreview.net/forum?id=7pVIFJW2Hp}\n}", "github": "", "project": "", "reviewers": "oWEK;LPWT;3Ti9;MqnR", "site": "https://openreview.net/forum?id=7pVIFJW2Hp", "pdf_size": 3346984, "rating": "5;5;6;6", "confidence": "3;3;3;3", "soundness": "3;2;3;3", "contribution": "3;2;3;3", "presentation": "2;2;3;3", "wc_summary": "45;62;101;57", "wc_strengths": "71;53;124;40", "wc_weaknesses": "70;98;22;52", "wc_questions": "53;4;201;24", "wc_review": "239;217;448;173", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "711;685;0;727", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;0;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 66.25, 20.99255820523073 ], "wc_strengths_avg": [ 72.0, 31.976553910638962 ], "wc_weaknesses_avg": [ 60.5, 27.617928959282953 ], "wc_questions_avg": [ 70.5, 77.3320761392063 ], "wc_review_avg": [ 269.25, 105.90178232683338 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 530.75, 306.79502522042304 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0.75, 0.4330127018922193 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15640674917187120192&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1;2;2;2;2;2", "aff_unique_norm": "University of Massachusetts Amherst;LinkedIn Corporation;Adobe", "aff_unique_dep": "Department of Computer Science;;Adobe Research", "aff_unique_url": "https://www.umass.edu;https://www.linkedin.com;https://research.adobe.com", "aff_unique_abbr": "UMass Amherst;LinkedIn;Adobe", "aff_campus_unique_index": "0;0", "aff_campus_unique": 
"Amherst;", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Heterogeneous Personalized Federated Learning by Local-Global Updates Mixing via Convergence Rate", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19346", "id": "7pWRLDBAtc", "author_site": "Meirui Jiang, Anjie Le, Xiaoxiao Li, Qi Dou", "tldr": "", "abstract": "Personalized federated learning (PFL) has emerged as a promising technique for addressing the challenge of data heterogeneity. While recent studies have made notable progress in mitigating heterogeneity associated with label distributions, the issue of effectively handling feature heterogeneity remains an open question. In this paper, we propose a personalization approach by Local-global updates Mixing (LG-Mix) via Neural Tangent Kernel (NTK)-based convergence. The core idea is to leverage the convergence rate induced by NTK to quantify the importance of local and global updates, and subsequently mix these updates based on their importance. Specifically, we find the trace of the NTK matrix can manifest the convergence rate, and propose an efficient and effective approximation to calculate the trace of a feature matrix instead of the NTK matrix. Such approximation significantly reduces the cost of computing NTK, and the feature matrix explicitly considers the heterogeneous features among samples. We have theoretically analyzed the convergence of our method in the over-parameterize regime, and experimentally evaluated our method on five datasets. These datasets present heterogeneous data features in natural and medical images. With comprehensive comparison to existing state-of-the-art approaches, our LG-Mix has consistently outperformed them across all datasets (largest accuracy improvement of 5.01\\%), demonstrating the outstanding efficacy of our method for model personalization. 
Code is available at \\url{https://github.com/med-air/HeteroPFL}.", "keywords": "Personalized Federated Learning;Heterogeneous Data;Feature Distribution Shift", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/5b33797c929db3d387af16617136bea9a85e4fd2.zip", "author": "Meirui Jiang;Anjie Le;Xiaoxiao Li;Qi Dou", "authorids": "~Meirui_Jiang2;~Anjie_Le1;~Xiaoxiao_Li1;~Qi_Dou2", "gender": "F;Unspecified;F;M", "homepage": ";https://xxlya.github.io/;https://www.cse.cuhk.edu.hk/~qdou;https://meiruijiang.github.io/MeiruiJiang/", "dblp": "353/1044;71/8042;165/7846;285/5480", "google_scholar": "XvQ4B1sAAAAJ;sdENOQ4AAAAJ;https://scholar.google.com.hk/citations?user=iHh7IJQAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0002-3416-9950;0000-0003-4228-8420", "linkedin": ";;;", "or_profile": "~Anjie_Le1;~Xiaoxiao_Li1;~Qi_Dou2;~Meirui_JIANG1", "aff": "University of Cambridge;University of British Columbia;The Chinese University of Hong Kong;Department of Computer Science and Engineering, The Chinese University of Hong Kong", "aff_domain": "cam.ac.uk;ece.ubc.ca;cuhk.edu.hk;cse.cuhk.edu.hk", "position": "MS student;Assistant Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\njiang2024heterogeneous,\ntitle={Heterogeneous Personalized Federated Learning by Local-Global Updates Mixing via Convergence Rate},\nauthor={Meirui Jiang and Anjie Le and Xiaoxiao Li and Qi Dou},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=7pWRLDBAtc}\n}", "github": "", "project": "", "reviewers": "Ho8d;efqn;i8zc", "pdf_size": 1368900, "rating": "5;6;8", "confidence": "2;3;2", "soundness": "2;3;3", "contribution": "2;2;3", "presentation": "2;3;3", "wc_summary": "65;47;102", "wc_strengths": "44;50;71", "wc_weaknesses": "322;154;48", "wc_questions": "391;28;71", "wc_review": "822;279;292", "wc_reply_reviewers": "0;152;0", "wc_reply_authors": "1819;942;1510", "reply_reviewers": "0;2;0", "reply_authors": "3;3;3", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 2.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 71.33333333333333, 22.895899681432528 ], "wc_strengths_avg": [ 55.0, 11.575836902790225 ], "wc_weaknesses_avg": [ 174.66666666666666, 112.81055900146147 ], "wc_questions_avg": [ 163.33333333333334, 161.93894597107337 ], "wc_review_avg": [ 464.3333333333333, 252.9642047580821 ], "wc_reply_reviewers_avg": [ 50.666666666666664, 71.65348716023682 ], "wc_reply_authors_avg": [ 1423.6666666666667, 363.2008932930767 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 3.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.18898223650461363, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:vK5WzmeAoIgJ:scholar.google.com/&scioq=Heterogeneous+Personalized+Federated+Learning+by+Local-Global+Updates+Mixing+via+Convergence+Rate&hl=en&as_sdt=0,33", "gs_version_total": 2, "openreview": "https://openreview.net/forum?id=7pWRLDBAtc", "pdf": "https://openreview.net/pdf?id=7pWRLDBAtc", "email": "cam.ac.uk;ece.ubc.ca;cuhk.edu.hk;cse.cuhk.edu.hk", "author_num": 4, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "University of 
Cambridge;University of British Columbia;Chinese University of Hong Kong", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cam.ac.uk;https://www.ubc.ca;https://www.cuhk.edu.hk", "aff_unique_abbr": "Cambridge;UBC;CUHK", "aff_campus_unique_index": "0;2;2", "aff_campus_unique": "Cambridge;;Hong Kong SAR", "aff_country_unique_index": "0;1;2;2", "aff_country_unique": "United Kingdom;Canada;China" }, { "id": "7q7s5fXEpP", "title": "Stealthy Imitation: Reward-guided Environment-free Policy Stealing", "track": "main", "status": "Reject", "tldr": "", "abstract": "Deep reinforcement learning policies, which are integral to modern control systems, represent valuable intellectual property. The development of these policies demands considerable resources, such as domain expertise, simulation fidelity, and real-world validation. These policies are potentially vulnerable to model stealing attacks, which aim to replicate their functionality using only black-box access. In this paper, we propose Stealthy Imitation, the first attack designed to steal policies without access to the environment or knowledge of the input range. This setup has not been considered by previous model stealing methods. Lacking access to the victim's input states distribution, Stealthy Imitation fits a reward model that allows to approximate it. We show that the victim policy is harder to imitate when the distribution of the attack queries matches that of the victim. We evaluate our approach across diverse, high-dimensional control tasks and consistently outperform prior data-free approaches adapted for policy stealing. Lastly, we propose a countermeasure that significantly diminishes the effectiveness of the attack. The implementation of Stealthy Imitation will be publicly available and open-source.", "keywords": "model stealing;security;deep reinforcement learning;control system", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "", "author": "Zhixiong Zhuang;Maria-Irina Nicolae;Mario Fritz", "authorids": "~Zhixiong_Zhuang1;~Maria-Irina_Nicolae1;~Mario_Fritz1", "gender": "M;F;M", "homepage": "https://de.linkedin.com/in/zhixiong-zhuang-7b18121b0;https://ririnicolae.github.io/;https://cispa.saarland/group/fritz/", "dblp": ";156/0167.html;", "google_scholar": ";kNOsX30AAAAJ;https://scholar.google.de/citations?user=4V1nNm4AAAAJ", "orcid": ";0009-0002-2758-7481;", "linkedin": ";irina-nicolae-a2251638;", "or_profile": "~Zhixiong_Zhuang1;~Maria-Irina_Nicolae1;~Mario_Fritz1", "aff": "Robert Bosch GmbH;Robert Bosch GmbH;Saarland University", "aff_domain": "bosch.com;bosch.com;uni-saarland.de", "position": "PhD student;Research scientist;Full Professor", "bibtex": "@misc{\nzhuang2024stealthy,\ntitle={Stealthy Imitation: Reward-guided Environment-free Policy Stealing},\nauthor={Zhixiong Zhuang and Maria-Irina Nicolae and Mario Fritz},\nyear={2024},\nurl={https://openreview.net/forum?id=7q7s5fXEpP}\n}", "github": "", "project": "", "reviewers": "3FCu;LEDd;Maon;AqHM", "site": "https://openreview.net/forum?id=7q7s5fXEpP", "pdf_size": 2088392, "rating": "5;5;6;6", "confidence": "4;3;3;3", "soundness": "2;3;3;2", "contribution": "3;2;2;2", "presentation": "4;3;3;2", "wc_summary": "140;78;72;131", "wc_strengths": "40;56;24;74", "wc_weaknesses": "493;74;150;320", "wc_questions": "73;201;45;200", "wc_review": "746;409;291;725", "wc_reply_reviewers": "260;0;20;63", "wc_reply_authors": "673;602;498;666", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 0.5 ], 
"confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 105.25, 30.49077729412617 ], "wc_strengths_avg": [ 48.5, 18.567444627627143 ], "wc_weaknesses_avg": [ 259.25, 161.69628165174362 ], "wc_questions_avg": [ 129.75, 71.44009728436825 ], "wc_review_avg": [ 542.75, 197.3529515867447 ], "wc_reply_reviewers_avg": [ 85.75, 103.14643716580811 ], "wc_reply_authors_avg": [ 609.75, 70.2010505619396 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12495121733455247305&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;1", "aff_unique_norm": "Robert Bosch GmbH;Saarland University", "aff_unique_dep": ";", "aff_unique_url": "https://www.bosch.com;https://www.uni-saarland.de", "aff_unique_abbr": "Bosch;UdS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "id": "7rex8lEZH2", "title": "Prompt Tuning with Diffusion for Few-Shot Pre-trained Policy Generalization", "track": "main", "status": "Reject", "tldr": "", "abstract": "Offline Reinforcement Learning (RL) methods harness previous experiences to derive an optimal policy, forming the foundation for pre-trained large-scale models (PLMs). When encountering tasks not seen before, PLMs often utilize several expert trajectories as prompts to expedite their adaptation to new requirements. Though a range of prompt-tuning methods has been proposed to enhance the quality of prompts, these methods frequently face restrictions due to prompt initialization, which can significantly constrain the exploration domain and potentially lead to suboptimal solutions. To eliminate the reliance on the initial prompt, we shift our perspective towards the generative model, framing the prompt-tuning process as a form of conditional generative modeling, where prompts are generated from random noise. Our innovation, the Prompt Diffuser, leverages a conditional diffusion model to produce prompts of exceptional quality. Central to our framework is the approach to trajectory reconstruction and the meticulous integration of downstream task guidance during the training phase. 
Further experimental results underscore the potency of the Prompt Diffuser as a robust and effective tool for the prompt-tuning process, demonstrating strong performance in the meta-RL tasks.", "keywords": "prompt tuning;diffusion model", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "/attachment/19a1db969304f3603761c62f87bfbfb406968244.pdf", "author": "Shengchao Hu;Li Shen;Ya Zhang;Dacheng Tao", "authorids": "~Shengchao_Hu1;~Li_Shen1;~Ya_Zhang1;~Dacheng_Tao1", "gender": ";M;F;", "homepage": ";https://sites.google.com/site/mathshenli/home;https://annzhanglion.github.io/;", "dblp": ";91/3680-8;85/3714-2;", "google_scholar": ";yVhgENIAAAAJ;pbjw9sMAAAAJ;", "orcid": ";;0000-0002-5390-9053;", "linkedin": ";;;", "or_profile": "~Shengchao_Hu1;~Li_Shen1;~Ya_Zhang1;~Dacheng_Tao1", "aff": ";JD Explore Academy;Shanghai Jiaotong University;", "aff_domain": ";jd.com;sjtu.edu.cn;", "position": ";Researcher;Professor;", "bibtex": "@misc{\nhu2024prompt,\ntitle={Prompt Tuning with Diffusion for Few-Shot Pre-trained Policy Generalization},\nauthor={Shengchao Hu and Li Shen and Ya Zhang and Dacheng Tao},\nyear={2024},\nurl={https://openreview.net/forum?id=7rex8lEZH2}\n}", "github": "", "project": "", "reviewers": "GaVK;4ueS;V1Dp;zrXj", "site": "https://openreview.net/forum?id=7rex8lEZH2", "pdf_size": 900417, "rating": "5;5;6;8", "confidence": "4;3;4;3", "soundness": "3;2;3;3", "contribution": "2;2;3;3", "presentation": "3;3;2;3", "wc_summary": "74;47;52;52", "wc_strengths": "39;45;48;67", "wc_weaknesses": "103;138;97;52", "wc_questions": "75;40;109;2", "wc_review": "291;270;306;173", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1862;1918;2556;1175", "reply_reviewers": "0;0;0;0", "reply_authors": "5;4;5;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 56.25, 10.449282272003183 ], "wc_strengths_avg": [ 49.75, 10.473180032826706 ], "wc_weaknesses_avg": [ 97.5, 30.581857366746057 ], "wc_questions_avg": [ 56.5, 39.81519810323691 ], "wc_review_avg": [ 260.0, 51.83145762951299 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1877.75, 488.8120165257806 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 4.0, 1.224744871391589 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.40824829046386296, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1566295000175567071&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1", "aff_unique_norm": "JD;Shanghai Jiao Tong University", "aff_unique_dep": "JD Explore Academy;", "aff_unique_url": ";https://www.sjtu.edu.cn", "aff_unique_abbr": ";SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1", "aff_country_unique": ";China" }, { "id": "7sASqAmGaO", "title": "Augmenting Negative Representation for Continual Self-Supervised Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "We introduce a novel and general loss function, called Augmented Negatives (AugNeg), for effective continual self-supervised learning (CSSL). We first argue that the conventional loss form of continual learning which consists of single task-specific loss (for plasticity) and a regularizer (for stability) may not be ideal for contrastive loss based CSSL that focus on representation learning. 
Our reasoning is that, in contrastive learning based methods, the task-specific loss would suffer from decreasing diversity of negative samples and the regularizer may hinder learning new distinctive representations. To that end, we propose AugNeg that consists of two losses with symmetric dependence on current and past models' negative representations. We argue our model can naturally find good trade-off between the plasticity and stability without any explicit hyperparameter tuning. \nFurthermore, we present that the idea of utilizing augmented negative representations can be applied to CSSL with non-contrastive learning by adding a regularization term.\nWe validate the effectiveness of our approach through extensive experiments, demonstrating that applying the AugNeg loss achieves superior performance compared to other state-of-the-art CSSL methods, in both contrastive and non-contrastive learning algorithms.", "keywords": "Continual Learning;Representation Learning;Self-supervised Learning;Continual Self-Supervised Learning;Continual Representation Learning", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "/attachment/ac9c8b73f89c33a533cf3ae8b11732619bcecc3d.zip", "author": "Sungmin Cha;Kyunghyun Cho;Taesup Moon", "authorids": "~Sungmin_Cha1;~Kyunghyun_Cho1;~Taesup_Moon1", "gender": "M;M;", "homepage": "https://sites.google.com/view/sungmin-cha/;http://kyunghyuncho.me;https://mindlab-snu.github.io/people/pi/", "dblp": "206/6287;41/9736;05/4084", "google_scholar": "i0PPhfAAAAAJ;https://scholar.google.fi/citations?user=0RAmmIAAAAAJ;lQlioBoAAAAJ", "orcid": ";;0000-0002-9257-6503", "linkedin": ";;", "or_profile": "~Sungmin_Cha1;~Kyunghyun_Cho1;~Taesup_Moon1", "aff": "New York University;Genentech;Seoul National University", "aff_domain": "nyu.edu;gene.com;snu.ac.kr", "position": "Faculty Fellow;Senior Director of Frontier Research;Associate Professor", "bibtex": "@misc{\ncha2024augmenting,\ntitle={Augmenting Negative Representation for Continual Self-Supervised Learning},\nauthor={Sungmin Cha and Kyunghyun Cho and Taesup Moon},\nyear={2024},\nurl={https://openreview.net/forum?id=7sASqAmGaO}\n}", "github": "", "project": "", "reviewers": "Tkp5;E4AJ;GiPi", "site": "https://openreview.net/forum?id=7sASqAmGaO", "pdf_size": 1335969, "rating": "5;5;6", "confidence": "3;4;3", "soundness": "2;3;3", "contribution": "2;2;3", "presentation": "2;2;3", "wc_summary": "75;55;59", "wc_strengths": "18;17;75", "wc_weaknesses": "259;135;173", "wc_questions": "49;127;44", "wc_review": "401;334;351", "wc_reply_reviewers": "0;51;16", "wc_reply_authors": "1036;1144;669", "reply_reviewers": "0;1;1", "reply_authors": "4;4;3", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 63.0, 8.640987597877148 ], "wc_strengths_avg": [ 36.666666666666664, 27.10883414846328 ], "wc_weaknesses_avg": [ 189.0, 51.87163643713842 ], "wc_questions_avg": [ 73.33333333333333, 38.00292386412159 ], "wc_review_avg": [ 362.0, 28.437065014988214 ], "wc_reply_reviewers_avg": [ 22.333333333333332, 21.296843793284385 ], "wc_reply_authors_avg": [ 949.6666666666666, 203.29999726731154 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 3.6666666666666665, 0.4714045207910317 ], 
"replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14189818090051337427&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "New York University;Genentech;Seoul National University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.nyu.edu;https://www.genentech.com;https://www.snu.ac.kr", "aff_unique_abbr": "NYU;Genentech;SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;South Korea" }, { "id": "7sMR09VNKU", "title": "Learning System Dynamics from Sensory Input under Optimal Control Principles", "track": "main", "status": "Reject", "tldr": "", "abstract": "Identifying the underlying dynamics of actuated physical systems from sensory input is of high interest in control, robotics, and engineering in general. In the context of control problems, existing approaches decouple the construction of the feature space where the dynamics identification process occurs from the target control tasks, potentially leading to a mismatch between feature and state spaces: the systems may not be controllable in feature space, and synthesized controls may not be applicable in state space.\nBorrowing from the Koopman formalism, we propose instead to learn an embedding of both the states and controls into a feature space where the dynamics are linear, and include the target control task in the learning objective in the form of a differentiable and robust optimal control problem. We validate the proposed approach with simulation experiments using systems with non-linear dynamics, demonstrating that the controls obtained in feature space can be used to drive the corresponding physical systems and that the learned model can serve for future state prediction.", "keywords": "representation learning;optimal control", "primary_area": "applications to robotics, autonomy, planning", "supplementary_material": "/attachment/d9f53cb0d0718befb58b84bc55561e74bc656ee3.pdf", "author": "Oumayma Bounou;Jean Ponce;Justin Carpentier", "authorids": "~Oumayma_Bounou1;~Jean_Ponce1;~Justin_Carpentier1", "gender": "F;M;M", "homepage": ";http://www.di.ens.fr/~ponce/;https://jcarpent.github.io", "dblp": ";p/JeanPonce;173/7498", "google_scholar": "B2RS1M4AAAAJ;https://scholar.google.com.tw/citations?user=vC2vywcAAAAJ;https://scholar.google.fr/citations?user=CyhIdmMAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Oumayma_Bounou1;~Jean_Ponce1;~Justin_Carpentier1", "aff": ";Ecole Normale Sup\u00e9rieure de Paris;INRIA", "aff_domain": ";ens.fr;inria.fr", "position": ";Full Professor;Researcher", "bibtex": "@misc{\nbounou2024learning,\ntitle={Learning System Dynamics from Sensory Input under Optimal Control Principles},\nauthor={Oumayma Bounou and Jean Ponce and Justin Carpentier},\nyear={2024},\nurl={https://openreview.net/forum?id=7sMR09VNKU}\n}", "github": "", "project": "", "reviewers": "6hvE;fvtc;ianP;wihP", "site": "https://openreview.net/forum?id=7sMR09VNKU", "pdf_size": 933768, "rating": "1;3;5;5", "confidence": "5;4;4;3", "soundness": "2;2;3;2", "contribution": "1;2;2;2", "presentation": "2;3;3;2", "wc_summary": "46;85;104;80", "wc_strengths": "30;66;109;30", "wc_weaknesses": "611;273;178;29", "wc_questions": "61;56;125;8", "wc_review": "748;480;516;147", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", 
"reply_authors": "0;0;0;0", "rating_avg": [ 3.5, 1.6583123951777 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 78.75, 20.92098229051399 ], "wc_strengths_avg": [ 58.75, 32.52210786526605 ], "wc_weaknesses_avg": [ 272.75, 213.77836069162848 ], "wc_questions_avg": [ 62.5, 41.59627387158614 ], "wc_review_avg": [ 472.75, 214.3587821853819 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8528028654224417, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17943297840464535428&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "aff_unique_index": "0;1", "aff_unique_norm": "Ecole Normale Sup\u00e9rieure de Paris;INRIA", "aff_unique_dep": ";", "aff_unique_url": "https://www.ens.fr;https://www.inria.fr", "aff_unique_abbr": "ENS Paris;INRIA", "aff_campus_unique_index": "0", "aff_campus_unique": "Paris;", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "id": "7suavRDxe8", "title": "Plausibly Deniable Encryption with Large Language Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "We present a novel approach for achieving plausible deniability in cryptography by harnessing the power of large language models (LLMs) in conjunction with conventional encryption algorithms. Leveraging the inherent statistical properties of LLMs, we design an encryption scheme that allows the same ciphertext to be decrypted with any key, while still yielding a plausible message. Unlike established methods, our approach neither relies on a fixed set of decoy keys or messages nor introduces redundancy. Our method is founded on the observation that language models can be used as encoders to compress a low-entropy signal (such as natural language) into a stream indistinguishable from noise, and similarly, that sampling from the model is equivalent to decoding a stream of noise. When such a stream is encrypted and subsequently decrypted with an incorrect key, it will lead to a sampling behavior and will thus generate a plausible message. Through a series of experiments, we substantiate the resilience of our approach against various statistical detection techniques. 
Finally, although we mainly focus on language models, we establish the applicability of our approach to a broader set of generative models and domains, including images and audio.", "keywords": "large language models;LLM;deniable encryption;compression", "primary_area": "probabilistic methods (Bayesian methods, variational inference, sampling, UQ, etc.)", "supplementary_material": "/attachment/764c29d631d2939d474d5a77f1f77f61272b4496.zip", "author": "Dario Pavllo;Sotiris Anagnostidis", "authorids": "~Dario_Pavllo2;~Sotiris_Anagnostidis1", "gender": ";M", "homepage": ";", "dblp": "218/5320;286/1763", "google_scholar": "5A_sjVQAAAAJ;qjzTKWUAAAAJ", "orcid": ";", "linkedin": "dario-pavllo/;sotiris-anagnostidis-b064a5129/", "or_profile": "~Dario_Pavllo2;~Sotiris_Anagnostidis1", "aff": ";ETH Zurich", "aff_domain": ";inf.ethz.ch", "position": ";PhD student", "bibtex": "@misc{\npavllo2024plausibly,\ntitle={Plausibly Deniable Encryption with Large Language Models},\nauthor={Dario Pavllo and Sotiris Anagnostidis},\nyear={2024},\nurl={https://openreview.net/forum?id=7suavRDxe8}\n}", "github": "", "project": "", "reviewers": "xago;dPno;ydp2;jZiK;n1KU", "site": "https://openreview.net/forum?id=7suavRDxe8", "pdf_size": 1213338, "rating": "3;3;5;5;8", "confidence": "4;4;3;5;4", "soundness": "1;1;2;3;3", "contribution": "1;2;2;2;4", "presentation": "3;3;2;3;4", "wc_summary": "74;70;55;219;176", "wc_strengths": "130;92;19;19;127", "wc_weaknesses": "508;323;101;76;113", "wc_questions": "74;21;69;51;37", "wc_review": "786;506;244;365;453", "wc_reply_reviewers": "0;0;0;342;32", "wc_reply_authors": "228;817;458;630;293", "reply_reviewers": "0;0;0;2;1", "reply_authors": "1;1;2;2;1", "rating_avg": [ 4.8, 1.8330302779823362 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.0, 0.8944271909999159 ], "contribution_avg": [ 2.2, 0.9797958971132712 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 118.8, 65.98605913372914 ], "wc_strengths_avg": [ 77.4, 49.52009693043826 ], "wc_weaknesses_avg": [ 224.2, 167.21889845349418 ], "wc_questions_avg": [ 50.4, 19.734234213670415 ], "wc_review_avg": [ 470.8, 180.8528683764789 ], "wc_reply_reviewers_avg": [ 74.8, 134.1736188674957 ], "wc_reply_authors_avg": [ 485.2, 216.74445782995238 ], "reply_reviewers_avg": [ 0.6, 0.8 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:S3CLm32LP88J:scholar.google.com/&scioq=Plausibly+Deniable+Encryption+with+Large+Language+Models&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "ETH Zurich", "aff_unique_dep": "", "aff_unique_url": "https://www.ethz.ch", "aff_unique_abbr": "ETHZ", "aff_country_unique_index": "0", "aff_country_unique": "Switzerland" }, { "id": "7tUDUFQkh2", "title": "Ref-Diff: Zero-shot Referring Image Segmentation with Generative Models", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Zero-shot referring image segmentation (RIS) presents a significant challenge. It requires identifying an instance segmentation mask using referring descriptions, without having been trained on such paired data. 
While existing zero-shot RIS methods mainly utilize pre-trained discriminative models (e.g., CLIP), this study observes that generative models (e.g., Stable Diffusion) can discern relationships between various visual elements and text descriptions, an area yet to be explored in this task. In this work, we introduce the Referring Diffusional Segmentor (Ref-Diff), a model that leverages the fine-grained multi-modal information derived from generative models. Our findings show that even without an external proposal generator, our Ref-Diff with a sole generative model outperforms SOTA weakly-supervised models on RefCOCO+ and RefCOCOg. Notably, when combining both generative and discriminative models, our Ref-Diff+ surpasses competing methods by a substantial margin. This highlights the constructive role of generative models in this domain, providing complementary advantages alongside discriminative models to enhance referring segmentation. Our source code will be publicly available.", "keywords": "Zero-shot Referring Image Segmentation;Generative Model", "primary_area": "generative models", "supplementary_material": "", "author": "Minheng Ni;Yabo Zhang;Kailai Feng;Xiaoming Li;Yiwen Guo;Wangmeng Zuo", "authorids": "~Minheng_Ni1;~Yabo_Zhang1;~Kailai_Feng1;~Xiaoming_Li3;~Yiwen_Guo1;~Wangmeng_Zuo3", "gender": "M;M;M;M;;M", "homepage": "https://kodenii.github.io;https://ybybzhang.github.io/;https://github.com/carlofkl;;;", "dblp": "263/9969;231/0624;331/2340;36/3071-2;;93/2671", "google_scholar": "-ybr4_cAAAAJ;LnYDPdAAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;;rUOpCEYAAAAJ", "orcid": ";;;;;0000-0002-3330-783X", "linkedin": "https://linkedin.com/in/minheng-ni-7b8a99146;;;;;", "or_profile": "~Minheng_Ni1;~Yabo_Zhang1;~Kailai_Feng1;~Xiaoming_Li3;~Yiwen_Guo1;~Wangmeng_Zuo3", "aff": "Microsoft;Harbin Institute of Technology;Harbin Institute of Technology;Nanyang Technological University;;Harbin Institute of Technology", "aff_domain": "microsoft.com;hit.edu.cn;stu.hit.edu.cn;ntu.edu.sg;;hit.edu.cn", "position": "Research Intern;PhD student;MS student;Postdoc;;Full Professor", "bibtex": "@misc{\nni2024refdiff,\ntitle={Ref-Diff: Zero-shot Referring Image Segmentation with Generative Models},\nauthor={Minheng Ni and Yabo Zhang and Kailai Feng and Xiaoming Li and Yiwen Guo and Wangmeng Zuo},\nyear={2024},\nurl={https://openreview.net/forum?id=7tUDUFQkh2}\n}", "github": "", "project": "", "reviewers": "YFJv;83NR;rW9R;2K1U", "site": "https://openreview.net/forum?id=7tUDUFQkh2", "pdf_size": 6617820, "rating": "3;3;3;6", "confidence": "4;5;4;4", "soundness": "3;1;2;3", "contribution": "2;1;1;2", "presentation": "3;2;2;3", "wc_summary": "55;44;38;149", "wc_strengths": "24;15;23;43", "wc_weaknesses": "261;247;397;134", "wc_questions": "2;11;24;2", "wc_review": "342;317;482;328", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "contribution_avg": [ 1.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 71.5, 45.15805575974236 ], "wc_strengths_avg": [ 26.25, 10.280442597476044 ], "wc_weaknesses_avg": [ 259.75, 93.29355551162149 ], "wc_questions_avg": [ 9.75, 9.01041064547005 ], "wc_review_avg": [ 367.25, 66.84076226375639 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], 
"authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13803166602830577984&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;1;2;1", "aff_unique_norm": "Microsoft;Harbin Institute of Technology;Nanyang Technological University", "aff_unique_dep": "Microsoft Corporation;;", "aff_unique_url": "https://www.microsoft.com;http://www.hit.edu.cn/;https://www.ntu.edu.sg", "aff_unique_abbr": "Microsoft;HIT;NTU", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Harbin", "aff_country_unique_index": "0;1;1;2;1", "aff_country_unique": "United States;China;Singapore" }, { "id": "7v3tkQmtpE", "title": "Rethinking Decision Transformer via Hierarchical Reinforcement Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Decision Transformer (DT) is an innovative algorithm leveraging recent advances of the Transformer architecture in sequential decision making. However, a notable limitation of DT is its reliance on {recalling} trajectories from datasets, without the capability to seamlessly stitch them together. In this work, we introduce a general sequence modeling framework for studying sequential decision making through the lens of \\emph{Hierarchical Reinforcement Learning}. At the time of making decisions, a \\emph{high-level} policy first proposes an ideal \\emph{prompt} for the current state, a \\emph{low-level} policy subsequently generates an action conditioned on the given prompt. We show how DT emerges as a special case with specific choices of high-level and low-level policies and discuss why these choices might fail in practice. Inspired by these observations, we investigate how to jointly optimize the high-level and low-level policies to enable the stitching capability. This further leads to the development of new algorithms for offline reinforcement learning. Finally, our empirical studies clearly demonstrate the proposed algorithms significantly surpass DT on several control and navigation benchmarks. 
We hope that our contributions can inspire the integration of Transformer architectures within the field of RL.", "keywords": "offline reinforcement learning;decision transformer", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Yi Ma;Chenjun Xiao;Hebin Liang;Jianye HAO", "authorids": "~Yi_Ma5;~Chenjun_Xiao1;~Hebin_Liang2;~Jianye_HAO1", "gender": ";;M;M", "homepage": "https://mayi1996.top/;https://chenjun-x.github.io/;http://www.icdai.org/jianye.html;https://github.com/superCat-star", "dblp": "69/1112-5.html;178/8641;21/7664.html;352/9378.html", "google_scholar": "TdVWzqgAAAAJ;;;", "orcid": "0000-0001-9375-6605;0000-0002-5493-1500;0000-0002-0422-8235;0009-0000-8371-2297", "linkedin": ";;;", "or_profile": "~Yi_Ma5;~Chenjun_Xiao1;~Jianye_HAO1;~hebin_liang1", "aff": "Tianjin University;Huawei Technologies Ltd.;Tianjin University;Tianjin University", "aff_domain": "tju.edu.cn;huawei.com;tju.edu.cn;tju.edu.cn", "position": "PhD student;Researcher;Associate Professor;MS student", "bibtex": "@misc{\nma2024rethinking,\ntitle={Rethinking Decision Transformer via Hierarchical Reinforcement Learning},\nauthor={Yi Ma and Chenjun Xiao and Hebin Liang and Jianye HAO},\nyear={2024},\nurl={https://openreview.net/forum?id=7v3tkQmtpE}\n}", "github": "", "project": "", "reviewers": "UBLF;xoB5;RLH1;XUDW", "site": "https://openreview.net/forum?id=7v3tkQmtpE", "pdf_size": 2059234, "rating": "5;5;5;6", "confidence": "4;3;5;4", "soundness": "3;2;2;3", "contribution": "2;2;2;2", "presentation": "3;2;2;3", "wc_summary": "81;51;72;212", "wc_strengths": "253;45;31;118", "wc_weaknesses": "237;204;161;129", "wc_questions": "34;8;58;21", "wc_review": "605;308;322;480", "wc_reply_reviewers": "159;182;0;39", "wc_reply_authors": "945;1269;783;275", "reply_reviewers": "1;1;0;1", "reply_authors": "3;4;3;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 104.0, 63.29691935631623 ], "wc_strengths_avg": [ 111.75, 87.98685981440637 ], "wc_weaknesses_avg": [ 182.75, 41.09972627646077 ], "wc_questions_avg": [ 30.25, 18.471261462065875 ], "wc_review_avg": [ 428.75, 122.13389169268291 ], "wc_reply_reviewers_avg": [ 95.0, 77.17836484404162 ], "wc_reply_authors_avg": [ 818.0, 359.0278540726332 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 1.0897247358851685 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=871146290267306675&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Tianjin University;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "http://www.tju.edu.cn;https://www.huawei.com", "aff_unique_abbr": "TJU;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "7vKWg2Vdrs", "title": "LeBD: A Run-time Defense Against Backdoor Attack in YOLO", "track": "main", "status": "Reject", "tldr": "", "abstract": "Backdoor attack poses a serious threat to deep neural networks (DNNs). An adversary can manipulate the prediction of a backdoored model by attaching a specific backdoor trigger to the input. 
However, existing defenses are mainly aimed at detecting backdoors in the digital world, which cannot meet the real-time requirement of application scenarios in the physical world. We propose a LayerCAM-enabled backdoor detector (LeBD) for monitoring backdoor attacks in the object detection (OD) network, YOLOv5. LeBD utilizes LayerCAM to locate the trigger and give a risk warning at run-time. In order to further improve the precision of trigger localization, we propose a backdoor detector based on counterfactual attribution LayerCAM (CA-LeBD). We evaluated the performance of the backdoor detector on images in the digital world and video streams in the physical world. Extensive experiments demonstrate that LeBD and CA-LeBD can efficiently locate the trigger and mitigate the effect of the backdoor in real time. In the physical-world scenario, the backdoor detection rate can reach over 90\\%.", "keywords": "backdoor detector;physical world;YOLO;LayerCAM;counterfactual attribution", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/23a737e05bc27b5ac5cb5fdb77cb4e4c62c36a6d.zip", "author": "Kai Chen;Weijun Shan;Xin Li;XUE YANG;Qing Li;Jun Yu", "authorids": "~Kai_Chen26;~Weijun_Shan1;~Xin_Li63;~XUE_YANG6;~Qing_Li21;~Jun_Yu9", "gender": "M;M;M;F;F;M", "homepage": "https://1208320416.github.io/kaichen.github.io/;https://shanweijun.github.io/;https://903861078.github.io/lixin.github.io/;https://shirley0302.github.io/XueYang.github.io/;https://qingli66.github.io/qingli.github.io/;https://sme.fudan.edu.cn/60/5e/c31157a352350/page.htm", "dblp": ";;;;;", "google_scholar": ";;;;;", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Kai_Chen26;~Weijun_Shan1;~Xin_Li63;~XUE_YANG6;~Qing_Li21;~Jun_Yu9", "aff": ";;;;Fudan University;Fudan University", "aff_domain": ";;;;fudan.edu.cn;fudan.edu.cn", "position": ";;;;Principal Researcher;Principal Researcher", "bibtex": "@misc{\nchen2024lebd,\ntitle={Le{BD}: A Run-time Defense Against Backdoor Attack in {YOLO}},\nauthor={Kai Chen and Weijun Shan and Xin Li and XUE YANG and Qing Li and Jun Yu},\nyear={2024},\nurl={https://openreview.net/forum?id=7vKWg2Vdrs}\n}", "github": "", "project": "", "reviewers": "dW4R;JMJB;4B6q;9mm5", "site": "https://openreview.net/forum?id=7vKWg2Vdrs", "pdf_size": 43049579, "rating": "1;3;3;6", "confidence": "5;5;4;3", "soundness": "1;3;3;3", "contribution": "1;2;1;3", "presentation": "2;2;3;3", "wc_summary": "78;53;72;40", "wc_strengths": "17;56;26;41", "wc_weaknesses": "57;149;172;344", "wc_questions": "87;9;6;64", "wc_review": "239;267;276;489", "wc_reply_reviewers": "0;123;85;0", "wc_reply_authors": "388;619;505;811", "reply_reviewers": "0;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 3.25, 1.7853571071357126 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "contribution_avg": [ 1.75, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 60.75, 15.122417134836613 ], "wc_strengths_avg": [ 35.0, 14.849242404917497 ], "wc_weaknesses_avg": [ 180.5, 103.74126469250315 ], "wc_questions_avg": [ 41.5, 34.97499106504532 ], "wc_review_avg": [ 317.75, 99.80825366671837 ], "wc_reply_reviewers_avg": [ 52.0, 53.70754136990447 ], "wc_reply_authors_avg": [ 580.75, 156.0198304703604 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.8866206949335731, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:9dgrYx05OTEJ:scholar.google.com/&scioq=LeBD:+A+Run-time+Defense+Against+Backdoor+Attack+in+YOLO&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Fudan University", "aff_unique_dep": "", "aff_unique_url": "https://www.fudan.edu.cn", "aff_unique_abbr": "Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Rethinking and Extending the Probabilistic Inference Capacity of GNNs", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19345", "id": "7vVWiCrFnd", "author_site": "Tuo Xu, Lei Zou", "tldr": "", "abstract": "Designing expressive Graph Neural Networks (GNNs) is an important topic in graph machine learning fields. Despite the existence of numerous approaches proposed to enhance GNNs based on Weisfeiler-Lehman (WL) tests, what GNNs can and cannot learn still lacks a deeper understanding. This paper adopts a fundamentally different approach to examine the expressive power of GNNs from a probabilistic perspective. By establishing connections between GNNs' predictions and the central inference problems of probabilistic graphical models (PGMs), we can analyze previous GNN variants with a novel hierarchical framework and gain new insights into their node-level and link-level behaviors. Additionally, we introduce novel methods that can provably enhance GNNs' ability to capture complex dependencies and make complex predictions. Experiments on both synthetic and real-world datasets demonstrate the effectiveness of our approaches.", "keywords": "graph neural networks;expressiveness;approximate inference", "primary_area": "learning on graphs and other geometries & topologies", "supplementary_material": "/attachment/54841c69346bbc60258f7a6556c00bfbb49e79f7.zip", "author": "Tuo Xu;Lei Zou", "authorids": "~Tuo_Xu1;~Lei_Zou2", "gender": "M;M", "homepage": "https://github.com/doujzc;https://www.wict.pku.edu.cn/zoulei/", "dblp": ";81/3390-1.html", "google_scholar": ";", "orcid": ";0000-0002-8586-4400", "linkedin": ";", "or_profile": "~Tuo_Xu1;~Lei_Zou2", "aff": "Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn", "position": "MS student;Full Professor", "bibtex": "@inproceedings{\nxu2024rethinking,\ntitle={Rethinking and Extending the Probabilistic Inference Capacity of {GNN}s},\nauthor={Tuo Xu and Lei Zou},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=7vVWiCrFnd}\n}", "github": "", "project": "", "reviewers": "G6RK;G9eq;rfPW;jjG1;ubyE", "pdf_size": 875277, "rating": "5;6;6;8;8", "confidence": "3;3;2;3;2", "soundness": "3;3;3;3;3", "contribution": "2;2;3;3;3", "presentation": "3;3;3;3;2", "wc_summary": "95;150;65;80;135", "wc_strengths": "86;60;56;92;68", "wc_weaknesses": "254;61;40;263;82", "wc_questions": "140;108;5;4;60", "wc_review": "575;379;166;439;345", "wc_reply_reviewers": "0;16;0;0;0", "wc_reply_authors": "1673;818;434;462;332", "reply_reviewers": "0;1;0;0;0", "reply_authors": "3;2;1;1;1", "rating_avg": [ 6.6, 1.2 ], "confidence_avg": [ 2.6, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 105.0, 32.4037034920393 ], "wc_strengths_avg": [ 72.4, 14.22111106770494 ], "wc_weaknesses_avg": [ 140.0, 97.70363350459388 ], "wc_questions_avg": [ 63.4, 54.41911428900694 ], 
"wc_review_avg": [ 380.8, 133.05998647226747 ], "wc_reply_reviewers_avg": [ 3.2, 6.400000000000001 ], "wc_reply_authors_avg": [ 743.8, 492.7240201167384 ], "reply_reviewers_avg": [ 0.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.2721655269759087, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9428183867229033194&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "openreview": "https://openreview.net/forum?id=7vVWiCrFnd", "pdf": "https://openreview.net/pdf?id=7vVWiCrFnd", "email": "pku.edu.cn;pku.edu.cn", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "7vnKTsj66A", "title": "When Self-Supervised Learning Meets Unbounded Pseudo-Label Generation", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Self-supervised learning (SSL) has demonstrated strong generalization abilities across diverse downstream tasks. However, it is difficult for SSL to accurately gather samples of the same category and separate samples of different categories in the training stage. In this paper, we present a novel approach of generating pseudo-labels for augmented samples to regulate their feature-space relationships. To align the pseudo-label space with the ground-truth label space, we propose an instance-level pseudo-label generation mechanism. Building upon our observations that pseudo-labels can encompass unbounded label noise and that learning remains robust to such noise in the early stages of training, we propose Precise Adjustment Regularization (PAR) for precise dynamic relationship mining. Finally, we propose a PAR-based bi-level optimization learning mechanism mechanism (PBOLM) to promote high-quality representations in SSL. Theoretically, from a data generation perspective, we demonstrate that the proposed PBOLM is more conducive to extracting critical generative factors in data generation. 
Empirically, based on various downstream tasks, we demonstrate that PBOLM can be considered a plug-and-play module to enhance the performance of SSL methods.", "keywords": "Self-supervised learning;contrastive learning;representation learning;bi-level optimization", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Wenwen Qiang;Lingyu Si;Jie Hu;Jiangmeng Li;Changwen Zheng;Fuchun Sun;Hui Xiong", "authorids": "~Wenwen_Qiang1;~Lingyu_Si1;~Jie_Hu4;~Jiangmeng_Li1;~Changwen_Zheng1;~Fuchun_Sun1;~Hui_Xiong1", "gender": "M;M;M;M;M;M;M", "homepage": ";;https://jiangmengli.github.io/;http://people.ucas.ac.cn/~cwzheng;https://www.cs.tsinghua.edu.cn/info/1121/3555.htm;https://www.hkust-gz.edu.cn/people/hui-xiong/;", "dblp": "261/6913;298/0368;293/0997;81/2728;;262/1686-1.html;90/5064-19", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;https://scholar.google.com.sg/citations?user=-kU4VLcAAAAJ;-lErK1QAAAAJ;;cVDF1tkAAAAJ;DAJdHnkAAAAJ", "orcid": "0000-0002-7985-5743;0000-0002-7735-6676;0000-0002-3376-1522;0000-0002-2311-6757;;0000-0001-6016-6465;0000-0002-5150-1003", "linkedin": ";;jiangmeng-li-86aaa7125/;;;;%E6%9D%B0-%E8%83%A1-97093710a/", "or_profile": "~Wenwen_Qiang1;~Lingyu_Si1;~Jiangmeng_Li1;~Changwen_Zheng1;~Fuchun_Sun1;~Hui_Xiong1;~Jie_Hu3", "aff": "Institute of Software Chinese Academy of Sciences;Institute of Software Chinese Academy of Sciences;Institute of Software, Chinese Academy of Sciences;Institute of Software, Chinese Academy of Sciences;Tsinghua University;Hong Kong University of Science and Technology (Guangzhou);Meituan", "aff_domain": "iscas.ac.cn;iscas.ac.cn;iscas.ac.cn;iscas.ac.cn;cs.tsinghua.edu.cn;hkust.edu;meituan.com", "position": "Assistant Professor;Associate Professor;Assistant Professor;Full Professor;Full Professor;Full Professor;Principal Researcher", "bibtex": "@misc{\nqiang2024when,\ntitle={When Self-Supervised Learning Meets Unbounded Pseudo-Label Generation},\nauthor={Wenwen Qiang and Lingyu Si and Jie Hu and Jiangmeng Li and Changwen Zheng and Fuchun Sun and Hui Xiong},\nyear={2024},\nurl={https://openreview.net/forum?id=7vnKTsj66A}\n}", "github": "", "project": "", "reviewers": "fBvz;bjmg;ZMGw", "site": "https://openreview.net/forum?id=7vnKTsj66A", "pdf_size": 400215, "rating": "3;5;5", "confidence": "4;2;4", "soundness": "3;3;2", "contribution": "2;2;2", "presentation": "2;2;2", "wc_summary": "64;92;128", "wc_strengths": "49;33;88", "wc_weaknesses": "220;193;148", "wc_questions": "5;12;19", "wc_review": "338;330;383", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 94.66666666666667, 26.195843605851334 ], "wc_strengths_avg": [ 56.666666666666664, 23.098821518760552 ], "wc_weaknesses_avg": [ 187.0, 29.698484809834994 ], "wc_questions_avg": [ 12.0, 5.715476066494082 ], "wc_review_avg": [ 350.3333333333333, 23.32857094256359 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 3, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:YMKnU--v9AMJ:scholar.google.com/&scioq=When+Self-Supervised+Learning+Meets+Unbounded+Pseudo-Label+Generation&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;1;2;3", "aff_unique_norm": "Chinese Academy of Sciences;Tsinghua University;Hong Kong University of Science and Technology;Meituan", "aff_unique_dep": "Institute of Software;;;", "aff_unique_url": "http://www.is.cas.cn;https://www.tsinghua.edu.cn;https://www.ust.hk;https://www.meituan.com", "aff_unique_abbr": "CAS;THU;HKUST;Meituan", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "7vzyqs8UbA", "title": "LMCC-MBC: Metric-Constrained Model-Based Clustering with Wasserstein-2 Distance of Gaussian Markov Random Fields", "track": "main", "status": "Reject", "tldr": "", "abstract": "A wide range of temporal (1D) and spatial (2D) data analysis problems can be formulated as model-based clustering problems given metric constraints. For example, subsequence clustering of multivariate time series is constrained by 1D temporal continuity, while urban functional area identification is constrained by the spatial proximity in the 2D space. Existing works model such metric constraints independent of the model estimation process, failing to leverage the correlation between adjacent estimated models and their locations in the metric space. To solve this problem we propose a novel metric-constrained model-based clustering algorithm LMCC-MBC that softly requires the Wasserstein-2 distance between estimated model parameters (such as those of Gaussian Markov Random Fields) to be a locally monotonic continuous function of the metric distance. We theoretically prove that satisfaction of this requirement guarantees intra-cluster cohesion and inter-cluster separation. Moreover, without explicitly optimizing log-likelihood LMCC-MBC voids the expensive EM-step that is needed by previous approaches (e.g., TICC and STICC), and enables faster and more stable clustering. 
Experiments on both 1D and 2D synthetic as well as real-world datasets demonstrate that our algorithm successfully captures the latent correlation between the estimated models and the metric constraints, and outperforms strong baselines by a margin up to 14.3% in ARI (Adjusted Rand Index) and 32.1% in NMI (Normalized Mutual Information).", "keywords": "unsupervised learning;clustering;model-based clustering;metric-constrained clustering", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/0940559c62d56eef5dab3517bf8141b275d6cf62.zip", "author": "Zhangyu Wang;Gengchen Mai;Krzysztof Janowicz;Ni Lao", "authorids": "~Zhangyu_Wang1;~Gengchen_Mai1;~Krzysztof_Janowicz2;~Ni_Lao1", "gender": ";M;;M", "homepage": ";https://gengchenmai.github.io/;;http://www.cs.cmu.edu/~nlao", "dblp": ";151/5583;95/5567;82/283", "google_scholar": "8vNk5Z8AAAAJ;X2Wfl1UAAAAJ;;iUgWR3MAAAAJ", "orcid": ";0000-0002-7818-7309;;0000-0002-4034-7784", "linkedin": "zhangyu-wang-26aab0170/;gengchen-mai-144439121/;;ni-lao", "or_profile": "~Zhangyu_Wang1;~Gengchen_Mai1;~Krzysztof_Janowicz2;~Ni_Lao1", "aff": "University of California, Santa Barbara;University of Georgia;UC Santa Barbara;Google", "aff_domain": "ucsb.edu;uga.edu;ucsb.edu;google.com", "position": "PhD student;Assistant Professor;Full Professor;Researcher", "bibtex": "@misc{\nwang2024lmccmbc,\ntitle={{LMCC}-{MBC}: Metric-Constrained Model-Based Clustering with Wasserstein-2 Distance of Gaussian Markov Random Fields},\nauthor={Zhangyu Wang and Gengchen Mai and Krzysztof Janowicz and Ni Lao},\nyear={2024},\nurl={https://openreview.net/forum?id=7vzyqs8UbA}\n}", "github": "", "project": "", "reviewers": "FzKt;cTE9;FyBz", "site": "https://openreview.net/forum?id=7vzyqs8UbA", "pdf_size": 1404434, "rating": "5;6;6", "confidence": "2;5;3", "soundness": "2;3;2", "contribution": "1;3;2", "presentation": "3;2;3", "wc_summary": "71;133;166", "wc_strengths": "45;44;119", "wc_weaknesses": "221;189;186", "wc_questions": "1;2;163", "wc_review": "338;368;634", "wc_reply_reviewers": "0;59;0", "wc_reply_authors": "460;614;415", "reply_reviewers": "0;1;0", "reply_authors": "1;2;1", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 1.247219128924647 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.0, 0.816496580927726 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 123.33333333333333, 39.38132665222045 ], "wc_strengths_avg": [ 69.33333333333333, 35.122009560324926 ], "wc_weaknesses_avg": [ 198.66666666666666, 15.839472494022298 ], "wc_questions_avg": [ 55.333333333333336, 76.13292469242346 ], "wc_review_avg": [ 446.6666666666667, 133.02965417112415 ], "wc_reply_reviewers_avg": [ 19.666666666666668, 27.812866726670865 ], "wc_reply_authors_avg": [ 496.3333333333333, 85.20693764138119 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7559289460184542, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:JgrvziXnTrwJ:scholar.google.com/&scioq=LMCC-MBC:+Metric-Constrained+Model-Based+Clustering+with+Wasserstein-2+Distance+of+Gaussian+Markov+Random+Fields&hl=en&as_sdt=0,14", "gs_version_total": 0, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "University of California, Santa Barbara;University of Georgia;Google", 
"aff_unique_dep": ";;Google", "aff_unique_url": "https://www.ucsb.edu;https://www.uga.edu;https://www.google.com", "aff_unique_abbr": "UCSB;UGA;Google", "aff_campus_unique_index": "0;0;2", "aff_campus_unique": "Santa Barbara;;Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Cauchy-Schwarz Divergence Information Bottleneck for Regression", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19344", "id": "7wY67ZDQTE", "author_site": "Shujian Yu, Xi Yu, Sigurd L\u00f8kse, Robert Jenssen, Jose Principe", "tldr": "", "abstract": "The information bottleneck (IB) approach is popular to improve the generalization, robustness and explainability of deep neural networks. Essentially, it aims to find a minimum sufficient representation $\\mathbf{t}$ by striking a trade-off between a compression term $I(\\mathbf{x};\\mathbf{t})$ and a prediction term $I(y;\\mathbf{t})$, where $I(\\cdot;\\cdot)$ refers to the mutual information (MI). MI is for the IB for the most part expressed in terms of the Kullback-Leibler (KL) divergence, which in the regression case corresponds to prediction based on mean squared error (MSE) loss with Gaussian assumption and compression approximated by variational inference. \nIn this paper, we study the IB principle for the regression problem and develop a new way to parameterize the IB with deep neural networks by exploiting favorable properties of the Cauchy-Schwarz (CS) divergence. By doing so, we move away from MSE-based regression and ease estimation by avoiding variational approximations or distributional assumptions. We investigate the improved generalization ability of our proposed CS-IB and demonstrate strong adversarial robustness guarantees. We demonstrate its superior performance on six real-world regression tasks over other popular deep IB approaches. We additionally observe that the solutions discovered by CS-IB always achieve the best trade-off between prediction accuracy and compression ratio in the information plane. 
The code is available at \\url{https://github.com/SJYuCNEL/Cauchy-Schwarz-Information-Bottleneck}.", "keywords": "Information Bottleneck;Cauchy-Schwarz Divergence;Regression", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/f2602b561c7a9045ae501b67e9061b0c34bdb59b.zip", "author": "Shujian Yu;Xi Yu;Sigurd L\u00f8kse;Robert Jenssen;Jose C Principe", "authorids": "~Shujian_Yu1;~Xi_Yu1;~Sigurd_L\u00f8kse1;~Robert_Jenssen1;~Jose_C_Principe1", "gender": "M;M;M;M;M", "homepage": "https://sjyucnel.github.io/;https://www.bnl.gov/staff/xyu1;;https://uit.no/ansatte/robert.jenssen;http://www.cnel.ufl.edu", "dblp": "154/5763.html;;163/4540;45/5813;", "google_scholar": "O8kpnMoAAAAJ;;7nKP_jYAAAAJ;HiviXjIAAAAJ;", "orcid": ";0000-0002-2029-1680;0000-0002-1953-4315;0000-0002-7496-8474;", "linkedin": ";;;robert-jenssen-10b79318/?originalSubdomain=no;", "or_profile": "~Shujian_Yu1;~Xi_Yu1;~Sigurd_L\u00f8kse1;~Robert_Jenssen1;~Jose_C_Principe1", "aff": "University of Troms\u00f8;Brookhaven National Laboratory;NORCE Norwegian Research Centre;UiT The Arctic University of Norway;", "aff_domain": "uit.no;bnl.gov;norceresearch.no;uit.no;", "position": "Guest Associate Professor;Postdoc;Researcher;Full Professor;", "bibtex": "@inproceedings{\nyu2024cauchyschwarz,\ntitle={Cauchy-Schwarz Divergence Information Bottleneck for Regression},\nauthor={Shujian Yu and Xi Yu and Sigurd L{\\o}kse and Robert Jenssen and Jose C Principe},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=7wY67ZDQTE}\n}", "github": "", "project": "", "reviewers": "MhfL;aubm;uyPi;zqzB;SV68", "pdf_size": 2257629, "rating": "5;6;6;6;6", "confidence": "4;4;2;2;4", "soundness": "3;2;3;3;4", "contribution": "2;3;3;2;3", "presentation": "2;3;3;3;3", "wc_summary": "148;62;77;47;70", "wc_strengths": "151;79;40;39;39", "wc_weaknesses": "1217;497;106;78;91", "wc_questions": "435;2;3;50;22", "wc_review": "1951;640;226;214;222", "wc_reply_reviewers": "121;16;26;0;0", "wc_reply_authors": "1847;1205;204;478;386", "reply_reviewers": "1;1;1;0;0", "reply_authors": "3;3;1;1;1", "rating_avg": [ 5.8, 0.39999999999999997 ], "confidence_avg": [ 3.2, 0.9797958971132712 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "contribution_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 80.8, 35.05082024717824 ], "wc_strengths_avg": [ 69.6, 43.50448252766604 ], "wc_weaknesses_avg": [ 397.8, 438.7424757189575 ], "wc_questions_avg": [ 102.4, 167.20837299609133 ], "wc_review_avg": [ 650.6, 670.1873170987348 ], "wc_reply_reviewers_avg": [ 32.6, 45.297240534054616 ], "wc_reply_authors_avg": [ 824.0, 614.4346995409683 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.8, 0.9797958971132713 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.408248290463863, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12295463531972727817&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=7wY67ZDQTE", "pdf": "https://openreview.net/pdf?id=7wY67ZDQTE", "email": "uit.no;bnl.gov;norceresearch.no;uit.no;", "author_num": 5, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of Troms\u00f8;Brookhaven National Laboratory;NORCE Norwegian Research Centre;Arctic University of Norway", "aff_unique_dep": ";;;", 
"aff_unique_url": "https://uit.no;https://www.bnl.gov;https://www.norce.no;https://www.uit.no", "aff_unique_abbr": "UIT;BNL;NORCE;UiT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Norway;United States" }, { "id": "7yyAoyfVEC", "title": "Hypothesis- and Structure-based prompting for medical and business diagnosis", "track": "main", "status": "Reject", "tldr": "", "abstract": "In real-world scenarios like healthcare and business, tackling many-to-one problems is challenging but crucial. Take medical diagnosis: A patient's chief complaint can be caused by various diseases, yet time and resource constraints make identifying the cause via difficult.\nTo tackle these issues, our study introduces Hypothesis-based and Structure-based (HS) prompting, a method designed to enhance the problem-solving capabilities of Large Language Models (LLMs). Our approach starts by efficiently breaking down the problem space using a Mutually Exclusive and Collectively Exhaustive (MECE) framework. Armed with this structure, LLMs generate, prioritize, and validate hypotheses through targeted questioning and data collection. The ability to ask the right questions is crucial for pinpointing the root cause of a problem accurately. We provide an easy-to-follow guide for crafting examples, enabling users to develop tailored HS prompts for specific tasks. We validate our method through diverse case studies in business consulting and medical diagnosis, which are further evaluated by domain experts. Interestingly, adding one sentence ``You can request one data in each response if needed'' initiates human interaction and improves performance.", "keywords": "Large Language Models;Prompting method;Medical diagnosis;Business consulting application", "primary_area": "generative models", "supplementary_material": "/attachment/b1902f38586375c759a0a65d630345a9f41ec8ad.pdf", "author": "Juyeon Heo;Kyunghyun Lee;Hyonkeun Joh;Umang Bhatt;Adrian Weller", "authorids": "~Juyeon_Heo1;~Kyunghyun_Lee3;~Hyonkeun_Joh1;~Umang_Bhatt1;~Adrian_Weller1", "gender": "F;M;M;M;M", "homepage": "https://sites.google.com/view/juyeonheo/%ED%99%88;https://www.linkedin.com/in/kyunghyun-kh-lee-5148bb232/;;https://umangsbhatt.github.io;http://mlg.eng.cam.ac.uk/adrian/", "dblp": ";;;207/7955;73/8324", "google_scholar": ";;;https://scholar.google.com/citations?hl=en;https://scholar.google.co.uk/citations?user=Ek4hM10AAAAJ", "orcid": ";;0000-0001-9163-8859;;", "linkedin": ";;;umangsbhatt/;", "or_profile": "~Juyeon_Heo1;~Kyunghyun_Lee3;~Hyonkeun_Joh1;~Umang_Bhatt1;~Adrian_Weller1", "aff": "University of Cambridge;;Yonsei University;New York University;University of Cambridge", "aff_domain": "cam.ac.uk;;yonsei.ac.kr;nyu.edu;cam.ac.uk", "position": "PhD student;;PhD student;Assistant Professor;Principal Researcher", "bibtex": "@misc{\nheo2024hypothesis,\ntitle={Hypothesis- and Structure-based prompting for medical and business diagnosis},\nauthor={Juyeon Heo and Kyunghyun Lee and Hyonkeun Joh and Umang Bhatt and Adrian Weller},\nyear={2024},\nurl={https://openreview.net/forum?id=7yyAoyfVEC}\n}", "github": "", "project": "", "reviewers": "hnXU;ueoZ;HT4V;y4hX", "site": "https://openreview.net/forum?id=7yyAoyfVEC", "pdf_size": 463113, "rating": "1;3;3;3", "confidence": "4;4;3;2", "soundness": "1;3;2;3", "contribution": "1;2;2;2", "presentation": "2;3;3;2", "wc_summary": "312;75;89;75", "wc_strengths": "19;17;45;110", "wc_weaknesses": "369;155;157;113", "wc_questions": "42;3;92;168", "wc_review": 
"742;250;383;466", "wc_reply_reviewers": "260;23;71;0", "wc_reply_authors": "884;706;775;742", "reply_reviewers": "2;1;1;0", "reply_authors": "3;2;2;1", "rating_avg": [ 2.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 137.75, 100.76550749140303 ], "wc_strengths_avg": [ 47.75, 37.599035891894886 ], "wc_weaknesses_avg": [ 198.5, 99.99374980467529 ], "wc_questions_avg": [ 76.25, 61.653771174195015 ], "wc_review_avg": [ 460.25, 179.99218733045055 ], "wc_reply_reviewers_avg": [ 88.5, 102.27536360238472 ], "wc_reply_authors_avg": [ 776.75, 66.55589755987069 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:uNpwW_eFtD0J:scholar.google.com/&scioq=Hypothesis-+and+Structure-based+prompting+for+medical+and+business+diagnosis&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Cambridge;Yonsei University;New York University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cam.ac.uk;https://www.yonsei.ac.kr;https://www.nyu.edu", "aff_unique_abbr": "Cambridge;Yonsei;NYU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;1;2;0", "aff_country_unique": "United Kingdom;South Korea;United States" }, { "title": "Free from Bellman Completeness: Trajectory Stitching via Model-based Return-conditioned Supervised Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19343", "id": "7zY781bMDO", "author_site": "Zhaoyi Zhou, Chuning Zhu, Runlong Zhou, Qiwen Cui, Abhishek Gupta, Simon Du", "tldr": "", "abstract": "Off-policy dynamic programming (DP) techniques such as $Q$-learning have proven to be important in sequential decision-making problems. In the presence of function approximation, however, these techniques often diverge due to the absence of Bellman completeness in the function classes considered, a crucial condition for the success of DP-based methods. In this paper, we show how off-policy learning techniques based on return-conditioned supervised learning (RCSL) are able to circumvent these challenges of Bellman completeness, converging under significantly more relaxed assumptions inherited from supervised learning. We prove there exists a natural environment in which if one uses two-layer multilayer perceptron as the function approximator, the layer width needs to grow *linearly* with the state space size to satisfy Bellman completeness while a constant layer width is enough for RCSL. These findings take a step towards explaining the superior empirical performance of RCSL methods compared to DP-based methods in environments with near-optimal datasets. Furthermore, in order to learn from sub-optimal datasets, we propose a simple framework called MBRCSL, granting RCSL methods the ability of dynamic programming to stitch together segments from distinct trajectories. MBRCSL leverages learned dynamics models and forward sampling to accomplish trajectory stitching while avoiding the need for Bellman completeness that plagues all dynamic programming algorithms. 
We propose both theoretical analysis and experimental evaluation to back these claims, outperforming state-of-the-art model-free and model-based offline RL algorithms across several simulated robotics problems.", "keywords": "Offline Reinforcement Learning;Return-Conditioned Supervised Learning;Bellman Completeness;Trajectory Stitching", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Zhaoyi Zhou;Chuning Zhu;Runlong Zhou;Qiwen Cui;Abhishek Gupta;Simon Shaolei Du", "authorids": "~Zhaoyi_Zhou1;~Chuning_Zhu1;~Runlong_Zhou1;~Qiwen_Cui1;~Abhishek_Gupta1;~Simon_Shaolei_Du1", "gender": "M;M;M;M;M;M", "homepage": "https://zhaoyizhou1123.github.io/;https://homes.cs.washington.edu/~zchuning/;https://vectorzhou.com;;https://homes.cs.washington.edu/~abhgupta/;http://simonshaoleidu.com", "dblp": "211/3392;295/9468;290/8755;276/6268;18/6404-4;176/5602", "google_scholar": "9fPuoP4AAAAJ;;https://scholar.google.com/citations?hl=en;AnSVkUYAAAAJ;1wLVDP4AAAAJ;OttawxUAAAAJ", "orcid": ";;;;;", "linkedin": ";chuning-zhu-39b086167/;;;;", "or_profile": "~Zhaoyi_Zhou1;~Chuning_Zhu1;~Runlong_Zhou1;~Qiwen_Cui1;~Abhishek_Gupta1;~Simon_Shaolei_Du1", "aff": "Tsinghua University;University of Washington;Department of Computer Science, University of Washington;Department of Computer Science, University of Washington;University of Washington;University of Washington", "aff_domain": "tsinghua.edu.cn;cs.washington.edu;cs.washington.edu;cs.washington.edu;uw.edu;washington.edu", "position": "Undergrad student;PhD student;PhD student;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhou2024free,\ntitle={Free from Bellman Completeness: Trajectory Stitching via Model-based Return-conditioned Supervised Learning},\nauthor={Zhaoyi Zhou and Chuning Zhu and Runlong Zhou and Qiwen Cui and Abhishek Gupta and Simon Shaolei Du},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=7zY781bMDO}\n}", "github": "", "project": "", "reviewers": "HXBB;YXKR;BsaB;9pmM", "pdf_size": 1132711, "rating": "5;5;6;8", "confidence": "5;3;3;4", "soundness": "3;2;3;3", "contribution": "3;2;3;3", "presentation": "3;3;3;4", "wc_summary": "160;104;92;66", "wc_strengths": "59;57;122;106", "wc_weaknesses": "33;30;114;97", "wc_questions": "291;234;182;250", "wc_review": "543;425;510;519", "wc_reply_reviewers": "0;147;21;39", "wc_reply_authors": "1813;1614;617;1161", "reply_reviewers": "0;2;1;1", "reply_authors": "4;5;1;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 105.5, 34.332928800205785 ], "wc_strengths_avg": [ 86.0, 28.574464124459098 ], "wc_weaknesses_avg": [ 68.5, 37.5 ], "wc_questions_avg": [ 239.25, 39.04724702203729 ], "wc_review_avg": [ 499.25, 44.53299338692606 ], "wc_reply_reviewers_avg": [ 51.75, 56.69821425759368 ], "wc_reply_authors_avg": [ 1301.25, 460.3174855466605 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.0, 1.5811388300841898 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17828791884504357290&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=7zY781bMDO", "pdf": 
"https://openreview.net/pdf?id=7zY781bMDO", "email": "tsinghua.edu.cn;cs.washington.edu;cs.washington.edu;cs.washington.edu;uw.edu;washington.edu", "author_num": 6, "aff_unique_index": "0;1;1;1;1;1", "aff_unique_norm": "Tsinghua University;University of Washington", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.washington.edu", "aff_unique_abbr": "THU;UW", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;1;1;1;1;1", "aff_country_unique": "China;United States" }, { "id": "7zxGHwe7Vw", "title": "FedAnchor: Enhancing Federated Semi-Supervised Learning with Label Contrastive Loss", "track": "main", "status": "Reject", "tldr": "", "abstract": "Federated learning (FL) is a distributed learning paradigm that allows devices to collaboratively train a shared global model while keeping the data locally. Due to the nature of FL, it provides access to an astonishing amount of training data for meaningful research and applications. However, the assumption that all of these private data samples include correct and complete annotations is not realistic for real-world applications. Federated Semi-Supervised Learning (FSSL) provides a powerful approach for training models on a large amount of data without requiring all data points to be completely labeled. In this paper, we propose FedAnchor, an innovative method that tackles the label-at-server FSSL scenario where the server maintains a limited amount of labeled data, while clients' private data remain unlabeled. FedAnchor introduces a unique double-head structure, with one anchor head attached with a newly designed label contrastive loss based on the cosine similarity to train on labeled anchor data to provide better pseudo-labels for faster convergence and higher performance. Following this approach, we alleviate the confirmation bias and over-fitting easy-to-learn data problems coming from pseudo-labeling based on high-confidence model prediction samples. 
We conduct extensive experiments on three different datasets and demonstrate our method can outperform the state-of-the-art method by a significant margin, both in terms of convergence rate and model accuracy.", "keywords": "Federated Learning;Semi-supervised Learning", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Xinchi Qiu;Yan Gao;Lorenzo Sani;Heng Pan;Wanru Zhao;Pedro Porto Buarque de Gusmao;Nicholas Donald Lane", "authorids": "~Xinchi_Qiu1;~Yan_Gao4;~Lorenzo_Sani1;~Heng_Pan2;~Wanru_Zhao1;~Pedro_Porto_Buarque_de_Gusmao1;~Nicholas_Donald_Lane1", "gender": "F;M;M;;;M;", "homepage": ";https://www.cst.cam.ac.uk/people/yg381;https://relogu.github.io/;;;https://portobgusmao.com/;", "dblp": "265/6559;;237/2312;;;88/10808;", "google_scholar": "yW6vsS8AAAAJ;https://scholar.google.com/citations?hl=en;IoCEzUMAAAAJ;;;https://scholar.google.it/citations?user=TfdVttMAAAAJ;", "orcid": ";;;;;0000-0002-7072-9898;", "linkedin": "xinchi-qiu-686a7394/;;;heng-pan-117915247/;;pedropgusmao;", "or_profile": "~Xinchi_Qiu1;~Yan_Gao4;~Lorenzo_Sani1;~Heng_Pan2;~Wanru_Zhao1;~Pedro_Porto_Buarque_de_Gusmao1;~Nicholas_Donald_Lane1", "aff": "University of Cambridge;University of Cambridge;University of Cambridge;University of Cambridge;;University of Surrey;", "aff_domain": "cam.ac.uk;cam.ac.uk;cam.ac.uk;cam.ac.uk;;surrey.ac.uk;", "position": "PhD student;PhD student;PhD student;Researcher;;Lecturer;", "bibtex": "@misc{\nqiu2024fedanchor,\ntitle={FedAnchor: Enhancing Federated Semi-Supervised Learning with Label Contrastive Loss},\nauthor={Xinchi Qiu and Yan Gao and Lorenzo Sani and Heng Pan and Wanru Zhao and Pedro Porto Buarque de Gusmao and Nicholas Donald Lane},\nyear={2024},\nurl={https://openreview.net/forum?id=7zxGHwe7Vw}\n}", "github": "", "project": "", "reviewers": "6dEL;LCsM;WkFS;wVdB", "site": "https://openreview.net/forum?id=7zxGHwe7Vw", "pdf_size": 8124750, "rating": "3;3;3;5", "confidence": "4;3;4;3", "soundness": "3;2;2;2", "contribution": "2;2;2;2", "presentation": "1;1;3;3", "wc_summary": "81;56;100;83", "wc_strengths": "13;7;38;155", "wc_weaknesses": "493;156;127;367", "wc_questions": "9;42;3;5", "wc_review": "596;261;268;610", "wc_reply_reviewers": "85;17;91;0", "wc_reply_authors": "394;287;475;314", "reply_reviewers": "1;1;2;0", "reply_authors": "2;2;2;2", "rating_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.0, 1.0 ], "wc_summary_avg": [ 80.0, 15.700318468107582 ], "wc_strengths_avg": [ 53.25, 59.88478521294036 ], "wc_weaknesses_avg": [ 285.75, 151.31981859624338 ], "wc_questions_avg": [ 14.75, 15.880412463157246 ], "wc_review_avg": [ 433.75, 169.34044850537038 ], "wc_reply_reviewers_avg": [ 48.25, 40.25776322648838 ], "wc_reply_authors_avg": [ 367.5, 73.4863932983515 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:dUuKApYhJtsJ:scholar.google.com/&scioq=FedAnchor:+Enhancing+Federated+Semi-Supervised+Learning+with+Label+Contrastive+Loss&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "University of Cambridge;University of Surrey", "aff_unique_dep": ";", "aff_unique_url": 
"https://www.cam.ac.uk;https://www.surrey.ac.uk", "aff_unique_abbr": "Cambridge;Surrey", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "80faVLl6ji", "title": "BRIDGING THE GAP BETWEEN HUMAN MOTION AND ACTION SEMANTICS VIA KINEMATIC PHRASES", "track": "main", "status": "Reject", "tldr": "", "abstract": "The goal of motion understanding is to establish a reliable mapping between motion and action semantics, while it is a challenging many-to-many problem. An abstract action semantic (i.e., walk forwards) could be conveyed by perceptually diverse motions (walk with arms up or swinging), while a motion could carry different semantics w.r.t. its context and intention. This makes an elegant mapping between them difficult. Previous attempts adopted direct-mapping paradigms with limited reliability. Also, current automatic metrics fail to provide reliable assessments of the consistency between motions and action semantics. We identify the source of these problems as the significant gap between the two modalities. To alleviate this gap, we propose Kinematic Phrases (KP) that take the objective kinematic facts of human motion with proper abstraction, interpretability, and generality characteristics. Based on KP as a mediator, we can unify a motion knowledge base and build a motion understanding system. Meanwhile, KP can be automatically converted from motions and to text descriptions with no subjective bias, inspiring Kinematic Prompt Generation (KPG) as a novel automatic motion generation benchmark. In extensive experiments, our approach shows superiority over other methods. Our code and data would be made publicly available.", "keywords": "Motion generation;Motion representation", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/d431a6b69944d0b4e6b748affb4f09d388269107.zip", "author": "Xinpeng Liu;Yong-Lu Li;Ailing Zeng;Zizheng Zhou;Yang You;Cewu Lu", "authorids": "~Xinpeng_Liu1;~Yong-Lu_Li1;~Ailing_Zeng1;~Zizheng_Zhou1;~Yang_You2;~Cewu_Lu3", "gender": "M;M;F;M;M;M", "homepage": "https://foruck.github.io/;https://dirtyharrylyl.github.io/;https://ailingzeng.site/;https://darth-zzz.github.io;https://qq456cvb.github.io;https://www.mvig.org/", "dblp": "27/5719-2;198/9345;226/4720;;33/8167;", "google_scholar": "DBE-ju8AAAAJ;https://scholar.google.com.hk/citations?user=UExAaVgAAAAJ;Tn7fzS8AAAAJ;h_UN0qUAAAAJ;1YV1_KUAAAAJ;https://scholar.google.com.tw/citations?user=QZVQEWAAAAAJ", "orcid": "0000-0002-7525-3243;0000-0003-0478-0692;;;;", "linkedin": ";%E6%B0%B8%E9%9C%B2-%E6%9D%8E-991b99139/;%E7%88%B1%E7%8E%B2-%E6%9B%BE-65504112a/;;;", "or_profile": "~Xinpeng_Liu1;~Yong-Lu_Li1;~Ailing_Zeng1;~Zizheng_Zhou1;~Yang_You2;~Cewu_Lu3", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;International Digital Economy Academy;Shanghai Jiaotong University;Stanford University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu;sjtu.edu.cn;idea.edu.cn;sjtu.edu.cn;stanford.edu;sjtu.edu.cn", "position": "PhD student;Assistant Professor;Researcher;Undergrad student;Postdoc;Full Professor", "bibtex": "@misc{\nliu2024bridging,\ntitle={{BRIDGING} {THE} {GAP} {BETWEEN} {HUMAN} {MOTION} {AND} {ACTION} {SEMANTICS} {VIA} {KINEMATIC} {PHRASES}},\nauthor={Xinpeng Liu and Yong-Lu Li and Ailing Zeng and Zizheng Zhou and Yang You and Cewu Lu},\nyear={2024},\nurl={https://openreview.net/forum?id=80faVLl6ji}\n}", 
"github": "", "project": "", "reviewers": "DPfm;HZyv;537a;87dC", "site": "https://openreview.net/forum?id=80faVLl6ji", "pdf_size": 2891901, "rating": "6;6;6;6", "confidence": "4;3;4;2", "soundness": "3;2;3;3", "contribution": "3;2;3;3", "presentation": "3;3;3;3", "wc_summary": "90;40;60;91", "wc_strengths": "190;31;47;84", "wc_weaknesses": "109;250;126;261", "wc_questions": "131;248;240;437", "wc_review": "520;569;473;873", "wc_reply_reviewers": "234;0;0;12", "wc_reply_authors": "1002;697;539;1362", "reply_reviewers": "2;0;0;1", "reply_authors": "3;1;1;3", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 70.25, 21.45198126048035 ], "wc_strengths_avg": [ 88.0, 61.94755846681934 ], "wc_weaknesses_avg": [ 186.5, 69.37038272923107 ], "wc_questions_avg": [ 264.0, 110.05680351527569 ], "wc_review_avg": [ 608.75, 156.29519346416254 ], "wc_reply_reviewers_avg": [ 61.5, 99.71333912772153 ], "wc_reply_authors_avg": [ 900.0, 314.39545162104366 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.0, 1.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7699111375450909101&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff_unique_index": "0;0;1;0;2;0", "aff_unique_norm": "Shanghai Jiao Tong University;International Digital Economy Academy;Stanford University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.sjtu.edu.cn;;https://www.stanford.edu", "aff_unique_abbr": "SJTU;;Stanford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0;2;0", "aff_country_unique": "China;;United States" }, { "title": "Dynamic Neighborhood Construction for Structured Large Discrete Action Spaces", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19342", "id": "80wh3jjCZf", "author_site": "Fabian Akkerman, Julius Luy, Wouter van Heeswijk, Maximilian Schiffer", "tldr": "", "abstract": "Large discrete action spaces (LDAS) remain a central challenge in reinforcement learning. Existing solution approaches can handle unstructured LDAS with up to a few million actions. However, many real-world applications in logistics, production, and transportation systems have combinatorial action spaces, whose size grows well beyond millions of actions, even on small instances. Fortunately, such action spaces exhibit structure, e.g., equally spaced discrete resource units. With this work, we focus on handling structured LDAS (SLDAS) with sizes that cannot be handled by current benchmarks: we propose Dynamic Neighborhood Construction (DNC), a novel exploitation paradigm for SLDAS. We present a scalable neighborhood exploration heuristic that utilizes this paradigm and efficiently explores the discrete neighborhood around the continuous proxy action in structured action spaces with up to $10^{73}$ actions. We demonstrate the performance of our method by benchmarking it against three state-of-the-art approaches designed for large discrete action spaces across three distinct environments. Our results show that DNC matches or outperforms state-of-the-art approaches while being computationally more efficient. 
Furthermore, our method scales to action spaces that so far remained computationally intractable for existing methodologies.", "keywords": "Structured large discrete action space;Reinforcement learning;Neighborhood search", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/cd6e8b1d6539421502f8244cda3ae5e0b205e9b4.zip", "author": "Fabian Akkerman;Julius Luy;Wouter van Heeswijk;Maximilian Schiffer", "authorids": "~Fabian_Akkerman1;~Julius_Luy1;~Wouter_van_Heeswijk1;~Maximilian_Schiffer1", "gender": ";M;;M", "homepage": ";https://www.ot.mgt.tum.de/osm/team/alumni/julius-luy/;;https://www.professors.wi.tum.de/osm/team/maximilian-schiffer/", "dblp": ";;;198/6733", "google_scholar": ";BvEGvqoAAAAJ;;umGuS18AAAAJ", "orcid": ";;;0000-0003-2682-4975", "linkedin": ";julius-luy-b66a47a9/?originalSubdomain=de;;", "or_profile": "~Fabian_Akkerman1;~Julius_Luy1;~Wouter_van_Heeswijk1;~Maximilian_Schiffer1", "aff": ";Technische Universit\u00e4t M\u00fcnchen;;Technische Universit\u00e4t M\u00fcnchen", "aff_domain": ";tum.de;;tum.de", "position": ";PhD student;;Associate Professor", "bibtex": "@inproceedings{\nakkerman2024dynamic,\ntitle={Dynamic Neighborhood Construction for Structured Large Discrete Action Spaces},\nauthor={Fabian Akkerman and Julius Luy and Wouter van Heeswijk and Maximilian Schiffer},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=80wh3jjCZf}\n}", "github": "", "project": "", "reviewers": "x32f;kH6M;ZSBT", "pdf_size": 1651736, "rating": "6;8;8", "confidence": "2;2;4", "soundness": "3;3;3", "contribution": "3;3;3", "presentation": "3;4;3", "wc_summary": "98;52;142", "wc_strengths": "18;120;35", "wc_weaknesses": "224;76;211", "wc_questions": "2;25;148", "wc_review": "342;273;536", "wc_reply_reviewers": "72;29;29", "wc_reply_authors": "437;630;840", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 2.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 97.33333333333333, 36.745370078721784 ], "wc_strengths_avg": [ 57.666666666666664, 44.61937795273359 ], "wc_weaknesses_avg": [ 170.33333333333334, 66.91453919407617 ], "wc_questions_avg": [ 58.333333333333336, 64.09541498595841 ], "wc_review_avg": [ 383.6666666666667, 111.33832324147073 ], "wc_reply_reviewers_avg": [ 43.333333333333336, 20.270394394014364 ], "wc_reply_authors_avg": [ 635.6666666666666, 164.57284776724933 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8412935024885307152&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 7, "openreview": "https://openreview.net/forum?id=80wh3jjCZf", "pdf": "https://openreview.net/pdf?id=80wh3jjCZf", "email": ";tum.de;;tum.de", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Technische Universit\u00e4t M\u00fcnchen", "aff_unique_dep": "", "aff_unique_url": "https://www.tum.de", "aff_unique_abbr": "TUM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "id": "816T4ab9Z5", "title": "Perfect Alignment May be Poisonous to Graph Contrastive Learning", "track": "main", "status": "Reject", 
"tldr": "", "abstract": "Graph Contrastive Learning (GCL) aims to learn node representations by aligning positive pairs and separating negative ones. However, limited research has been conducted on the inner law behind specific augmentations used in graph-based learning. What kind of augmentation will help downstream performance, how does contrastive learning actually influence downstream tasks, and why the magnitude of augmentation matters? This paper seeks to address these questions by establishing a connection between augmentation and downstream performance, as well as by investigating the generalization of contrastive learning. Our findings reveal that GCL contributes to downstream tasks mainly by separating different classes rather than gathering nodes of the same class. So perfect alignment and augmentation overlap which draw all intra-class samples the same can not explain the success of contrastive learning. Then in order to comprehend how augmentation aids the contrastive learning process, we conduct further investigations into its generalization, finding that perfect alignment that draw positive pair the same could help contrastive loss but is poisonous to generalization, on the contrary, imperfect alignment enhances the model's generalization ability. We analyse the result by information theory and graph spectrum theory respectively, and propose two simple but effective methods to verify the theories. The two methods could be easily applied to various GCL algorithms and extensive experiments are conducted to prove its effectiveness.", "keywords": "Graph Contrastive Learning;Alignment;Generalization", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/986cca6c5db8e20d8b0822ae1d37dbf799c743e2.zip", "author": "Jingyu Liu;Yong Liu", "authorids": "~Jingyu_Liu4;~Yong_Liu7", "gender": "M;M", "homepage": "https://github.com/somebodyhh1;https://iie-liuyong.github.io", "dblp": ";29/4867-18", "google_scholar": ";vVhmzbAAAAAJ", "orcid": ";0000-0002-6739-621X", "linkedin": ";", "or_profile": "~Jingyu_Liu4;~Yong_Liu7", "aff": "Renmin University of China;Renmin University of China", "aff_domain": "ruc.edu.cn;ruc.edu.cn", "position": "PhD student;Associate Professor", "bibtex": "@misc{\nliu2024perfect,\ntitle={Perfect Alignment May be Poisonous to Graph Contrastive Learning},\nauthor={Jingyu Liu and Yong Liu},\nyear={2024},\nurl={https://openreview.net/forum?id=816T4ab9Z5}\n}", "github": "", "project": "", "reviewers": "vy1b;yxaQ;J3pe;KFkN;yszY", "site": "https://openreview.net/forum?id=816T4ab9Z5", "pdf_size": 2808574, "rating": "3;6;6;6;8", "confidence": "5;3;3;4;4", "soundness": "2;3;3;3;3", "contribution": "2;3;3;2;3", "presentation": "3;3;2;2;3", "wc_summary": "50;200;110;55;122", "wc_strengths": "30;62;124;25;115", "wc_weaknesses": "459;77;223;284;102", "wc_questions": "4;4;6;290;15", "wc_review": "543;343;463;654;354", "wc_reply_reviewers": "25;0;11;273;0", "wc_reply_authors": "1833;399;703;2599;491", "reply_reviewers": "1;0;1;1;0", "reply_authors": "4;1;2;4;1", "rating_avg": [ 5.8, 1.6 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "contribution_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 107.4, 54.47054249775744 ], "wc_strengths_avg": [ 71.2, 41.52782199923324 ], "wc_weaknesses_avg": [ 229.0, 137.9811581340003 ], "wc_questions_avg": [ 63.8, 113.17314169006708 ], "wc_review_avg": [ 
471.4, 117.31086906165174 ], "wc_reply_reviewers_avg": [ 61.8, 105.99886791848297 ], "wc_reply_authors_avg": [ 1205.0, 865.916393192784 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 2.4, 1.3564659966250538 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5345224838248488, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12307682829642070275&as_sdt=8005&sciodt=0,7&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0", "aff_unique_norm": "Renmin University of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ruc.edu.cn", "aff_unique_abbr": "RUC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "82A2EfMu3e", "title": "Efficient Discrete Physics-informed Neural Networks for Solving Evolutionary Partial Differential Equations", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Physics-informed neural networks (PINNs) have shown promising potential for solving partial differential equations (PDEs) using deep learning. \nHowever, PINNs face training difficulties for evolutionary PDEs, particularly for dynamical systems whose solutions exhibit multi-scale or turbulent behavior over time.\nThe reason is that PINNs may violate the temporal causality property since all the temporal features in the PINNs loss are trained simultaneously. \nThis paper proposes to use implicit time differencing schemes to enforce temporal causality, and use transfer learning to sequentially update the PINNs in space as surrogates for PDE solutions in different time frames.\nThe evolving PINNs are better able to capture the varying complexities of the evolutionary equations, while only requiring minor updates between adjacent time frames.\nOur method is theoretically proven to be convergent if the time step is small and each PINN in different time frames is well-trained.\nIn addition, we provide state-of-the-art (SOTA) numerical results for a variety of benchmarks for which existing PINNs formulations may fail or be inefficient.\nWe demonstrate that the proposed method improves the accuracy of PINNs approximation for evolutionary PDEs and improves efficiency by a factor of 4\u201340x.\nAll code and data can be found in the supplemental materials.", "keywords": "Neural networks;Partial differential equation;Physics-informed machine learning", "primary_area": "applications to physical sciences (physics, chemistry, biology, etc.)", "supplementary_material": "/attachment/611bc55af46ccbe60fc9a6c1c0c47ac0aebb8283.zip", "author": "Ye Li;Siqi Chen;Bin Shan", "authorids": "~Ye_Li6;~Siqi_Chen5;~Bin_Shan2", "gender": "M;M;M", "homepage": ";https://github.com/kjzxcsq;", "dblp": ";;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;", "orcid": "0000-0003-3986-129X;0009-0003-0174-989X;0009-0003-1177-4597", "linkedin": ";;", "or_profile": "~Ye_Li6;~Siqi_Chen5;~Bin_Shan2", "aff": "Nanjing University of Aeronautics and Astronautics;Nanjing University of Aeronautics and Astronautics;Nanjing University of Aeronautics and Astronautics", "aff_domain": "nuaa.edu.cn;nuaa.edu.cn;nuaa.edu.cn", "position": "Assistant Professor;Undergrad student;MS student", "bibtex": "@misc{\nli2024efficient,\ntitle={Efficient Discrete Physics-informed Neural Networks for Solving Evolutionary Partial Differential Equations},\nauthor={Ye Li and Siqi Chen and Bin Shan},\nyear={2024},\nurl={https://openreview.net/forum?id=82A2EfMu3e}\n}", 
"github": "", "project": "", "reviewers": "HW8p;XCvN;rdRW;sf3Z", "site": "https://openreview.net/forum?id=82A2EfMu3e", "pdf_size": 2938444, "rating": "3;3;5;5", "confidence": "5;5;5;3", "soundness": "2;1;3;3", "contribution": "1;1;2;3", "presentation": "1;2;3;3", "wc_summary": "144;94;148;94", "wc_strengths": "44;45;99;53", "wc_weaknesses": "673;440;303;159", "wc_questions": "166;201;298;47", "wc_review": "1027;780;848;353", "wc_reply_reviewers": "245;0;0;12", "wc_reply_authors": "257;500;529;179", "reply_reviewers": "1;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.5, 0.8660254037844386 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "contribution_avg": [ 1.75, 0.82915619758885 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 120.0, 26.038433132583073 ], "wc_strengths_avg": [ 60.25, 22.64260364887395 ], "wc_weaknesses_avg": [ 393.75, 189.3823843444791 ], "wc_questions_avg": [ 178.0, 89.76914837515169 ], "wc_review_avg": [ 752.0, 247.3994745346077 ], "wc_reply_reviewers_avg": [ 64.25, 104.4709887959332 ], "wc_reply_authors_avg": [ 366.25, 151.14128324187274 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:YFYSz6HeFZQJ:scholar.google.com/&scioq=Efficient+Discrete+Physics-informed+Neural+Networks+for+Solving+Evolutionary+Partial+Differential+Equations&hl=en&as_sdt=0,21", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Nanjing University of Aeronautics and Astronautics", "aff_unique_dep": "", "aff_unique_url": "http://www.nuaa.edu.cn", "aff_unique_abbr": "NUAA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "FreeDyG: Frequency Enhanced Continuous-Time Dynamic Graph Model for Link Prediction", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19341", "id": "82Mc5ilInM", "author_site": "Yuxing Tian, Yiyan Qi, Fan Guo", "tldr": "", "abstract": "Link prediction is a crucial task in dynamic graph learning. Recent advancements in continuous-time dynamic graph models, primarily by leveraging richer temporal details, have significantly improved link prediction performance. However, due to their complex modules, they still face several challenges, such as overfitting and optimization difficulties. More importantly, it is challenging for these methods to capture the 'shift' phenomenon, where node interaction patterns change over time. To address these issues, we propose a simple yet novel method called \\textbf{Fre}quency \\textbf{E}nhanced Continuous-Time \\textbf{Dy}namic \\textbf{G}raph ({\\bf FreeDyG}) model for link prediction. Specifically, we propose a node interaction frequency encoding module that both explicitly captures the proportion of common neighbors and the frequency of the interaction of the node pair. Unlike previous works that primarily focus on the time domain, we delve into the frequency domain, allowing a deeper and more nuanced extraction of interaction patterns, revealing periodic and \"shift\" behaviors. Extensive experiments conducted on seven real-world continuous-time dynamic graph datasets validate the effectiveness of FreeDyG. The results consistently demonstrate that FreeDyG outperforms existing methods in both transductive and inductive settings. 
Our code is available at this repository: \\href{https://github.com/Tianxzzz/FreeDyG}{https://github.com/Tianxzzz/FreeDyG}", "keywords": "Dynamic graph; fourier transform; link prediction", "primary_area": "learning on graphs and other geometries & topologies", "supplementary_material": "", "author": "Yuxing Tian;Yiyan Qi;Fan Guo", "authorids": "~Yuxing_Tian1;~Yiyan_Qi1;~Fan_Guo3", "gender": "M;;", "homepage": ";;https://jsjxy.jxnu.edu.cn/2012/0923/c3381a106151/page.htm", "dblp": "02/6504.html;209/8128;", "google_scholar": "hZGWZnQAAAAJ;ZG9GqnMAAAAJ;", "orcid": ";0000-0002-8078-5834;", "linkedin": ";;", "or_profile": "~Yuxing_Tian1;~Yiyan_Qi1;~Fan_Guo3", "aff": "International Digital Economy Academy;IDEA;Jiangxi Normal University", "aff_domain": "idea.edu.cn;idea.edu.cn;jxnu.edu.cn", "position": "Intern;Researcher;Associate Professor", "bibtex": "@inproceedings{\ntian2024freedyg,\ntitle={FreeDyG: Frequency Enhanced Continuous-Time Dynamic Graph Model for Link Prediction},\nauthor={Yuxing Tian and Yiyan Qi and Fan Guo},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=82Mc5ilInM}\n}", "github": "", "project": "", "reviewers": "VgR1;Gxoa;VGzm;Wgp4", "pdf_size": 908866, "rating": "5;6;8;8", "confidence": "3;5;3;3", "soundness": "3;3;3;3", "contribution": "2;2;3;3", "presentation": "3;4;3;3", "wc_summary": "67;56;65;125", "wc_strengths": "53;60;80;111", "wc_weaknesses": "326;170;126;151", "wc_questions": "276;63;65;44", "wc_review": "722;349;336;431", "wc_reply_reviewers": "0;27;8;0", "wc_reply_authors": "1123;1235;1033;554", "reply_reviewers": "0;1;1;0", "reply_authors": "2;3;3;1", "rating_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 78.25, 27.307279249313726 ], "wc_strengths_avg": [ 76.0, 22.5055548698538 ], "wc_weaknesses_avg": [ 193.25, 78.21564741149945 ], "wc_questions_avg": [ 112.0, 95.03946548671242 ], "wc_review_avg": [ 459.5, 155.86933630448294 ], "wc_reply_reviewers_avg": [ 8.75, 11.031205736455105 ], "wc_reply_authors_avg": [ 986.25, 259.61642378709405 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7223783100460016618&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 2, "openreview": "https://openreview.net/forum?id=82Mc5ilInM", "pdf": "https://openreview.net/pdf?id=82Mc5ilInM", "email": "idea.edu.cn;idea.edu.cn;jxnu.edu.cn", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "International Digital Economy Academy;Institute of Electrical and Electronics Engineers;Jiangxi Normal University", "aff_unique_dep": ";;", "aff_unique_url": ";https://www.ieee.org;http://www.jxnu.edu.cn", "aff_unique_abbr": ";IEEE;JXNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1;2", "aff_country_unique": ";United States;China" }, { "id": "83w0LPowHz", "title": "On Reconstructability of Graph Neural Networks", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recently, the expressive power of GNNs has been analyzed based on their ability to determine if two given graphs are isomorphic using the WL-test. 
However, previous analyses only establish the expressiveness of GNNs for graph-level tasks from a global perspective. In this paper, we analyze the expressive power of GNNs in terms of Graph Reconstructability, which aims to examine whether the topological information of graphs can be recovered from a local (node-level) perspective. We answer this question by analyzing how the output node embeddings extracted from GNNs may maintain important information for reconstructing the input graph structure. Moreover, we generalize GNNs in the form of Graph Reconstructable Neural Network (GRNN) and explore Nearly Orthogonal Random Features (NORF) to retain graph reconstructability. Experimental results demonstrate that GRNN outperforms representative baselines in reconstructability and efficiency.", "keywords": "Graph Neural Network;Reconstructability", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/fa435ca30046d40fb88c9d805787039b2b370491.pdf", "author": "Hsi-Wen Chen;De-Nian Yang;Hong-Han Shuai;Wang-Chien Lee;Ming-Syan Chen", "authorids": "~Hsi-Wen_Chen1;~De-Nian_Yang1;~Hong-Han_Shuai1;~Wang-Chien_Lee1;~Ming-Syan_Chen2", "gender": "Not Specified;M;M;M;M", "homepage": ";https://homepage.iis.sinica.edu.tw/pages/dnyang/index_en.html;http://basiclab.lab.nycu.edu.tw/;http://www.cse.psu.edu/~wul2/;https://arbor.ee.ntu.edu.tw/~mschen", "dblp": "39/9713.html;85/318;86/10294;14/716;c/MingSyanChen", "google_scholar": "https://scholar.google.com.tw/citations?user=ZupA27cAAAAJ;;https://scholar.google.com.tw/citations?user=MSWL2noAAAAJ;https://scholar.google.com.tw/citations?user=9OdHL5wAAAAJ;KTmCrFkAAAAJ", "orcid": ";0000-0002-3765-9293;0000-0003-2216-077X;0000-0002-8949-489X;0000-0002-0711-8197", "linkedin": "hsi-wen-chen-674395134;;;;", "or_profile": "~Hsi-Wen_Chen1;~De-Nian_Yang1;~Hong-Han_Shuai1;~Wang-Chien_Lee1;~Ming-Syan_Chen2", "aff": "National Taiwan University;Academia Sinica;National Yang Ming Chiao Tung University;Pennsylvania State University;National Taiwan University", "aff_domain": "ntu.edu.tw;iis.sinica.edu.tw;nycu.edu.tw;psu.edu;ntu.edu", "position": "PhD student;Professor;Associate Professor;Associate Professor;Full Professor", "bibtex": "@misc{\nchen2024on,\ntitle={On Reconstructability of Graph Neural Networks},\nauthor={Hsi-Wen Chen and De-Nian Yang and Hong-Han Shuai and Wang-Chien Lee and Ming-Syan Chen},\nyear={2024},\nurl={https://openreview.net/forum?id=83w0LPowHz}\n}", "github": "", "project": "", "reviewers": "zmY8;9f9r;M2Jy;RshA", "site": "https://openreview.net/forum?id=83w0LPowHz", "pdf_size": 511522, "rating": "3;3;5;5", "confidence": "4;3;3;3", "soundness": "1;2;2;2", "contribution": "2;1;3;2", "presentation": "2;3;1;2", "wc_summary": "71;71;156;74", "wc_strengths": "53;78;59;48", "wc_weaknesses": "379;333;451;122", "wc_questions": "246;83;150;65", "wc_review": "749;565;816;309", "wc_reply_reviewers": "45;97;39;11", "wc_reply_authors": "1146;534;972;636", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;2;1", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 1.75, 0.4330127018922193 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 93.0, 36.39368077015569 ], "wc_strengths_avg": [ 59.5, 11.368817000902073 ], "wc_weaknesses_avg": [ 321.25, 122.4834172449479 ], "wc_questions_avg": [ 136.0, 70.9683027837076 ], "wc_review_avg": [ 609.75, 196.45785171379637 ], 
"wc_reply_reviewers_avg": [ 48.0, 31.064449134018133 ], "wc_reply_authors_avg": [ 822.0, 247.49545450371406 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:QDNzCenuQhQJ:scholar.google.com/&scioq=On+Reconstructability+of+Graph+Neural+Networks&hl=en&as_sdt=0,11", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "National Taiwan University;Academia Sinica;National Yang Ming Chiao Tung University;Pennsylvania State University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ntu.edu.tw;https://www.sinica.edu.tw;https://www.nycu.edu.tw;https://www.psu.edu", "aff_unique_abbr": "NTU;Academia Sinica;NYCU;PSU", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Taiwan;", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "China;United States" }, { "id": "84Hk01tFKq", "title": "HyperFields: Towards Zero-Shot Generation of NeRFs from Text", "track": "main", "status": "Reject", "tldr": "", "abstract": "We introduce HyperFields, a method for generating text-conditioned NeRFs with a single forward pass and (optionally) some finetuning. Key to our approach are: (i) a dynamic hypernetwork, which learns a smooth mapping from text token embeddings to the space of Neural Radiance Fields (NeRFs); (ii) NeRF distillation training, which distills scenes encoded in individual NeRFs into one dynamic hypernetwork. These techniques enable a single network to fit over a hundred unique scenes. We further demonstrate that HyperFields learns a more general map between text and NeRFs, and consequently is capable of predicting novel in-distribution and out-of-distribution scenes --- either zero-shot or with a few finetuning steps. Finetuning HyperFields benefits from accelerated convergence thanks to the learned general map, and is capable of synthesizing novel scenes 5 to 10 times faster than existing neural optimization-based methods. 
Our ablation experiments show that both the dynamic architecture and NeRF distillation are critical to the expressivity of HyperFields.", "keywords": "HyperNetworks;generative modelling", "primary_area": "generative models", "supplementary_material": "", "author": "Sudarshan Babu;Richard Liu;Avery Zhou;Michael Maire;Greg Shakhnarovich;Rana Hanocka", "authorids": "~Sudarshan_Babu1;~Richard_Liu1;~Avery_Zhou1;~Michael_Maire1;~Greg_Shakhnarovich1;~Rana_Hanocka1", "gender": "M;M;M;M;;M", "homepage": "https://people.cs.uchicago.edu/~sudarshan/;https://factoryofthesun.github.io/;https://www.linkedin.com/in/azhou/;http://people.cs.uchicago.edu/~mmaire/;https://people.cs.uchicago.edu/~ranahanocka/;http://ttic.edu/gregory/", "dblp": "164/6304;44/5359;;73/1498.html;167/2260;17/1926.html", "google_scholar": ";;;HXowq5YAAAAJ;3Bk5C9EAAAAJ;https://scholar.google.com.tw/citations?user=YLOz1kgAAAAJ", "orcid": ";;;;0000-0003-3214-3703;", "linkedin": ";;;;;", "or_profile": "~Sudarshan_Babu1;~Richard_Liu1;~Avery_Zhou1;~Michael_Maire1;~Rana_Hanocka1;~Gregory_Shakhnarovich2", "aff": "Toyota Technological Institute at Chicago;University of Chicago;;University of Chicago;University of Chicago;University of Chicago", "aff_domain": "ttic.edu;cs.uchicago.edu;;uchicago.edu;uchicago.edu;uchicago.edu", "position": "PhD student;PhD student;;Associate Professor;Assistant Professor;Professor, part time", "bibtex": "@misc{\nbabu2024hyperfields,\ntitle={HyperFields: Towards Zero-Shot Generation of Ne{RF}s from Text},\nauthor={Sudarshan Babu and Richard Liu and Avery Zhou and Michael Maire and Greg Shakhnarovich and Rana Hanocka},\nyear={2024},\nurl={https://openreview.net/forum?id=84Hk01tFKq}\n}", "github": "", "project": "", "reviewers": "VyWD;6JFZ;YDm2;ckby", "site": "https://openreview.net/forum?id=84Hk01tFKq", "pdf_size": 6003306, "rating": "5;5;5;8", "confidence": "5;4;4;4", "soundness": "3;3;3;4", "contribution": "3;2;2;4", "presentation": "2;3;3;3", "wc_summary": "92;98;64;104", "wc_strengths": "51;57;33;140", "wc_weaknesses": "162;125;164;99", "wc_questions": "107;82;21;18", "wc_review": "412;362;282;361", "wc_reply_reviewers": "100;0;0;0", "wc_reply_authors": "377;494;128;492", "reply_reviewers": "1;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 89.5, 15.321553446044563 ], "wc_strengths_avg": [ 70.25, 41.227266462864115 ], "wc_weaknesses_avg": [ 137.5, 27.115493725912497 ], "wc_questions_avg": [ 57.0, 38.54218468120353 ], "wc_review_avg": [ 354.25, 46.53157530107916 ], "wc_reply_reviewers_avg": [ 25.0, 43.30127018922193 ], "wc_reply_authors_avg": [ 372.75, 149.03250484374206 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11413657046320631001&as_sdt=5,40&sciodt=0,40&hl=en", "gs_version_total": 9, "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "Toyota Technological Institute at Chicago;University of Chicago", "aff_unique_dep": ";", "aff_unique_url": "https://www.tti-chicago.org;https://www.uchicago.edu", "aff_unique_abbr": "TTI Chicago;UChicago", "aff_campus_unique_index": "0", "aff_campus_unique": "Chicago;", "aff_country_unique_index": "0;0;0;0;0", 
"aff_country_unique": "United States" }, { "id": "84fOBZlOiV", "title": "Estimating uncertainty from feed-forward network based sensing using quasilinear approximation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Artificial neural networks are increasingly integrated into both sensing hardware (e.g., \"smart sensors\") and dedicated decision-making circuits that operate on this information. As this technology is deployed in safety-critical environments (pedestrian-detection, power management, and flight-controls) it is critical to assess the real-time confidence of information built on these networks. However, while stand-alone confidence of sensing (e.g. object detection) neural networks are common, tools are much more limited for integrating such information into formal estimation of latent variables upstream of the sensor. To make this distinction clear, consider the common problem of target-tracking from a mobile camera. The geographic position of the target is a function of the camera position and orientation in addition to position within the image, whereas the neural network only reports confidence in pixel-space. Likewise, optimally leveraging an image-sequence requires consideration of uncertainty in the camera and target dynamics, as well as the sensing neural network. As we will demonstrate, fusing dynamical system models with large sensing networks presents a major computational challenge. Specifically, popular approaches such as first-order (Jacobian) linearization prove inaccurate, whereas nonlinear sampling-based approaches, while effective, are intractable for high-dimensional measurements such as images. In this work, we borrow an analytic approach from control engineering, quasilinear system approximation, to propagate the dynamics of environmental uncertainty through feedforward neural network architectures. The approximation enables direct Bayesian (i.e., Kalman-style) filtering to estimate latent variables, thus obviating the need for taxing sampling-based approaches. Thus, the proposed framework may enable real-time confidence estimation in high-dimensional network-based sensing deployments.", "keywords": "Uncertainty propagation;quasilinear approximation;stochastic linearization;neural networks;Kalman filter.", "primary_area": "probabilistic methods (Bayesian methods, variational inference, sampling, UQ, etc.)", "supplementary_material": "/attachment/6116d624dba3dc0f924acb132a34ce744df6b47f.pdf", "author": "Songhan Zhang;Matthew Singh;ShiNung Ching", "authorids": "~Songhan_Zhang1;~Matthew_Singh1;~ShiNung_Ching1", "gender": "M;;", "homepage": "https://www.linkedin.com/in/songhan-zhang-7a58338b/;https://sites.wustl.edu/ccplab/people/matthew-singh/;http://braindynamics.engineering.wustl.edu", "dblp": ";;13/9188", "google_scholar": ";;z_vUo8EAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Songhan_Zhang1;~Matthew_Singh1;~ShiNung_Ching1", "aff": "Washington University, St. Louis;Washington University, St. Louis;Washington University, St. 
Louis", "aff_domain": "wustl.edu;wustl.edu;wustl.edu", "position": "PhD student;Postdoc;Associate Professor", "bibtex": "@misc{\nzhang2024estimating,\ntitle={Estimating uncertainty from feed-forward network based sensing using quasilinear approximation},\nauthor={Songhan Zhang and Matthew Singh and ShiNung Ching},\nyear={2024},\nurl={https://openreview.net/forum?id=84fOBZlOiV}\n}", "github": "", "project": "", "reviewers": "vnNG;Woe1;Lszd;gBbv", "site": "https://openreview.net/forum?id=84fOBZlOiV", "pdf_size": 1331208, "rating": "3;3;5;5", "confidence": "4;4;4;3", "soundness": "2;3;2;3", "contribution": "1;1;2;2", "presentation": "2;3;2;2", "wc_summary": "33;75;82;132", "wc_strengths": "9;17;49;173", "wc_weaknesses": "104;194;27;325", "wc_questions": "3;49;12;5", "wc_review": "149;335;170;635", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 1.5, 0.5 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 80.5, 35.14612354157995 ], "wc_strengths_avg": [ 62.0, 65.81033353509159 ], "wc_weaknesses_avg": [ 162.5, 110.88394834239986 ], "wc_questions_avg": [ 17.25, 18.632968094214082 ], "wc_review_avg": [ 322.25, 194.40341432186833 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:wRkjZyULNGEJ:scholar.google.com/&scioq=Estimating+uncertainty+from+feed-forward+network+based+sensing+using+quasilinear+approximation&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Washington University in St. Louis", "aff_unique_dep": "", "aff_unique_url": "https://wustl.edu", "aff_unique_abbr": "WUSTL", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "St. Louis", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Detecting, Explaining, and Mitigating Memorization in Diffusion Models", "status": "Oral", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19340", "id": "84n3UwkH7b", "author_site": "Yuxin Wen, Yuchen Liu, Chen Chen, Lingjuan Lyu", "tldr": "", "abstract": "Recent breakthroughs in diffusion models have exhibited exceptional image-generation capabilities. However, studies show that some outputs are merely replications of training data. Such replications present potential legal challenges for model owners, especially when the generated content contains proprietary information. In this work, we introduce a straightforward yet effective method for detecting memorized prompts by inspecting the magnitude of text-conditional predictions. Our proposed method seamlessly integrates without disrupting sampling algorithms, and delivers high accuracy even at the first generation step, with a single generation per prompt. Building on our detection strategy, we unveil an explainable approach that shows the contribution of individual words or tokens to memorization. This offers an interactive medium for users to adjust their prompts. Moreover, we propose two strategies i.e., to mitigate memorization by leveraging the magnitude of text-conditional predictions, either through minimization during inference or filtering during training. 
These proposed strategies effectively counteract memorization while maintaining high-generation quality. Code is available at https://github.com/YuxinWenRick/diffusion_memorization.", "keywords": "Diffusion Model;Memorization", "primary_area": "generative models", "supplementary_material": "/attachment/5a7cc8c3e384ae4896dc9e7840e66453f612bdb4.zip", "author": "Yuxin Wen;Yuchen Liu;Chen Chen;Lingjuan Lyu", "authorids": "~Yuxin_Wen2;~Yuchen_Liu8;~Chen_Chen20;~Lingjuan_Lyu1", "gender": ";;M;F", "homepage": "https://yuxinwenrick.github.io/;;https://cc233.github.io/;https://sites.google.com/view/lingjuan-lyu", "dblp": ";;65/4423-43;178/9876", "google_scholar": "oUYfjg0AAAAJ;;;", "orcid": ";0000-0002-3629-128X;0000-0001-7359-8515;", "linkedin": ";;;", "or_profile": "~Yuxin_Wen2;~Yuchen_Liu8;~Chen_Chen20;~Lingjuan_Lyu1", "aff": "University of Maryland, College Park;Zhejiang University;Sony AI;Sony", "aff_domain": "umd.edu;zju.edu.cn;sony.com;sony.com", "position": "PhD student;PhD student;Researcher;scientist", "bibtex": "@inproceedings{\nwen2024detecting,\ntitle={Detecting, Explaining, and Mitigating Memorization in Diffusion Models},\nauthor={Yuxin Wen and Yuchen Liu and Chen Chen and Lingjuan Lyu},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=84n3UwkH7b}\n}", "github": "", "project": "", "reviewers": "hCP2;VQfz;Mwfj;5xmi", "pdf_size": 44556169, "rating": "8;8;8;8", "confidence": "2;3;4;4", "soundness": "2;3;4;4", "contribution": "3;3;4;4", "presentation": "3;3;4;3", "wc_summary": "204;322;81;133", "wc_strengths": "225;61;56;177", "wc_weaknesses": "271;2;59;276", "wc_questions": "82;116;33;2", "wc_review": "782;501;229;588", "wc_reply_reviewers": "21;0;0;0", "wc_reply_authors": "879;234;436;636", "reply_reviewers": "1;0;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 8.0, 0.0 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "contribution_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 185.0, 90.34655499796325 ], "wc_strengths_avg": [ 129.75, 73.26450368357108 ], "wc_weaknesses_avg": [ 152.0, 123.1726430665511 ], "wc_questions_avg": [ 58.25, 43.87695864574025 ], "wc_review_avg": [ 525.0, 198.87810336987832 ], "wc_reply_reviewers_avg": [ 5.25, 9.093266739736606 ], "wc_reply_authors_avg": [ 546.25, 238.97319410343914 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 55, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5536743981762017609&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=84n3UwkH7b", "pdf": "https://openreview.net/pdf?id=84n3UwkH7b", "email": "umd.edu;zju.edu.cn;sony.com;sony.com", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of Maryland;Zhejiang University;Sony;Sony Corporation", "aff_unique_dep": ";;Sony AI;", "aff_unique_url": "https://www/umd.edu;https://www.zju.edu.cn;https://www.sony.com;https://www.sony.com", "aff_unique_abbr": "UMD;ZJU;Sony AI;Sony", "aff_campus_unique_index": "0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;1;2;2", "aff_country_unique": "United States;China;Japan" }, { "id": "85Af6AcMo5", "title": "SciRE-Solver: Accelerating Diffusion Models Sampling by Score-integrand Solver with Recursive Difference", "track": 
"main", "status": "Reject", "tldr": "", "abstract": "One downside of Diffusion models (DMs) is their slow iterative process. Recent algorithms for fast sampling are designed from the \ndifferential equations. However, in the fast algorithms, estimating the derivative of the score function evaluations becomes intractable due to the complexity of large-scale, well-trained neural networks. In this work, we introduce the recursive difference method to calculate the derivative of the score function networks. Building upon, we propose \\emph{SciRE-Solver} with the convergence order guarantee for accelerating DMs sampling. Our proposed sampling algorithms attain SOTA FIDs in comparison to existing training-free sampling algorithms, \nunder various number of score function evaluations (NFE). Such as, we achieve $3.48$ FID with $12$ NFE, and $2.42$ FID with $20$ NFE for continuous-time model on CIFAR-10; $1.79$ FID with $20$ NFE and $1.76$ FID with $100$ NFE for the pretrained model of EDM. Experiments demonstrate also that demonstrate that SciRE-Solver with multi-step methods can achieve high-quality samples on popular text-to-image generation tasks with only 6$\\sim$20 NFEs.", "keywords": "Diffusion Models;Sampler;Accelerating", "primary_area": "generative models", "supplementary_material": "/attachment/66d650a108b6c2b1946203983df892e7a90ac5de.zip", "author": "Shigui Li;Wei Chen;Delu Zeng", "authorids": "~Shigui_Li2;~Wei_Chen52;~Delu_Zeng4", "gender": "M;M;M", "homepage": ";;", "dblp": "236/7957;;38/5665", "google_scholar": "Fm039ikAAAAJ;https://scholar.google.com.hk/citations?user=r5fgeWQAAAAJ;08RCdoIAAAAJ", "orcid": "0000-0003-2606-3600;0009-0002-2880-3078;", "linkedin": ";;", "or_profile": "~Shigui_Li2;~Wei_Chen52;~Delu_zeng1", "aff": "South China University of Technology;South China University of Technology;South China University of Technology", "aff_domain": "scut.edu.cn;scut.edu.cn;scut.edu.cn", "position": "PhD student;PhD student;Full Professor", "bibtex": "@misc{\nli2024sciresolver,\ntitle={Sci{RE}-Solver: Accelerating Diffusion Models Sampling by Score-integrand Solver with Recursive Difference},\nauthor={Shigui Li and Wei Chen and Delu Zeng},\nyear={2024},\nurl={https://openreview.net/forum?id=85Af6AcMo5}\n}", "github": "", "project": "", "reviewers": "zqdo;MTm2;eocv;gjHa", "site": "https://openreview.net/forum?id=85Af6AcMo5", "pdf_size": 41322172, "rating": "5;6;6;6", "confidence": "4;4;4;2", "soundness": "3;3;3;2", "contribution": "2;3;3;3", "presentation": "1;3;3;2", "wc_summary": "29;53;59;90", "wc_strengths": "29;23;47;69", "wc_weaknesses": "200;12;132;131", "wc_questions": "2;36;34;308", "wc_review": "260;124;272;598", "wc_reply_reviewers": "47;0;0;64", "wc_reply_authors": "748;375;432;853", "reply_reviewers": "1;0;0;1", "reply_authors": "2;1;1;3", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 57.75, 21.741377601246892 ], "wc_strengths_avg": [ 42.0, 17.916472867168917 ], "wc_weaknesses_avg": [ 118.75, 67.68077644353676 ], "wc_questions_avg": [ 95.0, 123.71337841963576 ], "wc_review_avg": [ 313.5, 174.23762509859918 ], "wc_reply_reviewers_avg": [ 27.75, 28.39344114403888 ], "wc_reply_authors_avg": [ 602.0, 202.9445737141055 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], 
"corr_rating_confidence": -0.3333333333333333, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=791203681200403792&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "South China University of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.scut.edu.cn", "aff_unique_abbr": "SCUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "85gNpcUhmx", "title": "Context-Aware Unsupervised Domain Adaptive Lane Detection", "track": "main", "status": "Reject", "tldr": "", "abstract": "This paper focuses on two crucial issues in domain-adaptive lane detection, i.e., how to effectively learn discriminative features and transfer knowledge across domains. Existing lane detection methods usually exploit a pixel-wise cross-entropy loss to train detection models. However, the loss ignores the difference in feature representation among lanes, which leads to inefficient feature learning. On the other hand, cross-domain context dependency crucial for transferring knowledge across domains remains unexplored in existing lane detection methods. This paper proposes a Context-aware Unsupervised Domain-Adaptive Lane Detection (CUDALD) method, consisting of two key components, i.e., cross-domain contrastive loss and domain-level feature aggregation, to realize domain-adaptive lane detection. The former can effectively differentiate feature representations among categories by taking domain-level features as positive samples. The latter fuses the domain-level and pixel-level features to strengthen cross-domain context dependency. Extensive experiments show that CUDALD significantly improves the detection model\u2019s performance and outperforms existing unsupervised domain adaptive lane detection methods on datasets, TuLane, MuLane, and MoLane, especially achieving the best accuracy of 92.24\\% when using RTFormer on TuLane.", "keywords": "Unsupervised domain adaptive;Lane detection;Cross-domain contrastive loss;Domain-level feature aggregation", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Kunyang Zhou;Yunjian Feng;Jun Li", "authorids": "~Kunyang_Zhou1;~Yunjian_Feng2;~Jun_Li40", "gender": ";M;", "homepage": ";https://blog.csdn.net/qq_36104364?type=blog;", "dblp": ";;", "google_scholar": ";;", "orcid": ";;0000-0002-5272-9130", "linkedin": ";;", "or_profile": "~Kunyang_Zhou1;~Yunjian_Feng2;~Jun_Li40", "aff": ";Southeast University;Southeast University", "aff_domain": ";seu.edu.cn;seu.edu.cn", "position": ";PhD student;Full Professor", "bibtex": "@misc{\nzhou2024contextaware,\ntitle={Context-Aware Unsupervised Domain Adaptive Lane Detection},\nauthor={Kunyang Zhou and Yunjian Feng and Jun Li},\nyear={2024},\nurl={https://openreview.net/forum?id=85gNpcUhmx}\n}", "github": "", "project": "", "reviewers": "1nVJ;YuT7;cVkk;ZmgM", "site": "https://openreview.net/forum?id=85gNpcUhmx", "pdf_size": 2809368, "rating": "3;3;5;6", "confidence": "4;4;4;4", "soundness": "3;3;3;3", "contribution": "2;2;2;3", "presentation": "3;2;3;3", "wc_summary": "78;47;82;86", "wc_strengths": "98;30;36;56", "wc_weaknesses": "93;215;118;278", "wc_questions": "4;3;22;32", "wc_review": "273;295;258;452", "wc_reply_reviewers": "0;0;0;210", "wc_reply_authors": "309;540;451;775", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;2", "rating_avg": [ 4.25, 1.299038105676658 
], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 73.25, 15.417117110536587 ], "wc_strengths_avg": [ 55.0, 26.627053911388696 ], "wc_weaknesses_avg": [ 176.0, 74.46139939592862 ], "wc_questions_avg": [ 15.25, 12.275483697190918 ], "wc_review_avg": [ 319.5, 77.62248385616117 ], "wc_reply_reviewers_avg": [ 52.5, 90.93266739736606 ], "wc_reply_authors_avg": [ 518.75, 169.3374958477891 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:D54Hn5mETXoJ:scholar.google.com/&scioq=Context-Aware+Unsupervised+Domain+Adaptive+Lane+Detection&hl=en&as_sdt=0,10", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Southeast University", "aff_unique_dep": "", "aff_unique_url": "https://www.seu.edu.cn/", "aff_unique_abbr": "SEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "CompA: Addressing the Gap in Compositional Reasoning in Audio-Language Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19339", "id": "86NGO8qeWs", "author_site": "Sreyan Ghosh, Ashish Seth, Sonal Kumar, Utkarsh Tyagi, Chandra Kiran Evuru, Ramaneswaran S, S Sakshi, Oriol Nieto, Ramani Duraiswami, Dinesh Manocha", "tldr": "", "abstract": "A fundamental characteristic of audio is its compositional nature. Audio-language models (ALMs) trained using a contrastive approach (e.g., CLAP) that learns a shared representation between audio and language modalities have improved performance in many downstream applications, including zero-shot audio classification, audio retrieval, etc. However, the ability of these models to effectively perform compositional reasoning remains largely unexplored and necessitates additional research. In this paper, we propose CompA, a collection of two expert-annotated benchmarks with a majority of real-world audio samples, to evaluate compositional reasoning in ALMs. Our proposed CompA-order evaluates how well an ALM understands the order or occurrence of acoustic events in audio, and CompA-attribute evaluates attribute-binding of acoustic events. An instance from either benchmark consists of two audio-caption pairs, where both audios have the same acoustic events but with different compositions. An ALM is evaluated on how well it matches the right audio to the right caption. Using this benchmark, we first show that current ALMs perform only marginally better than random chance, thereby struggling with compositional reasoning. Next, we propose CompA-CLAP, where we fine-tune CLAP using a novel learning method to improve its compositional reasoning abilities. To train CompA-CLAP, we first propose improvements to contrastive training with composition-aware hard negatives, allowing for more focused training. Next, we propose a novel modular contrastive loss that helps the model learn fine-grained compositional understanding and overcomes the acute scarcity of openly available compositional audios. 
CompA-CLAP significantly improves over all our baseline models on the CompA benchmark, indicating its superior compositional reasoning capabilities.", "keywords": "audio;audio-language;compositional reasoning", "primary_area": "datasets and benchmarks", "supplementary_material": "/attachment/8a89ee1502f56c41795ace8c403fa62fedc8d16c.zip", "author": "Sreyan Ghosh;Ashish Seth;Sonal Kumar;Utkarsh Tyagi;Chandra Kiran Reddy Evuru;Ramaneswaran S;S Sakshi;Oriol Nieto;Ramani Duraiswami;Dinesh Manocha", "authorids": "~Sreyan_Ghosh1;~Ashish_Seth1;~Sonal_Kumar1;~Utkarsh_Tyagi1;~Chandra_Kiran_Reddy_Evuru1;~Ramaneswaran_S1;~S_Sakshi1;~Oriol_Nieto1;~Ramani_Duraiswami1;~Dinesh_Manocha3", "gender": "M;M;M;M;M;M;F;M;M;M", "homepage": "https://sreyan88.github.io/;https://cs20s030.github.io/;https://sonalkum.github.io;https://utkarsh4430.github.io;;;https://sakshi113.github.io/;https://www.urinieto.com/;http://www.umiacs.umd.edu/~ramani/;https://www.cs.umd.edu/people/dmanocha", "dblp": "173/5626;36/10405;;286/2046;355/1221;;;120/3843.html;d/RamaniDuraiswami;m/DineshManocha", "google_scholar": "5HKZJHAAAAAJ;aBn1e34AAAAJ;jiJ2DcEAAAAJ;https://scholar.google.co.in/citations?user=RLjKaTwAAAAJ;;YIhHxbwAAAAJ;F_-YNVAAAAAJ;7CyUUcMAAAAJ;GNEcpkAAAAAJ;X08l_4IAAAAJ", "orcid": ";0000-0003-3100-9342;;;;;;;0000-0002-5596-8460;0000-0001-7047-9801", "linkedin": ";;realsonalkumar/;utkarsh4430/;ckevuru/;;sakshi113/;urinieto;ramani-duraiswami-32a50b1/;dinesh-manocha-2311846", "or_profile": "~Sreyan_Ghosh1;~Ashish_Seth1;~Sonal_Kumar1;~Utkarsh_Tyagi1;~Chandra_Kiran_Reddy_Evuru1;~Ramaneswaran_S1;~S_Sakshi1;~Oriol_Nieto1;~Ramani_Duraiswami1;~Dinesh_Manocha3", "aff": "University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park;NVIDIA;University of Maryland, College Park;Adobe Systems;University of Maryland, College Park;University of Maryland, College Park", "aff_domain": "umd.edu;umd.edu;umd.edu;umd.edu;umd.edu;nvidia.com;umd.edu;adobe.com;umd.edu;umd.edu", "position": "PhD student;PhD student;PhD student;MS student;MS student;Researcher;PhD student;Researcher;Full Professor;Professor", "bibtex": "@inproceedings{\nghosh2024compa,\ntitle={CompA: Addressing the Gap in Compositional Reasoning in Audio-Language Models},\nauthor={Sreyan Ghosh and Ashish Seth and Sonal Kumar and Utkarsh Tyagi and Chandra Kiran Reddy Evuru and Ramaneswaran S and S Sakshi and Oriol Nieto and Ramani Duraiswami and Dinesh Manocha},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=86NGO8qeWs}\n}", "github": "", "project": "", "reviewers": "Cw3v;Xast;x6Uk;jJm8", "pdf_size": 3917096, "rating": "6;6;6;8", "confidence": "4;4;4;4", "soundness": "3;3;3;4", "contribution": "3;3;2;4", "presentation": "3;3;2;3", "wc_summary": "41;106;128;190", "wc_strengths": "63;1483;57;32", "wc_weaknesses": "181;2;91;73", "wc_questions": "129;2;130;50", "wc_review": "414;1593;406;345", "wc_reply_reviewers": "22;994;115;0", "wc_reply_authors": "1244;6047;1739;348", "reply_reviewers": "1;3;2;0", "reply_authors": "5;11;9;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 116.25, 53.25586822125802 ], "wc_strengths_avg": [ 408.75, 620.3274840759516 ], "wc_weaknesses_avg": [ 86.75, 
63.782344735827955 ], "wc_questions_avg": [ 77.75, 54.46271660503174 ], "wc_review_avg": [ 689.5, 522.3181501728616 ], "wc_reply_reviewers_avg": [ 282.75, 412.90275792249196 ], "wc_reply_authors_avg": [ 2344.5, 2195.0084851772212 ], "reply_reviewers_avg": [ 1.5, 1.118033988749895 ], "reply_authors_avg": [ 6.5, 3.840572873934304 ], "replies_avg": [ 39, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5900022586925230025&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "openreview": "https://openreview.net/forum?id=86NGO8qeWs", "pdf": "https://openreview.net/pdf?id=86NGO8qeWs", "email": "umd.edu;umd.edu;umd.edu;umd.edu;umd.edu;nvidia.com;umd.edu;adobe.com;umd.edu;umd.edu", "author_num": 10, "aff_unique_index": "0;0;0;0;0;1;0;2;0;0", "aff_unique_norm": "University of Maryland;NVIDIA;Adobe", "aff_unique_dep": ";NVIDIA Corporation;Adobe Systems Incorporated", "aff_unique_url": "https://www/umd.edu;https://www.nvidia.com;https://www.adobe.com", "aff_unique_abbr": "UMD;NVIDIA;Adobe", "aff_campus_unique_index": "0;0;0;0;0;0;0;0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "86w3LbTNI1", "title": "Preventing Reward Hacking with Occupancy Measure Regularization", "track": "main", "status": "Reject", "tldr": "", "abstract": "Reward hacking occurs when an agent performs very well with respect to a specified or learned reward function (often called a \"proxy\"), but poorly with respect to the true desired reward function. Since ensuring good alignment between the proxy and the true reward is remarkably difficult, prior work has proposed regularizing to a \"safe\" policy using the KL divergence between action distributions. The challenge with this divergence measure is that a small change in action distribution at a single state can lead to potentially calamitous outcomes. Our insight is that when this happens, the state occupancy measure of the policy shifts significantly\u2014the agent spends time in drastically different states than the safe policy does. We thus propose regularizing based on occupancy measure (OM) rather than action distribution. We show theoretically that there is a direct relationship between the returns of two policies under *any* reward function and their OM divergence, whereas no such relationship holds for their action distribution divergence. 
We then empirically find that OM regularization more effectively prevents reward hacking while allowing for performance improvement on top of the safe policy.", "keywords": "reward hacking;safety;occupancy measures;reinforcement learning", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Cassidy Laidlaw;Shivam Singhal;Anca Dragan", "authorids": "~Cassidy_Laidlaw1;~Shivam_Singhal1;~Anca_Dragan1", "gender": "M;M;F", "homepage": "https://cassidylaidlaw.com;;http://www.ancadragan.com/", "dblp": "241/5375;;", "google_scholar": "DzeJ67UAAAAJ;;", "orcid": ";;", "linkedin": ";shivam-singhal5601/;", "or_profile": "~Cassidy_Laidlaw1;~Shivam_Singhal1;~Anca_Dragan1", "aff": "University of California, Berkeley;University of California, Berkeley;University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;berkeley.edu", "position": "PhD student;MS student;Associate Professor", "bibtex": "@misc{\nlaidlaw2024preventing,\ntitle={Preventing Reward Hacking with Occupancy Measure Regularization},\nauthor={Cassidy Laidlaw and Shivam Singhal and Anca Dragan},\nyear={2024},\nurl={https://openreview.net/forum?id=86w3LbTNI1}\n}", "github": "", "project": "", "reviewers": "jdVB;y14X;rjUA;Jr8S;M4hZ", "site": "https://openreview.net/forum?id=86w3LbTNI1", "pdf_size": 495818, "rating": "3;5;5;6;6", "confidence": "4;4;2;4;4", "soundness": "2;2;3;3;3", "contribution": "2;1;2;2;2", "presentation": "2;3;3;4;3", "wc_summary": "73;138;186;214;163", "wc_strengths": "22;59;110;218;129", "wc_weaknesses": "145;140;177;172;299", "wc_questions": "188;55;37;171;85", "wc_review": "428;392;510;775;676", "wc_reply_reviewers": "0;0;0;16;196", "wc_reply_authors": "911;723;1049;1126;934", "reply_reviewers": "0;0;0;1;1", "reply_authors": "2;1;2;2;2", "rating_avg": [ 5.0, 1.0954451150103321 ], "confidence_avg": [ 3.6, 0.8 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "contribution_avg": [ 1.8, 0.4000000000000001 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 154.8, 47.997499934892446 ], "wc_strengths_avg": [ 107.6, 66.84489509304356 ], "wc_weaknesses_avg": [ 186.6, 58.03654021390317 ], "wc_questions_avg": [ 107.2, 61.22875141630768 ], "wc_review_avg": [ 556.2, 146.80381466433357 ], "wc_reply_reviewers_avg": [ 42.4, 77.04959441814084 ], "wc_reply_authors_avg": [ 948.6, 137.16646820560774 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.8, 0.4000000000000001 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7760325786457688938&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "CPPO: Continual Learning for Reinforcement Learning with Human Feedback", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19338", "id": "86zAUE80pP", "author_site": "Han Zhang, Yu Lei, Lin Gui, Min Yang, Yulan He, HUI WANG, Ruifeng Xu", "tldr": "", "abstract": "The approach of Reinforcement Learning from Human Feedback (RLHF) is widely used for enhancing pre-trained Language Models (LM), enabling them to better align with human preferences. 
Existing RLHF-based LMs however require complete retraining whenever new queries or feedback are introduced, as human preferences may differ across different domains or topics. LM retraining is often impracticable in most real-world scenarios, due to the substantial time and computational costs involved, as well as data privacy concerns. To address this limitation, we propose Continual Proximal Policy Optimization (CPPO), a novel method that is able to continually align LM with dynamic human preferences. Specifically, CPPO adopts a weighting strategy to decide which samples should be utilized for enhancing policy learning and which should be used for solidifying past experiences. This seeks a good trade-off between policy learning and knowledge retention. Our experimental results show that CPPO outperforms strong Continuous learning (CL) baselines when it comes to consistently aligning with human preferences. Furthermore, compared to PPO, CPPO offers more efficient and stable learning in non-continual scenarios.", "keywords": "lifelong learning;reinforcement learning;human feedback;proximal policy optimization", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "/attachment/e53518d4f66f4c373da1c927bbf41ce523a3e4b9.zip", "author": "Han Zhang;Yu Lei;Lin Gui;Min Yang;Yulan He;Hui Wang;Ruifeng Xu", "authorids": "~Han_Zhang3;~Yu_Lei5;~Lin_Gui3;~Min_Yang6;~Yulan_He1;~Hui_Wang13;~Ruifeng_Xu1", "gender": "M;M;F;F;M;M;M", "homepage": ";;https://minyang.me/;https://www.kcl.ac.uk/people/yulan-he;https://openi.pcl.ac.cn;http://faculty.hitsz.edu.cn/xuruifeng;", "dblp": "26/4189-25;34/8605-3;02/1640-7;75/5430;39/721-73;93/5407-1;284/8639-4", "google_scholar": "lhdgPb8AAAAJ;https://scholar.google.com.ph/citations?user=1b3Eyx4AAAAJ;_wop6KgAAAAJ;https://scholar.google.co.uk/citations?user=SP9r32UAAAAJ;;mObXnNIAAAAJ;nHlrzV0AAAAJ", "orcid": "0000-0001-5660-6237;;;0000-0003-3948-5845;;0000-0002-4009-5679;0000-0001-8482-3140", "linkedin": ";;;yulan-he-277234a/?originalSubdomain=uk;;;", "or_profile": "~Han_Zhang3;~Lin_Gui3;~Min_Yang6;~Yulan_He1;~Hui_Wang13;~Ruifeng_Xu1;~Yu_Lei4", "aff": "Harbin Institute of Technology;King's College London, University of London;Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Chinese Academy of Sciences;King's College London, University of London;Cloud Computing;Harbin Institute of Technology;Peng Cheng Laboratory, Shenzhen, China", "aff_domain": "hit.edu.cn;kcl.ac.uk;siat.ac.cn;kcl.ac.uk;pcl.ac.cn;hit.edu.cn;pcl.ac.cn", "position": "PhD student;Lecturer;Associate Professor;Full Professor;Full Professor;Full Professor;Researcher", "bibtex": "@inproceedings{\nzhang2024cppo,\ntitle={{CPPO}: Continual Learning for Reinforcement Learning with Human Feedback},\nauthor={Han Zhang and Yu Lei and Lin Gui and Min Yang and Yulan He and Hui Wang and Ruifeng Xu},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=86zAUE80pP}\n}", "github": "", "project": "", "reviewers": "Loz4;CUAu;qeM7;za1s", "pdf_size": 2705011, "rating": "5;6;6;8", "confidence": "4;4;2;2", "soundness": "2;3;2;4", "contribution": "2;3;2;3", "presentation": "2;2;3;4", "wc_summary": "159;61;158;127", "wc_strengths": "156;58;50;47", "wc_weaknesses": "612;327;163;90", "wc_questions": "212;2;23;1", "wc_review": "1139;448;394;265", "wc_reply_reviewers": "0;16;13;244", "wc_reply_authors": "1763;1081;347;590", "reply_reviewers": "0;1;1;2", "reply_authors": 
"5;3;2;2", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.0, 1.0 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 126.25, 39.808133591013785 ], "wc_strengths_avg": [ 77.75, 45.35622889967816 ], "wc_weaknesses_avg": [ 298.0, 200.5779150355293 ], "wc_questions_avg": [ 59.5, 88.48304922413107 ], "wc_review_avg": [ 561.5, 339.9841908089257 ], "wc_reply_reviewers_avg": [ 68.25, 101.64736838698776 ], "wc_reply_authors_avg": [ 945.25, 541.1212317956116 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.0, 1.224744871391589 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.6882472016116854, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10293528444348719373&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=86zAUE80pP", "pdf": "https://openreview.net/pdf?id=86zAUE80pP", "email": "hit.edu.cn;kcl.ac.uk;siat.ac.cn;kcl.ac.uk;pcl.ac.cn;hit.edu.cn;pcl.ac.cn", "author_num": 7, "aff_unique_index": "0;1;2;1;3;0;4", "aff_unique_norm": "Harbin Institute of Technology;King's College London;Chinese Academy of Sciences;Cloud Computing;Pengcheng Laboratory", "aff_unique_dep": ";;Shenzhen Institutes of Advanced Technology;;Peng Cheng Laboratory", "aff_unique_url": "http://www.hit.edu.cn/;https://www.kcl.ac.uk;http://www.cas.cn;;", "aff_unique_abbr": "HIT;KCL;CAS;;", "aff_campus_unique_index": "0;2;0;2", "aff_campus_unique": "Harbin;;Shenzhen", "aff_country_unique_index": "0;1;0;1;0;0", "aff_country_unique": "China;United Kingdom;" }, { "id": "87XbxDnPqj", "title": "Gradient Descent Provably Solves Nonlinear Tomographic Reconstruction", "track": "main", "status": "Reject", "tldr": "", "abstract": "In computed tomography (CT), the forward model consists of a linear Radon transform followed by an exponential nonlinearity based on the attenuation of light according to the Beer\u2013Lambert Law. Conventional reconstruction often involves inverting this nonlinearity as a preprocessing step and then solving a convex inverse problem. However, this nonlinear measurement preprocessing required to use the Radon transform is poorly conditioned in the vicinity of high-density materials, such as metal. This preprocessing makes CT reconstruction methods numerically sensitive and susceptible to artifacts near high-density regions. In this paper, we study a technique where the signal is directly reconstructed from raw measurements through the nonlinear forward model. Though this optimization is nonconvex, we show that gradient descent provably converges to the global optimum at a geometric rate, perfectly reconstructing the underlying signal with a near minimal number of random measurements. We also prove similar results in the under-determined setting where the number of measurements is significantly smaller than the dimension of the signal. This is achieved by enforcing prior structural information about the signal through constraints on the optimization variables. We illustrate the benefits of direct nonlinear CT reconstruction with cone-beam CT experiments on synthetic and real 3D volumes. 
We show that this approach reduces metal artifacts compared to a commercial reconstruction of a human skull with metal dental crowns.", "keywords": "computed tomography;nonconvex optimization;compressive sensing", "primary_area": "optimization", "supplementary_material": "/attachment/8b984c2545ff3e41b798bb48357af00cb160610a.zip", "author": "Sara Fridovich-Keil;Fabrizio Valdivia;Gordon Wetzstein;Benjamin Recht;Mahdi Soltanolkotabi", "authorids": "~Sara_Fridovich-Keil1;fabriziovaldivia2001@gmail.com;~Gordon_Wetzstein3;~Benjamin_Recht1;~Mahdi_Soltanolkotabi1", "gender": "F;;M;M;M", "homepage": "https://sarafridov.github.io;;http://web.stanford.edu/~gordonwz/;http://www.eecs.berkeley.edu/~brecht/;http://www-bcf.usc.edu/~soltanol/", "dblp": "236/7023;;13/4660;r/BenRecht;75/6691", "google_scholar": "9xF7M6wAAAAJ;;VOf45S0AAAAJ;https://scholar.google.com.tw/citations?user=a_dbdxAAAAAJ;narJyMAAAAAJ", "orcid": ";;0000-0002-9243-6885;;", "linkedin": "sara-fridovich-keil-3aa744160/;;gordon-wetzstein-2406723/;;", "or_profile": "~Sara_Fridovich-Keil1;fabriziovaldivia2001@gmail.com;~Gordon_Wetzstein3;~Benjamin_Recht1;~Mahdi_Soltanolkotabi1", "aff": "Stanford University;;Stanford University;University of California, Berkeley;University of Southern California", "aff_domain": "stanford.edu;;stanford.edu;berkeley.edu;usc.edu", "position": "Postdoc;;Associate Professor;Full Professor;Associate Professor", "bibtex": "@misc{\nfridovich-keil2024gradient,\ntitle={Gradient Descent Provably Solves Nonlinear Tomographic Reconstruction},\nauthor={Sara Fridovich-Keil and Fabrizio Valdivia and Gordon Wetzstein and Benjamin Recht and Mahdi Soltanolkotabi},\nyear={2024},\nurl={https://openreview.net/forum?id=87XbxDnPqj}\n}", "github": "", "project": "", "reviewers": "MBT4;TLot;1R2J;qFK5", "site": "https://openreview.net/forum?id=87XbxDnPqj", "pdf_size": 609659, "rating": "3;5;6;8", "confidence": "2;4;4;1", "soundness": "2;2;3;2", "contribution": "2;2;3;2", "presentation": "2;4;3;2", "wc_summary": "49;158;54;12", "wc_strengths": "26;97;37;1", "wc_weaknesses": "66;149;70;1", "wc_questions": "32;224;86;1", "wc_review": "173;628;247;15", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "535;1151;564;12", "reply_reviewers": "0;0;0;0", "reply_authors": "2;3;2;1", "rating_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 2.75, 1.299038105676658 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 68.25, 54.29721447735602 ], "wc_strengths_avg": [ 40.25, 35.26595383652624 ], "wc_weaknesses_avg": [ 71.5, 52.461890930464946 ], "wc_questions_avg": [ 85.75, 85.41772357069696 ], "wc_review_avg": [ 265.75, 225.30798365792543 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 565.5, 403.1454452179759 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.2668802563418119, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8804626619442851423&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Stanford University;University of California, Berkeley;University of Southern California", "aff_unique_dep": ";;", "aff_unique_url": "https://www.stanford.edu;https://www.berkeley.edu;https://www.usc.edu", "aff_unique_abbr": "Stanford;UC Berkeley;USC", "aff_campus_unique_index": "0;0;1;2", "aff_campus_unique": 
"Stanford;Berkeley;Los Angeles", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "87YOFayjcG", "title": "JudgeLM : Fine-tuned Large Language Models are Scalable Judges", "track": "main", "status": "Reject", "tldr": "", "abstract": "Evaluating Large Language Models (LLMs) in open-ended scenarios is challenging due to existing benchmarks and metrics can not measure them comprehensively. To address this problem, we propose to fine-tune LLMs as scalable judges (JudgeLM) to evaluate LLMs efficiently and effectively in open-ended benchmarks. We first propose a comprehensive, large-scale, high-quality dataset containing task seeds, LLMs-generated answers, and GPT-4-generated judgments for fine-tuning high-performance judges, as well as a new benchmark for evaluating the judges. We train JudgeLM at different scales from 7B, 13B, to 33B parameters, and conduct a systematic analysis of its capabilities and behaviors. We then analyze the key biases in fine-tuning LLM as a judge and consider them as position bias, knowledge bias, and format bias. To address these issues, JudgeLM introduces a bag of techniques including swap augmentation, reference support, and reference drop, which clearly enhance the judge's performance. JudgeLM obtains the state-of-the-art judge performance on both the existing PandaLM benchmark and our proposed new benchmark. Our JudgeLM is efficient and the JudgeLM-7B only needs 3 mins to judge 5K samples with 8 A100 GPUs. JudgeLM obtains high agreement with the teacher judge, achieving an agreement exceeding 90% that even surpasses human-to-human agreement. JudgeLM also demonstrates extended capabilities in being judges of the single answer, multimodal models, multiple answers, and multi-turn chat.", "keywords": "Large Language Model;Evaluation of Open-ended Tasks;Scalable Judges;Judge Dataset", "primary_area": "generative models", "supplementary_material": "", "author": "Lianghui Zhu;Xinggang Wang;Xinlong Wang", "authorids": "~Lianghui_Zhu3;~Xinggang_Wang1;~Xinlong_Wang2", "gender": "M;M;M", "homepage": "https://xwcv.github.io/index.htm;;https://github.com/Unrealluver", "dblp": "95/3056;;", "google_scholar": "qNCTLV0AAAAJ;DPz0DjYAAAAJ;NvMHcs0AAAAJ", "orcid": "0000-0001-6732-7823;;", "linkedin": ";;", "or_profile": "~Xinggang_Wang1;~Xinlong_Wang2;~Lianghui_Zhu2", "aff": "Huazhong University of Science and Technology;Beijing Academy of Artificial Intelligence;Huazhong University of Science and Technology", "aff_domain": "hust.edu.cn;baai.ac.cn;hust.edu.cn", "position": "Full Professor;Researcher;PhD student", "bibtex": "@misc{\nzhu2024judgelm,\ntitle={Judge{LM} : Fine-tuned Large Language Models are Scalable Judges},\nauthor={Lianghui Zhu and Xinggang Wang and Xinlong Wang},\nyear={2024},\nurl={https://openreview.net/forum?id=87YOFayjcG}\n}", "github": "", "project": "", "reviewers": "CPpo;UdgT;rHPM;jzHn", "site": "https://openreview.net/forum?id=87YOFayjcG", "pdf_size": 2686654, "rating": "5;5;5;6", "confidence": "4;4;4;4", "soundness": "3;3;2;3", "contribution": "2;3;2;2", "presentation": "3;3;3;3", "wc_summary": "86;78;116;47", "wc_strengths": "80;46;74;100", "wc_weaknesses": "189;102;159;248", "wc_questions": "43;57;39;137", "wc_review": "398;283;388;532", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "679;655;597;1249", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.25, 
0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 81.75, 24.55987581401828 ], "wc_strengths_avg": [ 75.0, 19.313207915827967 ], "wc_weaknesses_avg": [ 174.5, 52.69962049199216 ], "wc_questions_avg": [ 69.0, 39.824615503479755 ], "wc_review_avg": [ 400.25, 88.40354913689835 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 795.0, 263.80674744971935 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 107, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17023951720589094623&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Huazhong University of Science and Technology;Beijing Academy of Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "http://www.hust.edu.cn;https://www.baaic.cn", "aff_unique_abbr": "HUST;BAAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "87zTciMFHM", "title": "EXPLAIN, AGREE and LEARN: A Recipe for Scalable Neural-Symbolic Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Recent progress in neural-symbolic AI (NeSy) has demonstrated that neural networks can benefit greatly from an integration with symbolic reasoning methods in terms of interpretability, data-efficiency and generalisation performance. Unfortunately, the symbolic component can lead to intractable computations for more complicated domains. This computational bottleneck has prevented the successful application of NeSy to more practical problems. We present EXPLAIN, AGREE and LEARN, an alternative paradigm that addresses the scalability problem of probabilistic NeSy learning. EXPLAIN leverages sampling to obtain a representative set of possible explanations for the symbolic component driven by a newly introduced diversity criterion. Then AGREE assigns importance to the sampled explanations based on the neural predictions. This defines the learning objective, which for sufficiently many samples is guaranteed to coincide with the objective used by exact probabilistic NeSy approaches. Using this objective, LEARN updates the neural component with direct supervision on its outputs, without the need to propagate the gradient through the symbolic component. 
Our approximate paradigm and its theoretical guarantees are experimentally evaluated and shown to be competitive with existing exact probabilistic NeSy frameworks, while outperforming them in terms of speed.", "keywords": "neuro-symbolic learning;variational inference;sampling;discrete latent variable model", "primary_area": "neurosymbolic & hybrid AI systems (physics-informed, logic & formal reasoning, etc.)", "supplementary_material": "/attachment/7836706a1e46eb52fbc34bce7411947546e1b893.pdf", "author": "Victor Verreet;Lennert De Smet;Luc De Raedt;Emanuele Sansone", "authorids": "~Victor_Verreet1;~Lennert_De_Smet1;~Luc_De_Raedt1;~Emanuele_Sansone1", "gender": "M;M;M;", "homepage": ";;https://people.cs.kuleuven.be/~luc.deraedt/;", "dblp": "303/4998;342/2788;r/LucDeRaedt;", "google_scholar": ";;https://scholar.google.com.tw/citations?user=dgobB6AAAAAJ;", "orcid": "0000-0003-2595-7207;0000-0003-3136-0634;0000-0002-6860-6303;", "linkedin": ";;;", "or_profile": "~Victor_Verreet1;~Lennert_De_Smet1;~Luc_De_Raedt1;~Emanuele_Sansone1", "aff": "KU Leuven;KU Leuven;KU Leuven, Belgium;", "aff_domain": "kuleuven.be;kuleuven.be;cs.kuleuven.be;", "position": "PhD student;PhD student;Full Professor;", "bibtex": "@misc{\nverreet2024explain,\ntitle={{EXPLAIN}, {AGREE} and {LEARN}: A Recipe for Scalable Neural-Symbolic Learning},\nauthor={Victor Verreet and Lennert De Smet and Luc De Raedt and Emanuele Sansone},\nyear={2024},\nurl={https://openreview.net/forum?id=87zTciMFHM}\n}", "github": "", "project": "", "reviewers": "GwsC;WFyQ;Pp9a;fkWq", "site": "https://openreview.net/forum?id=87zTciMFHM", "pdf_size": 1213246, "rating": "3;3;5;5", "confidence": "4;4;2;4", "soundness": "2;2;3;2", "contribution": "2;2;2;3", "presentation": "2;2;2;2", "wc_summary": "74;63;62;31", "wc_strengths": "23;39;53;16", "wc_weaknesses": "155;124;473;542", "wc_questions": "32;344;31;2", "wc_review": "284;570;619;591", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 57.5, 16.00781059358212 ], "wc_strengths_avg": [ 32.75, 14.359230480774379 ], "wc_weaknesses_avg": [ 323.5, 185.93345583837245 ], "wc_questions_avg": [ 102.25, 140.09349556635382 ], "wc_review_avg": [ 516.0, 135.06850113923676 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:B48ZpoaYh58J:scholar.google.com/&scioq=EXPLAIN,+AGREE+and+LEARN:+A+Recipe+for+Scalable+Neural-Symbolic+Learning&hl=en&as_sdt=0,33", "gs_version_total": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Katholieke Universiteit Leuven;KU Leuven", "aff_unique_dep": ";", "aff_unique_url": "https://www.kuleuven.be;https://www.kuleuven.be", "aff_unique_abbr": "KU Leuven;KU Leuven", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Belgium" }, { "id": "88FcNOwNvM", "title": "Compositional Image Decomposition with Diffusion Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "Given an image of a natural scene, we are able to quickly decompose it into a set of 
components such as objects, lighting, shadows, and foreground. We can then picture how the image would look if we were to recombine certain components with those from other images, for instance producing a scene with a set of objects from our bedroom and animals from a zoo under the lighting conditions of a forest even if we have never seen such a scene in real life before. We present a method to decompose an image into such compositional components. Our approach, Decomp Diffusion, is an unsupervised method which, when given a single image, infers a set of different components in the image, each represented by a diffusion model. We demonstrate how components can capture different factors of the scene, ranging from global scene descriptors (shadows, foreground, facial expression) to local scene descriptors (objects). We further illustrate how inferred factors can be flexibly composed, even with factors inferred from other models, to generate a variety of scenes sharply different than those seen in training time.", "keywords": "Image Decomposition;Compositional Decomposition", "primary_area": "generative models", "supplementary_material": "/attachment/a931bdad2b84cb88335b8450372e276f9f454278.pdf", "author": "Jocelin Su;Nan Liu;Joshua B. Tenenbaum;Yilun Du", "authorids": "~Jocelin_Su1;~Nan_Liu4;~Joshua_B._Tenenbaum1;~Yilun_Du1", "gender": ";;;", "homepage": ";;;https://yilundu.github.io", "dblp": ";;t/JoshuaBTenenbaum;204/4379", "google_scholar": ";;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Jocelin_Su1;~Nan_Liu4;~Joshua_B._Tenenbaum1;~Yilun_Du1", "aff": ";;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": ";;mit.edu;mit.edu", "position": ";;Professor;PhD student", "bibtex": "@misc{\nsu2024compositional,\ntitle={Compositional Image Decomposition with Diffusion Models},\nauthor={Jocelin Su and Nan Liu and Joshua B. 
Tenenbaum and Yilun Du},\nyear={2024},\nurl={https://openreview.net/forum?id=88FcNOwNvM}\n}", "github": "", "project": "", "reviewers": "Rmft;t1av;W4iB", "site": "https://openreview.net/forum?id=88FcNOwNvM", "pdf_size": 2766866, "rating": "5;6;8", "confidence": "4;4;4", "soundness": "2;3;3", "contribution": "2;3;3", "presentation": "2;3;3", "wc_summary": "46;55;98", "wc_strengths": "28;57;79", "wc_weaknesses": "44;117;36", "wc_questions": "46;73;216", "wc_review": "164;302;429", "wc_reply_reviewers": "115;0;44", "wc_reply_authors": "731;831;399", "reply_reviewers": "3;0;1", "reply_authors": "5;1;1", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 66.33333333333333, 22.69116323349001 ], "wc_strengths_avg": [ 54.666666666666664, 20.885933597094056 ], "wc_weaknesses_avg": [ 65.66666666666667, 36.444783196257625 ], "wc_questions_avg": [ 111.66666666666667, 74.59371436134697 ], "wc_review_avg": [ 298.3333333333333, 108.21686046494274 ], "wc_reply_reviewers_avg": [ 53.0, 47.37791327893902 ], "wc_reply_authors_avg": [ 653.6666666666666, 184.64620825303246 ], "reply_reviewers_avg": [ 1.3333333333333333, 1.247219128924647 ], "reply_authors_avg": [ 2.3333333333333335, 1.8856180831641267 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16868049165076129421&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "88MalncLgU", "title": "GInX-Eval: Towards In-Distribution Evaluation of Graph Neural Networks Explanations", "track": "main", "status": "Reject", "tldr": "", "abstract": "Diverse explainability methods of graph neural networks (GNN) have recently been developed to highlight the edges and nodes in the graph that contribute the most to the model predictions. However, it is not clear yet how to evaluate the *correctness* of those explanations, whether it is from a human or a model perspective. One unaddressed bottleneck in the current evaluation procedure is the problem of out-of-distribution explanations, whose distribution differs from those of the training data. This important issue affects existing evaluation metrics such as the popular faithfulness or fidelity score. In this paper, we show the limitations of faithfulness metrics. We propose **GInX-Eval** (**G**raph **In**-distribution e**X**planation **Eval**uation), an evaluation procedure of graph explanations that overcomes the pitfalls of faithfulness and offers new insights on explainability methods. Using a retraining strategy, the GInX score measures how informative removed edges are for the model and the EdgeRank score evaluates if explanatory edges are correctly ordered by their importance. GInX-Eval verifies if ground-truth explanations are instructive to the GNN model. 
In addition, it shows that many popular methods, including gradient-based methods, produce explanations that are not better than a random designation of edges as important subgraphs, challenging the findings of current works in the area. Results with GInX-Eval are consistent across multiple datasets and align with human evaluation.", "keywords": "Evaluation;explainability;graph neural networks;out-of-distribution explanations;faithfulness", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/15109512a07d95ca40cf9dfe1c9457f196d56b2e.pdf", "author": "Kenza Amara;Mennatallah El-Assady;Zhitao Ying", "authorids": "~Kenza_Amara1;~Mennatallah_El-Assady1;~Zhitao_Ying1", "gender": "F;;M", "homepage": "https://ai.ethz.ch/people/kenza-amara.html;;https://www.cs.yale.edu/homes/ying-rex", "dblp": ";183/8957;209/4936", "google_scholar": "e4wlh1AAAAAJ;;6fqNXooAAAAJ", "orcid": "0000-0001-7139-5562;0000-0001-8526-2613;", "linkedin": "kenza-amara/;;rex-ying-92770148/", "or_profile": "~Kenza_Amara1;~Mennatallah_El-Assady1;~Zhitao_Ying1", "aff": "ETH AI Center;Department of Computer Science, ETHZ - ETH Zurich;Yale University", "aff_domain": "infk.ethz.ch;inf.ethz.ch;yale.edu", "position": "PhD student;Assistant Professor;Assistant Professor", "bibtex": "@misc{\namara2024ginxeval,\ntitle={{GI}nX-Eval: Towards In-Distribution Evaluation of Graph Neural Networks Explanations},\nauthor={Kenza Amara and Mennatallah El-Assady and Zhitao Ying},\nyear={2024},\nurl={https://openreview.net/forum?id=88MalncLgU}\n}", "github": "", "project": "", "reviewers": "EeHX;DCst;qa24", "site": "https://openreview.net/forum?id=88MalncLgU", "pdf_size": 766028, "rating": "5;6;6", "confidence": "4;4;3", "soundness": "2;3;3", "contribution": "2;2;3", "presentation": "2;4;3", "wc_summary": "94;69;145", "wc_strengths": "52;77;73", "wc_weaknesses": "215;241;152", "wc_questions": "2;494;23", "wc_review": "363;881;393", "wc_reply_reviewers": "36;0;49", "wc_reply_authors": "548;645;417", "reply_reviewers": "1;0;1", "reply_authors": "1;1;1", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 102.66666666666667, 31.626290048347787 ], "wc_strengths_avg": [ 67.33333333333333, 10.96458946893235 ], "wc_weaknesses_avg": [ 202.66666666666666, 37.36605708691007 ], "wc_questions_avg": [ 173.0, 227.1431266844762 ], "wc_review_avg": [ 545.6666666666666, 237.43256352535602 ], "wc_reply_reviewers_avg": [ 28.333333333333332, 20.725722075613085 ], "wc_reply_authors_avg": [ 536.6666666666666, 93.42495502927588 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5722598543388739124&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;1", "aff_unique_norm": "ETH Zurich;Yale University", "aff_unique_dep": "AI Center;", "aff_unique_url": "https://www.ethz.ch;https://www.yale.edu", "aff_unique_abbr": "ETH;Yale", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Zurich;", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Switzerland;United States" }, { "title": "Local Graph 
Clustering with Noisy Labels", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19337", "id": "89A5c6enfc", "author_site": "Artur Back de Luca, Kimon Fountoulakis, Shenghao Yang", "tldr": "", "abstract": "The growing interest in machine learning problems over graphs with additional node information such as texts, images, or labels has popularized methods that require the costly operation of processing the entire graph. Yet, little effort has been made to the development of fast local methods (i.e. without accessing the entire graph) that extract useful information from such data. To that end, we propose a study of local graph clustering using noisy node labels as a proxy for additional node information. In this setting, nodes receive initial binary labels based on cluster affiliation: 1 if they belong to the target cluster and 0 otherwise. Subsequently, a fraction of these labels is flipped. We investigate the benefits of incorporating noisy labels for local graph clustering. By constructing a weighted graph with such labels, we study the performance of graph diffusion-based local clustering method on both the original and the weighted graphs. From a theoretical perspective, we consider recovering an unknown target cluster with a single seed node in a random graph with independent noisy node labels. We provide sufficient conditions on the label noise under which, with high probability, using diffusion in the weighted graph yields a more accurate recovery of the target cluster. This approach proves more effective than using the given labels alone or using diffusion in the label-free original graph. Empirically, we show that reliable node labels can be obtained with just a few samples from an attributed graph. Moreover, utilizing these labels via diffusion in the weighted graph leads to significantly better local clustering performance across several real-world datasets, improving F1 scores by up to 13\\%.", "keywords": "local graph clustering;graph diffusion;attributed graphs;noisy labels", "primary_area": "learning on graphs and other geometries & topologies", "supplementary_material": "/attachment/074bb46f3424c6bb86d5032d9a3cbba9d3838e91.zip", "author": "Artur Back de Luca;Kimon Fountoulakis;Shenghao Yang", "authorids": "~Artur_Back_de_Luca1;~Kimon_Fountoulakis1;~Shenghao_Yang1", "gender": "M;M;M", "homepage": "https://artur-deluca.github.io/;https://opallab.ca;https://cs.uwaterloo.ca/~s286yang/", "dblp": ";149/5799;41/4482-2", "google_scholar": "tL9d0UoAAAAJ;https://scholar.google.ca/citations?user=K-SafJUAAAAJ;ocLDM-AAAAAJ", "orcid": ";;", "linkedin": "https://linkedin.com/in/arturbackdeluca;;", "or_profile": "~Artur_Back_de_Luca1;~Kimon_Fountoulakis1;~Shenghao_Yang1", "aff": "University of Waterloo;University of Waterloo;University of Waterloo", "aff_domain": "uwaterloo.ca;uwaterloo.ca;uwaterloo.ca", "position": "PhD student;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nluca2024local,\ntitle={Local Graph Clustering with Noisy Labels},\nauthor={Artur Back de Luca and Kimon Fountoulakis and Shenghao Yang},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=89A5c6enfc}\n}", "github": "", "project": "", "reviewers": "LuBd;PWgh;oYEs;CYJN", "pdf_size": 764106, "rating": "3;6;6;8", "confidence": "4;2;3;3", "soundness": "2;3;3;3", "contribution": "2;2;3;3", "presentation": "2;3;3;4", "wc_summary": "38;95;59;91", "wc_strengths": "31;46;32;73", "wc_weaknesses": 
"201;98;46;67", "wc_questions": "2;269;125;209", "wc_review": "272;508;262;440", "wc_reply_reviewers": "0;11;11;24", "wc_reply_authors": "1588;1082;816;1404", "reply_reviewers": "0;1;1;1", "reply_authors": "4;4;2;3", "rating_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 70.75, 23.498670175139697 ], "wc_strengths_avg": [ 45.5, 16.948451256678293 ], "wc_weaknesses_avg": [ 103.0, 59.5273046592906 ], "wc_questions_avg": [ 151.25, 100.20572588430264 ], "wc_review_avg": [ 370.5, 106.31439225241331 ], "wc_reply_reviewers_avg": [ 11.5, 8.5 ], "wc_reply_authors_avg": [ 1222.5, 296.4435022057323 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 0.82915619758885 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5940885257860046, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11045825304576235979&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=89A5c6enfc", "pdf": "https://openreview.net/pdf?id=89A5c6enfc", "email": "uwaterloo.ca;uwaterloo.ca;uwaterloo.ca", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Waterloo", "aff_unique_dep": "", "aff_unique_url": "https://uwaterloo.ca", "aff_unique_abbr": "UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "id": "89AOrk05uy", "title": "Understanding and addressing spurious correlation via Neural Tangent Kernels: A spectral bias perspective", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "The existence of spurious correlations can prompt neural networks to depend heavily on features that exhibit strong correlations with the target labels exclusively in the training set, while such correlations may not persist in real-world scenarios. As a consequence, this results in suboptimal performance within certain subgrouping of the data. In this work, we leverage the theoretical insights of the Neural Tangent Kernel (NTK) to investigate the group robustness problem in the presence of spurious correlations. Specifically, we identify that poor generalization is not solely a consequence of statistical biases inherent in the dataset; rather, it also arises from the disparity in complexity between spurious and core features. 
Building upon this observation, we propose a method that adjusts the spectral properties of neural networks to mitigate bias without requiring knowledge of the spurious attributes.", "keywords": "NTK;spurious correlation", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Yeat Jeng Ng;Ainhize Barrainkua;Novi Quadrianto", "authorids": "~Yeat_Jeng_Ng2;~Ainhize_Barrainkua1;~Novi_Quadrianto1", "gender": "M;F;M", "homepage": "https://profiles.sussex.ac.uk/p515010-yeat-jeng-ng;;http://www.sussex.ac.uk/profiles/335583", "dblp": ";333/2278;http://dblp.uni-trier.de/pers/hd/q/Quadrianto:Novi", "google_scholar": ";;I-rLzGcAAAAJ", "orcid": ";0000-0002-2300-312X;", "linkedin": ";ainhize-barrainkua-agirre-07a000151;", "or_profile": "~Yeat_Jeng_Ng2;~Ainhize_Barrainkua1;~Novi_Quadrianto1", "aff": "University of Sussex;Basque Center for Applied Mathematics;Monash Indonesia", "aff_domain": "sussex.ac.uk;bcamath.org;monash.edu", "position": "PhD student;PhD student;Full Professor", "bibtex": "@misc{\nng2024understanding,\ntitle={Understanding and addressing spurious correlation via Neural Tangent Kernels: A spectral bias perspective},\nauthor={Yeat Jeng Ng and Ainhize Barrainkua and Novi Quadrianto},\nyear={2024},\nurl={https://openreview.net/forum?id=89AOrk05uy}\n}", "github": "", "project": "", "reviewers": "hgH8;cfRQ;XMhp", "site": "https://openreview.net/forum?id=89AOrk05uy", "pdf_size": 36589025, "rating": "3;3;6", "confidence": "4;4;4", "soundness": "1;2;3", "contribution": "1;1;3", "presentation": "3;3;4", "wc_summary": "188;118;175", "wc_strengths": "104;55;130", "wc_weaknesses": "826;356;217", "wc_questions": "130;62;22", "wc_review": "1248;591;544", "wc_reply_reviewers": "111;202;56", "wc_reply_authors": "307;630;238", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 4.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.0, 0.816496580927726 ], "contribution_avg": [ 1.6666666666666667, 0.9428090415820634 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 160.33333333333334, 30.40102337458761 ], "wc_strengths_avg": [ 96.33333333333333, 31.09483701338357 ], "wc_weaknesses_avg": [ 466.3333333333333, 260.5767108208679 ], "wc_questions_avg": [ 71.33333333333333, 44.58200932613463 ], "wc_review_avg": [ 794.3333333333334, 321.3641064102973 ], "wc_reply_reviewers_avg": [ 123.0, 60.20520464766037 ], "wc_reply_authors_avg": [ 391.6666666666667, 170.86511899416126 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Te8q6S72rikJ:scholar.google.com/&scioq=Understanding+and+addressing+spurious+correlation+via+Neural+Tangent+Kernels:+A+spectral+bias+perspective&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Sussex;Basque Center for Applied Mathematics;Monash University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.sussex.ac.uk;https://www.bcamath.org/;https://www.monash.edu.id", "aff_unique_abbr": "Sussex;BCAM;Monash", "aff_campus_unique_index": "1", "aff_campus_unique": ";Indonesia", "aff_country_unique_index": "0;1;2", "aff_country_unique": "United Kingdom;Spain;Indonesia" }, { "id": "89XNDtqhpL", "title": "MatFormer: Nested Transformer for Elastic Inference", "track": "main", "status": "Reject", "tldr": "", 
"abstract": "Transformer models are deployed in a wide range of settings, from multi-accelerator clusters to standalone mobile phones. The diverse inference constraints in these scenarios necessitate practitioners to train foundation models such as PaLM 2, Llama, & ViTs as a series of models of varying sizes. Due to significant training costs, only a select few model sizes are trained and supported, limiting more fine-grained control over relevant tradeoffs, including latency, cost, and accuracy. This work introduces MatFormer, a nested Transformer architecture designed to offer elasticity in a variety of deployment constraints. Each Feed Forward Network (FFN) block of a MatFormer model is jointly optimized with a few nested smaller FFN blocks. This training procedure allows for the Mix'n'Match of model granularities across layers -- i.e., a trained universal MatFormer model enables extraction of hundreds of accurate smaller models, which were never explicitly optimized. We empirically demonstrate MatFormer's effectiveness across different model classes (decoders & encoders), modalities (language & vision), and scales (up to 2.6B parameters). We find that a 2.6B decoder-only MatFormer language model (MatLM) allows us to extract smaller models spanning from 1.5B to 2.6B, each exhibiting comparable validation loss and one-shot downstream evaluations to their independently trained counterparts. Furthermore, we observe that smaller encoders extracted from a universal MatFormer-based ViT (MatViT) encoder preserve the metric-space structure for adaptive large-scale retrieval. Finally, we showcase that speculative decoding with the accurate and consistent submodels extracted from MatFormer can further reduce inference latency.", "keywords": "Transformer;Neural Architecture Design;Large-scale deployment;Efficiency", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/9c05c9481de346aa885b02c262fbf2fc546fce1e.pdf", "author": "Fnu Devvrit;Sneha Kudugunta;Aditya Kusupati;Tim Dettmers;Kaifeng Chen;Inderjit S Dhillon;Yulia Tsvetkov;Hannaneh Hajishirzi;Sham M. 
Kakade;Ali Farhadi;Prateek Jain", "authorids": "~Fnu_Devvrit1;~Sneha_Kudugunta1;~Aditya_Kusupati1;~Tim_Dettmers2;~Kaifeng_Chen2;~Inderjit_S_Dhillon1;~Yulia_Tsvetkov1;~Hannaneh_Hajishirzi1;~Sham_M._Kakade1;~Ali_Farhadi3;~Prateek_Jain1", "gender": "M;F;M;M;M;M;F;F;M;M;M", "homepage": ";;http://www.adityakusupati.com/;https://timdettmers.com/;;http://www.cs.utexas.edu/users/inderjit/;https://homes.cs.washington.edu/~yuliats/;https://homes.cs.washington.edu/~hannaneh/;https://shamulent.github.io;https://homes.cs.washington.edu/~ali/;http://prateekjain.org", "dblp": ";;231/7662;172/1045;186/7404;d/InderjitSDhillon;75/8157;52/1296;s/SMKakade;37/5826;https://dblp.uni-trier.de/pers/j/Jain_0002:Prateek.html", "google_scholar": "c86HtPoAAAAJ;LeEwxtgAAAAJ;https://scholar.google.co.in/citations?user=qULx8g8AAAAJ;lHI3w5kAAAAJ;xjEcoNQAAAAJ;xBv5ZfkAAAAJ;SEDPkrsAAAAJ;LOV6_WIAAAAJ;https://scholar.google.com.tw/citations?user=wb-DKCIAAAAJ;jeOFRDsAAAAJ;qYhRbJoAAAAJ", "orcid": ";;0000-0001-8455-1851;;;;0000-0002-4634-7128;;;;", "linkedin": "devvrit/;;adityakusupati/;;kaifeng-chen-b37a2b69/;inderjit-dhillon-a20888b0/;;;;;", "or_profile": "~Fnu_Devvrit1;~Sneha_Kudugunta1;~Aditya_Kusupati1;~Tim_Dettmers2;~Kaifeng_Chen2;~Inderjit_S_Dhillon1;~Yulia_Tsvetkov1;~Hannaneh_Hajishirzi1;~Sham_M._Kakade1;~Ali_Farhadi3;~Prateek_Jain1", "aff": ", University of Texas at Austin;Google DeepMind;Department of Computer Science, University of Washington;University of Washington;Google;University of Texas, Austin;Department of Computer Science, University of Washington;University of Washington;Harvard University;University of Washington;Google", "aff_domain": "cs.utexas.edu;google.com;cs.washington.edu;cs.washington.edu;google.com;utexas.edu;cs.washington.edu;uw.edu;harvard.edu;cs.uw.edu;google.com", "position": "PhD student;Researcher;PhD student;PhD student;Researcher;Full Professor;Associate Professor;Associate Professor;Full Professor;Full Professor;Researcher", "bibtex": "@misc{\ndevvrit2024matformer,\ntitle={MatFormer: Nested Transformer for Elastic Inference},\nauthor={Fnu Devvrit and Sneha Kudugunta and Aditya Kusupati and Tim Dettmers and Kaifeng Chen and Inderjit S Dhillon and Yulia Tsvetkov and Hannaneh Hajishirzi and Sham M. 
Kakade and Ali Farhadi and Prateek Jain},\nyear={2024},\nurl={https://openreview.net/forum?id=89XNDtqhpL}\n}", "github": "", "project": "", "reviewers": "J4dY;CZp9;UoXf;EdnT", "site": "https://openreview.net/forum?id=89XNDtqhpL", "pdf_size": 639959, "rating": "5;5;6;8", "confidence": "4;3;4;3", "soundness": "2;3;2;4", "contribution": "2;2;3;4", "presentation": "2;3;3;4", "wc_summary": "48;131;104;62", "wc_strengths": "27;95;116;45", "wc_weaknesses": "172;425;381;173", "wc_questions": "53;81;11;42", "wc_review": "300;732;612;322", "wc_reply_reviewers": "0;117;0;0", "wc_reply_authors": "968;935;811;549", "reply_reviewers": "0;1;0;0", "reply_authors": "4;4;3;3", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "contribution_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 86.25, 33.04826016600571 ], "wc_strengths_avg": [ 70.75, 36.09968836430586 ], "wc_weaknesses_avg": [ 287.75, 116.2956899459305 ], "wc_questions_avg": [ 46.75, 25.063668925358872 ], "wc_review_avg": [ 491.5, 185.58219203361082 ], "wc_reply_reviewers_avg": [ 29.25, 50.66248612138966 ], "wc_reply_authors_avg": [ 815.75, 164.75644903918027 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.5, 0.5 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": -0.40824829046386296, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15483101880052662391&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2;2;1;0;2;2;3;2;1", "aff_unique_norm": "University of Texas at Austin;Google;University of Washington;Harvard University", "aff_unique_dep": ";Google DeepMind;Department of Computer Science;", "aff_unique_url": "https://www.utexas.edu;https://deepmind.com;https://www.washington.edu;https://www.harvard.edu", "aff_unique_abbr": "UT Austin;DeepMind;UW;Harvard", "aff_campus_unique_index": "0;2;3;0;2;3", "aff_campus_unique": "Austin;;Seattle;Mountain View", "aff_country_unique_index": "0;1;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "89ZekEEsSJ", "title": "Stealthy Targeted Backdoor Attack Against Image Captioning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "We study backdoor attacks against image caption models, whose security issues have received less scrutiny compared with other multimodal tasks. Existing backdoor attacks typically pair a trigger either with a predefined sentence or a single word as the targeted output, yet they are unrelated to the image content, making them easily noticeable as anomalies by humans. In this paper, we present a novel method to craft targeted backdoor attacks against image caption models, which are designed to be stealthier than prior attacks. Specifically, our method first learns a special trigger by leveraging universal perturbation techniques for object detection, then places the learned trigger in the center of some specific source object and modifies the corresponding object name in the output caption to a predefined target name. During the prediction phase, the caption produced by the backdoored model for input images with the trigger can accurately convey the semantic information of the rest of the whole image, while incorrectly recognizing the source object as the predefined target. 
Extensive experiments demonstrate that our approach can achieve a high attack success rate while having a negligible impact on model clean performance. In addition, we show our method is stealthy in that the produced backdoor samples are indistinguishable from clean samples in both image and text domains, which can successfully bypass existing backdoor defenses, highlighting the need for better defensive mechanisms against such stealthy backdoor attacks.", "keywords": "Machine Learning;Image Caption;Backdoor Attack", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/9fcd6274e7f84de436036ee93df2145fbdfd328b.pdf", "author": "Wenshu Fan;Hongwei Li;Wenbo Jiang;Meng Hao;Xiao Zhang", "authorids": "~Wenshu_Fan1;~Hongwei_Li2;~Wenbo_Jiang1;~Meng_Hao1;~Xiao_Zhang2", "gender": "M;M;M;;M", "homepage": "https://github.com/meIody97;https://faculty.uestc.edu.cn/lihongwei/zh_CN/index.htm;https://wenbo-jiang.github.io/;;https://xiao-zhang.net", "dblp": "285/2460;39/5544-1;34/10703-1.html;;", "google_scholar": ";-o6u2gwAAAAJ;https://scholar.google.com.sg/citations?user=OjHzvJkAAAAJ;;L-lz7CUAAAAJ", "orcid": ";;0000-0002-4592-8094;;0009-0008-1837-7670", "linkedin": ";;;;", "or_profile": "~Wenshu_Fan1;~Hongwei_Li2;~Wenbo_Jiang1;~Meng_Hao1;~Xiao_Zhang2", "aff": "University of Electronic Science and Technology of China;University of Electronic Science and Technology of China, Tsinghua University;University of Electronic Science and Technology of China;;CISPA Helmholtz Center for Information Security", "aff_domain": "uestc.edu.cn;uestc.edu.cn;uestc.edu.cn;;cispa.de", "position": "PhD student;Full Professor;Postdoc;;Tenure-Track Faculty", "bibtex": "@misc{\nfan2024stealthy,\ntitle={Stealthy Targeted Backdoor Attack Against Image Captioning},\nauthor={Wenshu Fan and Hongwei Li and Wenbo Jiang and Meng Hao and Xiao Zhang},\nyear={2024},\nurl={https://openreview.net/forum?id=89ZekEEsSJ}\n}", "github": "", "project": "", "reviewers": "nNC3;1VPN;MmsG;sNE8", "site": "https://openreview.net/forum?id=89ZekEEsSJ", "pdf_size": 840700, "rating": "5;5;5;5", "confidence": "4;3;5;4", "soundness": "2;3;2;2", "contribution": "2;2;2;2", "presentation": "3;3;3;2", "wc_summary": "131;82;71;66", "wc_strengths": "81;39;53;24", "wc_weaknesses": "443;57;155;389", "wc_questions": "7;86;5;5", "wc_review": "662;264;284;484", "wc_reply_reviewers": "0;0;55;15", "wc_reply_authors": "592;202;926;916", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;2;2", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 87.5, 25.773047937719745 ], "wc_strengths_avg": [ 49.25, 21.00446381129497 ], "wc_weaknesses_avg": [ 261.0, 159.968746947646 ], "wc_questions_avg": [ 25.75, 34.79493497622894 ], "wc_review_avg": [ 423.5, 162.35993964029427 ], "wc_reply_reviewers_avg": [ 17.5, 22.5 ], "wc_reply_authors_avg": [ 659.0, 296.08951349211947 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10372884778743723752&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of Electronic Science and Technology of China;CISPA Helmholtz Center for Information Security", "aff_unique_dep": 
";", "aff_unique_url": "https://www.uestc.edu.cn;https://www.cispa.de/", "aff_unique_abbr": "UESTC;CISPA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "China;Germany" }, { "id": "89bUur0Q4J", "title": "Vision-Language Subspace Prompting", "track": "main", "status": "Reject", "tldr": "", "abstract": "Prompting vision-language models like CLIP to adapt to downstream tasks is currently topical. A seminal technique to this end is context optimization, which replaces a subset of textual tokens with trainable parameters (a.k.a soft prompts). However, current pipelines use a single vector embedding induced by soft prompts as the classifier weight for visual recognition. This can lead to problems where the learned soft prompts overfit to base classes\u2019 training data, resulting in poor performance when applied to novel classes. Several approaches were proposed to address this issue by regularizing the learned soft prompts to align them with handcrafted text/hard prompts. However, excessive regularization of the soft prompts can hurt the model\u2019s performance on the base classes it is trained on. Maintaining the right balance to ensure strong base- and novel-class performance is crucial but non-trivial. In this paper, we introduce a novel subspace-based prompt learning method, named SuPr, which can effectively model subspaces spanning the embeddings\nof both the learnable soft and the textual/hard prompts. Our subspace-based alignment between hand-crafted and learnable prompts balances these effects to achieve excellent fitting of base classes as well as generalization to novel classes. With the advantages of subspace modelling, our SuPr shows its effectiveness on generalization from base to new, domain generalization, cross-dataset transfer and few-shot learning, leading to new state-of-the-art results in all settings.", "keywords": "Prompt Learning; Vision Language Models", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Yujun Tong;Da Li;Dongliang Chang;Tianwei Cao;Timothy Hospedales;Yi-Zhe Song;Zhanyu Ma", "authorids": "~Yujun_Tong1;~Da_Li3;~Dongliang_Chang2;~Tianwei_Cao1;~Timothy_Hospedales1;~Yi-Zhe_Song2;~Zhanyu_Ma1", "gender": "M;M;M;M;M;M;M", "homepage": ";https://dali-dl.github.io/;https://www.dongliangchang.cn/;https://github.com/caotianwei;http://homepages.inf.ed.ac.uk/thospeda/;http://personal.ee.surrey.ac.uk/Personal/Y.Song/;https://zhanyuma.cn/", "dblp": "294/5100;43/4804-1;236/3116;;32/3545;98/1684;", "google_scholar": "8jks3_kAAAAJ;RPvaE3oAAAAJ;tIf50PgAAAAJ;;https://scholar.google.fr/citations?user=nHhtvqkAAAAJ;https://scholar.google.co.uk/citations?user=irZFP_AAAAAJ;5GAAs7IAAAAJ", "orcid": ";0000-0002-2101-2989;0000-0002-4081-3001;;0000-0003-4867-7486;;0000-0003-2950-2488", "linkedin": ";;;;timothyhospedales/;;", "or_profile": "~Yujun_Tong1;~Da_Li3;~Dongliang_Chang2;~Tianwei_Cao1;~Timothy_Hospedales1;~Yi-Zhe_Song2;~Zhanyu_Ma1", "aff": "Beijing University of Posts and Telecommunications;University of Edinburgh;Tsinghua University;University of Chinese Academy of Sciences;Samsung AI Research Centre;University of Surrey;Beijing University of Post and Telecommunication", "aff_domain": "bupt.edu.cn;ed.ac.uk;tsinghua.edu.cn;ucas.ac.cn;samsung.com;surrey.ac.uk;bupt.edu.cn", "position": "PhD student;Visiting Scholar;Postdoc;PhD student;Principal Researcher;Professor;Full Professor", "bibtex": 
"@misc{\ntong2024visionlanguage,\ntitle={Vision-Language Subspace Prompting},\nauthor={Yujun Tong and Da Li and Dongliang Chang and Tianwei Cao and Timothy Hospedales and Yi-Zhe Song and Zhanyu Ma},\nyear={2024},\nurl={https://openreview.net/forum?id=89bUur0Q4J}\n}", "github": "", "project": "", "reviewers": "SWVW;nXYV;kdeh;Keaq", "site": "https://openreview.net/forum?id=89bUur0Q4J", "pdf_size": 47646175, "rating": "3;5;5;6", "confidence": "4;5;3;3", "soundness": "2;3;3;3", "contribution": "2;2;2;3", "presentation": "2;3;3;3", "wc_summary": "52;48;47;37", "wc_strengths": "27;33;41;22", "wc_weaknesses": "68;207;17;32", "wc_questions": "4;6;37;3", "wc_review": "151;294;142;94", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "55;934;471;469", "reply_reviewers": "0;0;0;0", "reply_authors": "1;3;1;1", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 46.0, 5.522680508593631 ], "wc_strengths_avg": [ 30.75, 7.084313657652377 ], "wc_weaknesses_avg": [ 81.0, 75.0699673637867 ], "wc_questions_avg": [ 12.5, 14.186260959111108 ], "wc_review_avg": [ 170.25, 74.66048151465405 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 482.25, 311.01557436887305 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.3458572319330373, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Po3-QoSvbEkJ:scholar.google.com/&scioq=Vision-Language+Subspace+Prompting&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;4;5;0", "aff_unique_norm": "Beijing University of Posts and Telecommunications;University of Edinburgh;Tsinghua University;University of Chinese Academy of Sciences;Samsung;University of Surrey", "aff_unique_dep": ";;;;AI Research;", "aff_unique_url": "http://www.bupt.edu.cn/;https://www.ed.ac.uk;https://www.tsinghua.edu.cn;http://www.ucas.ac.cn;https://www.samsung.com/global/researchers/samsung-ai-research-centre/;https://www.surrey.ac.uk", "aff_unique_abbr": "BUPT;Edinburgh;THU;UCAS;SARC;Surrey", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Beijing;", "aff_country_unique_index": "0;1;0;0;2;1;0", "aff_country_unique": "China;United Kingdom;South Korea" }, { "id": "89l6VLPrin", "title": "Graph layouts and graph contrastive learning via neighbour embeddings", "track": "main", "status": "Reject", "tldr": "", "abstract": "In node-level graph representation learning, there are two distinct paradigms. One is known as graph layouts, where nodes are embedded into 2D space for visualization purposes. Another is graph contrastive learning, where nodes are parametrically embedded into a high-dimensional vector space based on node features. In this work, we show that these two paradigms are intimately related, and that both can be successfully approached via neighbour embedding methods. First, we introduce graph t-SNE for two-dimensional graph drawing, and show that the resulting layouts outperform all existing algorithms in terms of local structure preservation, as measured by kNN classification accuracy. Second, we introduce graph contrastive neighbor embedding (graph CNE)}, which uses a fully-connected neural network to transform graph node features into an embedding space by optimizing the contrastive InfoNCE objective. 
We show that graph CNE, while being conceptually simpler than most existing graph contrastive learning methods, produces competitive node representations, with state-of-the-art linear classification accuracy.", "keywords": "Graph Layout;Contrastive Learning;t-SNE", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Marius Keute;Alica Leonie Guzm\u00e1n;Dmitry Kobak", "authorids": "~Marius_Keute1;~Alica_Leonie_Guzm\u00e1n1;~Dmitry_Kobak2", "gender": "M;F;", "homepage": ";;https://dkobak.github.io/", "dblp": ";;236/5191", "google_scholar": "https://scholar.google.de/citations?user=cn2rZJ8AAAAJ;https://scholar.google.com/citations?hl=en;BUQbD5kAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Marius_Keute1;~Alica_Leonie_Guzm\u00e1n1;~Dmitry_Kobak2", "aff": ";Eberhard-Karls-Universit\u00e4t T\u00fcbingen;Eberhard-Karls-Universit\u00e4t T\u00fcbingen", "aff_domain": ";uni-tuebingen.de;uni-tuebingen.de", "position": ";MS student;Researcher", "bibtex": "@misc{\nkeute2024graph,\ntitle={Graph layouts and graph contrastive learning via neighbour embeddings},\nauthor={Marius Keute and Alica Leonie Guzm{\\'a}n and Dmitry Kobak},\nyear={2024},\nurl={https://openreview.net/forum?id=89l6VLPrin}\n}", "github": "", "project": "", "reviewers": "WEvC;pX5U;JHfU", "site": "https://openreview.net/forum?id=89l6VLPrin", "pdf_size": 13744935, "rating": "5;5;6", "confidence": "4;3;2", "soundness": "2;2;3", "contribution": "2;2;3", "presentation": "2;2;2", "wc_summary": "51;29;64", "wc_strengths": "22;52;16", "wc_weaknesses": "346;83;51", "wc_questions": "5;155;9", "wc_review": "424;319;140", "wc_reply_reviewers": "185;0;15", "wc_reply_authors": "1418;627;395", "reply_reviewers": "1;0;1", "reply_authors": "3;2;1", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 48.0, 14.445299120013633 ], "wc_strengths_avg": [ 30.0, 15.748015748023622 ], "wc_weaknesses_avg": [ 160.0, 132.16908362649212 ], "wc_questions_avg": [ 56.333333333333336, 69.78697745440923 ], "wc_review_avg": [ 294.3333333333333, 117.24712742275987 ], "wc_reply_reviewers_avg": [ 66.66666666666667, 83.8980863243548 ], "wc_reply_authors_avg": [ 813.3333333333334, 437.9287105860446 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:zivpkVhNyAkJ:scholar.google.com/&scioq=Graph+layouts+and+graph+contrastive+learning+via+neighbour+embeddings&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Eberhard Karls University of T\u00fcbingen", "aff_unique_dep": "", "aff_unique_url": "https://www.uni-tuebingen.de/", "aff_unique_abbr": "Uni T\u00fcbingen", "aff_campus_unique_index": "0;0", "aff_campus_unique": "T\u00fcbingen", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "title": "A Policy Gradient Method for Confounded POMDPs", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19336", "id": "8BAkNCqpGW", "author_site": "Mao Hong, Zhengling Qi, Yanxun Xu", "tldr": "", "abstract": "In this paper, we 
propose a policy gradient method for confounded partially observable Markov decision processes (POMDPs) with continuous state and observation spaces in the offline setting. We first establish a novel identification result to non-parametrically estimate any history-dependent policy gradient under POMDPs using the offline data. The identification enables us to solve a sequence of conditional moment restrictions and adopt the min-max learning procedure with general function approximation for estimating the policy gradient. We then provide a finite-sample non-asymptotic bound for estimating the gradient uniformly over a pre-specified policy class in terms of the sample size, length of horizon, concentratability coefficient and the measure of ill-posedness in solving the conditional moment restrictions. Lastly, by deploying the proposed gradient estimation in the gradient ascent algorithm, we show the global convergence of the proposed algorithm in finding the history-dependent optimal policy under some technical conditions. To the best of our knowledge, this is the first work studying the policy gradient method for POMDPs under the offline setting.", "keywords": "Offline Reinforcement Learning;Confounded POMDP;Policy Gradient;Statistical Guarantee;Function Approximation", "primary_area": "learning theory", "supplementary_material": "", "author": "Mao Hong;Zhengling Qi;Yanxun Xu", "authorids": "~Mao_Hong1;~Zhengling_Qi1;~Yanxun_Xu1", "gender": "M;;F", "homepage": ";https://sites.google.com/view/statsqizl/home?authuser=0;http://www.ams.jhu.edu/~yxu70", "dblp": ";173/0201;", "google_scholar": "GUKNcVUAAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": ";;", "linkedin": "mao-hong-a45624195/;;", "or_profile": "~Mao_Hong1;~Zhengling_Qi1;~Yanxun_Xu1", "aff": "Johns Hopkins University;George Washington University;Johns Hopkins University", "aff_domain": "jh.edu;gwu.edu;jhu.edu", "position": "PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nhong2024a,\ntitle={A Policy Gradient Method for Confounded {POMDP}s},\nauthor={Mao Hong and Zhengling Qi and Yanxun Xu},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=8BAkNCqpGW}\n}", "github": "", "project": "", "reviewers": "4mko;KDFK;YGPH;5Pwh", "pdf_size": 961571, "rating": "8;8;8;8", "confidence": "2;3;3;1", "soundness": "4;3;3;4", "contribution": "3;3;3;4", "presentation": "3;2;3;3", "wc_summary": "65;94;50;191", "wc_strengths": "113;90;53;106", "wc_weaknesses": "259;273;34;94", "wc_questions": "105;79;48;204", "wc_review": "542;536;185;595", "wc_reply_reviewers": "59;0;0;117", "wc_reply_authors": "1406;698;536;1197", "reply_reviewers": "1;0;0;1", "reply_authors": "3;2;1;3", "rating_avg": [ 8.0, 0.0 ], "confidence_avg": [ 2.25, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "contribution_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 100.0, 54.868023474515645 ], "wc_strengths_avg": [ 90.5, 23.200215516240362 ], "wc_weaknesses_avg": [ 165.0, 103.32231124011889 ], "wc_questions_avg": [ 109.0, 58.44227921633447 ], "wc_review_avg": [ 464.5, 162.99463181344348 ], "wc_reply_reviewers_avg": [ 44.0, 48.54379466007988 ], "wc_reply_authors_avg": [ 959.25, 354.78963837744755 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=16526567701232919548&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "openreview": "https://openreview.net/forum?id=8BAkNCqpGW", "pdf": "https://openreview.net/pdf?id=8BAkNCqpGW", "email": "jh.edu;gwu.edu;jhu.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Johns Hopkins University;George Washington University", "aff_unique_dep": ";", "aff_unique_url": "https://www.jhu.edu;https://www.gwu.edu", "aff_unique_abbr": "JHU;GWU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "8Cc6qOPvFo", "title": "Text-Driven Image Editing using Cycle-Consistency-Driven Metric Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "We present a simple but effective training-free method for text-driven image-to-image translation based on pretrained text-to-image diffusion models. Since a naive application of the pre-trained diffusion models for the manipulation tasks often significantly destroys the structure or background of the source image, we revise the original backward process for the target image by meaningfully aligning better with a given target task while preserving the background or structure of a source image. We derive a new guidance objective term that is a combination of maximizing the similarity with target prompts rather than the source prompt based on the pre-trained CLIP and minimizing the distance with the source latents. Moreover, contrary to existing methods based on the diffusion models, we exploit the cycle-consistency objective in order to further maintain the background of the source image, where we perform an iterative optimization process by alternately optimizing the source and target latents. 
Experimental results demonstrate that the proposed method achieves outstanding editing performance on various tasks when combined with the pre-trained Stable Diffusion.", "keywords": "Diffusion Models;Text-Driven Image-to-Image Translation", "primary_area": "generative models", "supplementary_material": "", "author": "Hyunsoo Lee;Minsoo Kang;Bohyung Han", "authorids": "~Hyunsoo_Lee1;~Minsoo_Kang1;~Bohyung_Han1", "gender": "M;M;Not Specified", "homepage": ";https://kminsoo.github.io;http://cvlab.snu.ac.kr/~bhhan", "dblp": ";29/5268;73/4880.html", "google_scholar": "https://scholar.google.co.kr/citations?user=6JNXaH0AAAAJ;in5F4IUAAAAJ;9aaeCToAAAAJ", "orcid": ";;", "linkedin": "philip21/;;", "or_profile": "~Hyunsoo_Lee1;~Minsoo_Kang1;~Bohyung_Han1", "aff": "Seoul National University;Seoul National University;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr;snu.ac.kr", "position": "Undergrad student;PhD student;Full Professor", "bibtex": "@misc{\nlee2024textdriven,\ntitle={Text-Driven Image Editing using Cycle-Consistency-Driven Metric Learning},\nauthor={Hyunsoo Lee and Minsoo Kang and Bohyung Han},\nyear={2024},\nurl={https://openreview.net/forum?id=8Cc6qOPvFo}\n}", "github": "", "project": "", "reviewers": "q62i;Cs1x;k61j;GXjj", "site": "https://openreview.net/forum?id=8Cc6qOPvFo", "pdf_size": 11275276, "rating": "3;3;5;6", "confidence": "4;4;4;5", "soundness": "2;2;2;2", "contribution": "2;2;2;3", "presentation": "2;3;2;3", "wc_summary": "69;78;85;16", "wc_strengths": "22;20;121;52", "wc_weaknesses": "103;52;236;84", "wc_questions": "1;53;2;2", "wc_review": "195;203;444;154", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "161;137;255;95", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.0, 0.0 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 62.0, 27.15695122800054 ], "wc_strengths_avg": [ 53.75, 40.84345112744514 ], "wc_weaknesses_avg": [ 118.75, 70.104832215761 ], "wc_questions_avg": [ 14.5, 22.23173407541571 ], "wc_review_avg": [ 249.0, 114.10740554407501 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 162.0, 58.66003750424986 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7777777777777777, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ArsSUTvMqkgJ:scholar.google.com/&scioq=Text-Driven+Image+Editing+using+Cycle-Consistency-Driven+Metric+Learning&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "id": "8Cw3yFqPDX", "title": "Buffered Asynchronous Federated Learning with Local Differential Privacy", "track": "main", "status": "Reject", "tldr": "", "abstract": "Federated Learning (FL) allows multiple parties to collaboratively train a machine learning (ML) model without having to disclose their training data.\nClients train their own models locally and share only model updates with an aggregation server.\nThe first FL deployments have been in synchronous settings, with all clients performing training and sharing model updates simultaneously.\nMore 
recently, {\\em Asynchronous FL} (Async-FL) has emerged as a new approach that allows clients to train at their own pace and send/receive updates when they are ready.\n\nWhile FL is inherently less privacy-invasive than alternative centralized ML approaches, (aggregate) model updates can still leak sensitive information about clients' data.\nTherefore, FL algorithms need to satisfy Differential Privacy (DP) to provably limit leakage.\nAlas, previous work on Async-FL has only considered Central DP, which requires trust in the server, and thus may not always be viable.\nIn this paper, we present the first technique that satisfies {\\em Local DP} (LDP) in the context of the state-of-the-art aggregation algorithm for Async-FL, namely, FedBuff.\nWe experimentally demonstrate on three benchmark FL datasets that our LDP technique performs equally well and, in some cases, better than FedBuff with Central DP.\nFinally, we study how the {\\em staleness} of the model updates received by the asynchronous FL clients can be used to improve utility while preserving privacy under different attack setups.", "keywords": "Asynchronous Federated Learning;Differential Privacy", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Mohammad Naseri;Nicolas Kourtellis;Emiliano De Cristofaro", "authorids": "~Mohammad_Naseri1;~Nicolas_Kourtellis1;~Emiliano_De_Cristofaro1", "gender": "M;;M", "homepage": ";https://emilianodc.com;", "dblp": ";36/6225;96/8779", "google_scholar": "FqFAeIYAAAAJ;https://scholar.google.com/citations?hl=en;Q5oWwiQAAAAJ", "orcid": ";;0000-0002-5674-1698", "linkedin": ";;", "or_profile": "~Mohammad_Naseri1;~Emiliano_De_Cristofaro1;~Nicolas_Kourtellis2", "aff": "Flower Labs;University of California, Riverside;Telefonica Research", "aff_domain": "flower.ai;ucr.edu;telefonica.com", "position": "Researcher;Full Professor;Director of Research", "bibtex": "@misc{\nnaseri2024buffered,\ntitle={Buffered Asynchronous Federated Learning with Local Differential Privacy},\nauthor={Mohammad Naseri and Nicolas Kourtellis and Emiliano De Cristofaro},\nyear={2024},\nurl={https://openreview.net/forum?id=8Cw3yFqPDX}\n}", "github": "", "project": "", "reviewers": "g7c5;gsJz;fiZQ;xZb7", "site": "https://openreview.net/forum?id=8Cw3yFqPDX", "pdf_size": 366060, "rating": "1;3;3;5", "confidence": "4;4;4;5", "soundness": "1;1;2;3", "contribution": "1;1;2;2", "presentation": "2;3;2;2", "wc_summary": "26;45;21;140", "wc_strengths": "24;31;26;68", "wc_weaknesses": "109;65;85;47", "wc_questions": "78;58;3;26", "wc_review": "237;199;135;281", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 3.0, 1.4142135623730951 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 1.75, 0.82915619758885 ], "contribution_avg": [ 1.5, 0.5 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 58.0, 48.181946826586405 ], "wc_strengths_avg": [ 37.25, 17.93564885918544 ], "wc_weaknesses_avg": [ 76.5, 23.08137777516758 ], "wc_questions_avg": [ 41.25, 28.838992700855556 ], "wc_review_avg": [ 213.0, 53.5723809439155 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:V00-zUNGZJ8J:scholar.google.com/&scioq=Buffered+Asynchronous+Federated+Learning+with+Local+Differential+Privacy&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "Flower Labs;University of California, Riverside;Telefonica", "aff_unique_dep": ";;Research", "aff_unique_url": ";https://www.ucr.edu;https://www.telefonica.com", "aff_unique_abbr": ";UCR;Telefonica", "aff_campus_unique_index": "1", "aff_campus_unique": ";Riverside", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;Spain" }, { "id": "8DLVrWL78S", "title": "Streamlining Generative Models for Structure-Based Drug Design", "track": "main", "status": "Reject", "tldr": "", "abstract": "Generative models for structure-based drug design (SBDD) aim to generate novel 3D molecules for specified protein targets $\\textit{in silico}$. The prevailing paradigm focuses on model expressivity - typically with powerful Graph Neural Network (GNN) models - but is agnostic to binding affinity during training, potentially overlooking better molecules. We address this issue with a two-pronged approach: learn an economical surrogate for affinity to infer an unlabeled molecular graph, and optimize for labels conditioned on this graph and desired molecular properties (e.g., QED, SA). The resulting model FastSBDD achieves state-of-the-art results as well as streamlined computation and model size (up to 1000x faster and with 100x fewer trainable parameters compared to existing methods), paving way for improved docking software. We also establish rigorous theoretical results to expose the representation limits of GNNs in SBDD contexts and the generalizability of our affinity scoring model, advocating more emphasis on generalization going forward.", "keywords": "drug design;binding;docking;graph neural networks;generalization bounds", "primary_area": "applications to physical sciences (physics, chemistry, biology, etc.)", "supplementary_material": "/attachment/2e87e16cc7197b69e1920028c29e14f2093dd5f6.zip", "author": "Rafal Karczewski;Markus Heinonen;Vikas Garg", "authorids": "~Rafal_Karczewski1;~Markus_Heinonen1;~Vikas_Garg2", "gender": ";M;", "homepage": ";https://users.aalto.fi/~heinom10/;", "dblp": "228/6790;22/7709;", "google_scholar": ";hFtfHZoAAAAJ;", "orcid": ";;", "linkedin": "rafal-karczewski-906ab010a;;", "or_profile": "~Rafal_Karczewski1;~Markus_Heinonen1;~Vikas_Garg2", "aff": "Aalto University;Aalto University;", "aff_domain": "aalto.fi;aalto.fi;", "position": "PhD student;Researcher;", "bibtex": "@misc{\nkarczewski2024streamlining,\ntitle={Streamlining Generative Models for Structure-Based Drug Design},\nauthor={Rafal Karczewski and Markus Heinonen and Vikas Garg},\nyear={2024},\nurl={https://openreview.net/forum?id=8DLVrWL78S}\n}", "github": "", "project": "", "reviewers": "A4Na;aZds;zMwy;jdjh", "site": "https://openreview.net/forum?id=8DLVrWL78S", "pdf_size": 1101616, "rating": "3;3;5;5", "confidence": "3;4;4;4", "soundness": "2;1;2;3", "contribution": "2;2;2;3", "presentation": "3;2;3;2", "wc_summary": "69;71;115;31", "wc_strengths": "34;69;22;44", "wc_weaknesses": "210;220;166;130", "wc_questions": "35;19;58;25", "wc_review": "348;379;361;230", "wc_reply_reviewers": "112;0;206;0", "wc_reply_authors": "1142;716;2052;575", "reply_reviewers": "1;0;1;0", "reply_authors": "3;2;5;2", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.0, 0.7071067811865476 ], "contribution_avg": [ 2.25, 
0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 71.5, 29.744747435471695 ], "wc_strengths_avg": [ 42.25, 17.297037318569906 ], "wc_weaknesses_avg": [ 181.5, 36.010415160061676 ], "wc_questions_avg": [ 34.25, 14.85555451674558 ], "wc_review_avg": [ 329.5, 58.4914523669912 ], "wc_reply_reviewers_avg": [ 79.5, 86.16698903872643 ], "wc_reply_authors_avg": [ 1121.25, 576.4856351202517 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.0, 1.224744871391589 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:JEfpuRRtM_sJ:scholar.google.com/&scioq=Streamlining+Generative+Models+for+Structure-Based+Drug+Design&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Aalto University", "aff_unique_dep": "", "aff_unique_url": "https://www.aalto.fi", "aff_unique_abbr": "Aalto", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Finland" }, { "id": "8DW3aSOnou", "title": "Video Deblurring with Adaptive High-frequency Extraction", "track": "main", "status": "Reject", "tldr": "", "abstract": "State-of-the-art video deblurring methods use deep network architectures to recover sharpened video frames. Blurring especially degrades high-frequency information yet this aspect is often overlooked by recent models that focus more on enhancing architectural design. The recovery of high frequency detailing can be non-trivial, in part due to the spectral bias of neural networks. Neural networks are biased towards learning low frequency functions, making it to prioritize learning low frequency components. To enhance the learning of latent high frequencies, it is necessary to enforce explicit structures to capture the fine details or edges. This work merges the principles of the classic unsharp masking with a deep learning framework to emphasize the essential role of high-frequency information in deblurring. We generate an adaptive kernel, constructed from a convex combination of dynamic coefficients and predefined high-pass filtering kernels. This kernel is then employed in a spatio-temporal 3D convolution process to extract high-frequency components from the data. This method significantly improves video deblurring, achieving a noteworthy enhancement with an increase of up to 0.61dB in PSNR over top models on GORPO dataset. 
Additionally, it outpaces the majority of them in inference time.", "keywords": "video deblurring;deep learning;unsharp masking", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Bo Ji;Angela Yao", "authorids": "~Bo_Ji2;~Angela_Yao1", "gender": ";", "homepage": ";http://www.angelayao.com", "dblp": ";64/8484", "google_scholar": ";https://scholar.google.ch/citations?user=-LJCZMMAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Bo_Ji2;~Angela_Yao1", "aff": ";National University of Singapore", "aff_domain": ";nus.edu.sg", "position": ";Associate Professor", "bibtex": "@misc{\nji2024video,\ntitle={Video Deblurring with Adaptive High-frequency Extraction},\nauthor={Bo Ji and Angela Yao},\nyear={2024},\nurl={https://openreview.net/forum?id=8DW3aSOnou}\n}", "github": "", "project": "", "reviewers": "Ebb6;HuzH;a8DG;5xHX", "site": "https://openreview.net/forum?id=8DW3aSOnou", "pdf_size": 16821352, "rating": "3;3;5;8", "confidence": "5;5;4;2", "soundness": "2;2;2;3", "contribution": "1;2;2;3", "presentation": "1;2;3;3", "wc_summary": "28;51;62;22", "wc_strengths": "18;12;54;41", "wc_weaknesses": "95;8;65;166", "wc_questions": "4;226;48;4", "wc_review": "145;297;229;233", "wc_reply_reviewers": "0;0;138;0", "wc_reply_authors": "528;603;104;233", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 4.75, 2.0463381929681126 ], "confidence_avg": [ 4.0, 1.224744871391589 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 40.75, 16.361158271956175 ], "wc_strengths_avg": [ 31.25, 17.020208576865326 ], "wc_weaknesses_avg": [ 83.5, 56.967095765889276 ], "wc_questions_avg": [ 70.5, 91.55735907069403 ], "wc_review_avg": [ 226.0, 53.99073994677235 ], "wc_reply_reviewers_avg": [ 34.5, 59.75575286112627 ], "wc_reply_authors_avg": [ 367.0, 205.39109036177786 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.9975093361076329, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:NjfiiTs63KYJ:scholar.google.com/&scioq=Video+Deblurring+with+Adaptive+High-frequency+Extraction&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "", "aff_unique_url": "https://www.nus.edu.sg", "aff_unique_abbr": "NUS", "aff_country_unique_index": "0", "aff_country_unique": "Singapore" }, { "title": "CLAP: Collaborative Adaptation for Patchwork Learning", "status": "Spotlight", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19335", "id": "8EyRkd3Qj2", "author_site": "Sen Cui, Abudukelimu Wuerkaixi, Weishen Pan, Jian Liang, Lei Fang, Changshui Zhang, Fei Wang", "tldr": "", "abstract": "In this paper, we investigate a new practical learning scenario, where the data distributed in different sources/clients are typically generated with various modalities. Existing research on learning from multi-source data mostly assume that each client owns the data of all modalities, which may largely limit its practicability. In light of the expensiveness and sparsity of multimodal data, we propose patchwork learning to jointly learn from fragmented multimodal data in distributed clients. 
Considering the concerns on data privacy, patchwork learning aims to impute incomplete multimodal data for diverse downstream tasks without accessing the raw data directly. Local clients could miss different modality combinations. Due to the statistical heterogeneity induced by non-i.i.d. data, the imputation is more challenging since the learned dependencies fail to adapt to the imputation of other clients. In this paper, we provide a novel imputation framework to tackle modality combination heterogeneity and statistical heterogeneity simultaneously, called ``collaborative adaptation''. In particular, for two observed modality combinations from two clients, we learn the transformations between their maximal intersection and other modalities by proposing a novel ELBO. We improve the worst-performing required transformations through a Pareto min-max optimization framework. In extensive experiments, we demonstrate the superiority of the proposed method compared to existing related methods on benchmark data sets and a real-world clinical data set.", "keywords": "Patchwork learning;robustness", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Sen Cui;Abudukelimu Wuerkaixi;Weishen Pan;Jian Liang;Lei Fang;Changshui Zhang;Fei Wang", "authorids": "~Sen_Cui1;~Abudukelimu_Wuerkaixi1;~Weishen_Pan1;~Jian_Liang3;~Lei_Fang6;~Changshui_Zhang2;~Fei_Wang3", "gender": "M;M;M;M;;M;", "homepage": ";https://www.baidu.com;https://scholar.google.com/citations?user=PtTBMhUAAAAJ;;https://linkedin.com/in/leifang;http://bigeye.au.tsinghua.edu.cn/english/Introduction.html;https://wcm-wanglab.github.io/index.html", "dblp": "267/5483;293/3368;161/2032;19/2208;;z/ChangshuiZhang;52/3194-9.html", "google_scholar": "UzQuG1UAAAAJ;;PtTBMhUAAAAJ;mrunnpoAAAAJ;;GL9M37YAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0009-0006-0431-5642;;;;", "linkedin": ";;;;;;fei-wang-50682425/", "or_profile": "~Sen_Cui1;~Abudukelimu_Wuerkaixi1;~Weishen_Pan1;~Jian_Liang3;~Lei_Fang6;~Changshui_Zhang2;~Fei_Wang3", "aff": "Tsinghua University;Tsinghua University, Beijing;Weill Cornell Medicine, Cornell University;Kuaishou Technology;;Tsinghua University;Cornell University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;med.cornell.edu;kuaishou.com;;mail.tsinghua.edu.cn;cornell.edu", "position": "PhD student;PhD student;Postdoc;Senior Algorithm Engineer;;Full Professor;Full Professor", "bibtex": "@inproceedings{\ncui2024clap,\ntitle={{CLAP}: Collaborative Adaptation for Patchwork Learning},\nauthor={Sen Cui and Abudukelimu Wuerkaixi and Weishen Pan and Jian Liang and Lei Fang and Changshui Zhang and Fei Wang},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=8EyRkd3Qj2}\n}", "github": "", "project": "", "reviewers": "QCRo;ccS1;scKN;DjdF", "pdf_size": 4277230, "rating": "6;8;8;8", "confidence": "4;4;5;2", "soundness": "4;3;3;3", "contribution": "3;4;4;3", "presentation": "3;3;3;3", "wc_summary": "128;126;143;104", "wc_strengths": "158;173;166;67", "wc_weaknesses": "145;146;85;33", "wc_questions": "76;146;102;60", "wc_review": "507;591;496;264", "wc_reply_reviewers": "0;22;0;62", "wc_reply_authors": "1720;1152;683;1040", "reply_reviewers": "0;1;0;1", "reply_authors": "3;2;1;2", "rating_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 3.5, 0.5 ], 
"presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 125.25, 13.91716565971678 ], "wc_strengths_avg": [ 141.0, 43.05229378325852 ], "wc_weaknesses_avg": [ 102.25, 46.99667541433117 ], "wc_questions_avg": [ 96.0, 32.526911934581186 ], "wc_review_avg": [ 464.5, 121.45060724426206 ], "wc_reply_reviewers_avg": [ 21.0, 25.317977802344327 ], "wc_reply_authors_avg": [ 1148.75, 372.5207209001937 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.13245323570650439, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16746551509627980679&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 2, "openreview": "https://openreview.net/forum?id=8EyRkd3Qj2", "pdf": "https://openreview.net/pdf?id=8EyRkd3Qj2", "email": "tsinghua.edu.cn;tsinghua.edu.cn;med.cornell.edu;kuaishou.com;;mail.tsinghua.edu.cn;cornell.edu", "author_num": 7, "aff_unique_index": "0;0;1;2;0;1", "aff_unique_norm": "Tsinghua University;Cornell University;Kuaishou Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.weill.cornell.edu;https://www.kuaishou.com", "aff_unique_abbr": "THU;Cornell;Kuaishou", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Beijing;Weill Cornell Medicine", "aff_country_unique_index": "0;0;1;0;0;1", "aff_country_unique": "China;United States" }, { "title": "Language-Interfaced Tabular Oversampling via Progressive Imputation and Self-Authentication", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19334", "id": "8F6bws5JBy", "author_site": "June Yong Yang, Geondo Park, Joowon Kim, Hyeongwon Jang, Eunho Yang", "tldr": "", "abstract": "Tabular data in the wild are frequently afflicted with class-imbalance, biasing machine learning model predictions towards major classes. A data-centric solution to this problem is oversampling - where the classes are balanced by adding synthetic minority samples via generative methods. However, although tabular generative models are capable of generating synthetic samples under a balanced distribution, their integrity suffers when the number of minority samples is low. To this end, pre-trained generative language models with rich prior knowledge are a fitting candidate for the task at hand. Nevertheless, an oversampling strategy tailored for tabular data that utilizes the extensive capabilities of such language models is yet to emerge. In this paper, we propose a novel oversampling framework for tabular data to channel the abilities of generative language models. By leveraging its conditional sampling capabilities, we synthesize minority samples by progressively masking the important features of the majority class samples and imputing them towards the minority distribution. To reduce the inclusion of imperfectly converted samples, we utilize the power of the language model itself to self-authenticate the labels of the samples generated by itself, sifting out ill-converted samples. 
Extensive experiments on a variety of datasets and imbalance ratios reveal that the proposed method successfully generates reliable minority samples to boost the performance of machine learning classifiers, even under heavy imbalance ratios.", "keywords": "Tabular data;imbalanced learning;language models", "primary_area": "generative models", "supplementary_material": "", "author": "June Yong Yang;Geondo Park;Joowon Kim;Hyeongwon Jang;Eunho Yang", "authorids": "~June_Yong_Yang1;~Geondo_Park1;~Joowon_Kim1;~Hyeongwon_Jang1;~Eunho_Yang1", "gender": ";M;M;;M", "homepage": "http://mli.kaist.ac.kr/people/;;https://github.com/kjwispro;https://github.com/HyeongWon-Jang;https://sites.google.com/site/hleehome2/", "dblp": "277/5624;256/5123;;206/3852;96/2621", "google_scholar": "nkLNWg0AAAAJ;;;MT2tKmQAAAAJ;", "orcid": ";;;;", "linkedin": ";;;hyeongwon-jang-478591292?trk=contact-info;", "or_profile": "~June_Yong_Yang1;~Geondo_Park1;~Joowon_Kim1;~Hyeongwon_Jang1;~Eunho_Yang1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science and Technology (KAIST);Seoul National University;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;snu.ac.kr;kaist.ac.kr", "position": "PhD student;PhD student;MS student;Undergrad student;Associate Professor", "bibtex": "@inproceedings{\nyang2024languageinterfaced,\ntitle={Language-Interfaced Tabular Oversampling via Progressive Imputation and Self-Authentication},\nauthor={June Yong Yang and Geondo Park and Joowon Kim and Hyeongwon Jang and Eunho Yang},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=8F6bws5JBy}\n}", "github": "", "project": "", "reviewers": "tYE6;gyRK;puAc", "pdf_size": 4395848, "rating": "6;6;6", "confidence": "3;3;3", "soundness": "2;3;3", "contribution": "2;2;3", "presentation": "3;2;4", "wc_summary": "52;104;78", "wc_strengths": "54;28;71", "wc_weaknesses": "106;134;119", "wc_questions": "21;5;39", "wc_review": "233;271;307", "wc_reply_reviewers": "86;28;25", "wc_reply_authors": "696;1459;1355", "reply_reviewers": "1;1;1", "reply_authors": "2;4;3", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 78.0, 21.228911104120876 ], "wc_strengths_avg": [ 51.0, 17.682382946499793 ], "wc_weaknesses_avg": [ 119.66666666666667, 11.440668201153676 ], "wc_questions_avg": [ 21.666666666666668, 13.888444437333106 ], "wc_review_avg": [ 270.3333333333333, 30.214051182999096 ], "wc_reply_reviewers_avg": [ 46.333333333333336, 28.075295585660754 ], "wc_reply_authors_avg": [ 1170.0, 337.8471054584702 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.0, 0.816496580927726 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16290416663341375138&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=8F6bws5JBy", "pdf": "https://openreview.net/pdf?id=8F6bws5JBy", "email": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;snu.ac.kr;kaist.ac.kr", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Seoul National University", 
"aff_unique_dep": ";", "aff_unique_url": "https://www.kaist.ac.kr;https://www.snu.ac.kr", "aff_unique_abbr": "KAIST;SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Learning Personalized Causally Invariant Representations for Heterogeneous Federated Clients", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19333", "id": "8FHWkY0SwF", "author_site": "Xueyang Tang, Song Guo, Jie ZHANG, Jingcai Guo", "tldr": "", "abstract": "Personalized federated learning (PFL) has gained great success in tackling the scenarios where target datasets are heterogeneous across the local clients. However, the application of the existing PFL methods to real-world setting is hindered by the common assumption that the test data on each client is in-distribution (IND) with respect to its training data. Due to the bias of training dataset, the modern machine learning model prefers to rely on shortcut which can perform well on the training data but fail to generalize to the unseen test data that is out-of-distribution (OOD). This pervasive phenomenon is called shortcut learning and has attracted plentiful efforts in centralized situations. In PFL, the limited data diversity on federated clients makes mitigating shortcut and meanwhile preserving personalization knowledge rather difficult. In this paper, we analyse this challenging problem by formulating the structural causal models (SCMs) for heterogeneous federated clients. From the proposed SCMs, we derive two significant causal signatures which inspire a provable shortcut discovery and removal method under federated learning, namely FedSDR. Specifically, FedSDR is divided into two steps: 1) utilizing the available training data distributed among local clients to discover all the shortcut features in a collaborative manner. 2) developing the optimal personalized causally invariant predictor for each client by eliminating the discovered shortcut features. We provide theoretical analysis to prove that our method can draw complete shortcut features and produce the optimal personalized invariant predictor that can generalize to unseen OOD data on each client. 
The experimental results on diverse datasets validate the superiority of FedSDR over the state-of-the-art PFL methods on OOD generalization performance.", "keywords": "Personalized Federated Learning;Invariant Learning;Causality;Out-of-distribution Generalization;Shortcut Learning", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Xueyang Tang;Song Guo;Jie ZHANG;Jingcai Guo", "authorids": "~Xueyang_Tang1;~Song_Guo5;~Jie_ZHANG18;~Jingcai_Guo1", "gender": "M;M;F;M", "homepage": ";https://cse.hkust.edu.hk/~songguo/;https://cugzj.github.io/zhangjie.github.io/;https://jingcaiguo.github.io/", "dblp": ";01/267-1;84/6889-76;192/7270", "google_scholar": "wAGIpRAAAAAJ;https://scholar.google.com/citations?hl=en;JRCNlI8AAAAJ;YjSHPjcAAAAJ", "orcid": "0000-0003-4284-9806;;0000-0002-8073-2118;0000-0002-0449-4525", "linkedin": ";;;jingcai-guo", "or_profile": "~Xueyang_Tang1;~Song_Guo5;~Jie_ZHANG18;~Jingcai_Guo1", "aff": "The Hong Kong Polytechnic University;Department of Computer Science and Engineering, Hong Kong University of Science and Technology;The Hong Kong Polytechnic University;The Hong Kong Polytechnic University", "aff_domain": "polyu.edu.hk;cse.ust.hk;polyu.edu.hk;polyu.edu.hk", "position": "PhD student;Full Professor;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\ntang2024learning,\ntitle={Learning Personalized Causally Invariant Representations for Heterogeneous Federated Clients},\nauthor={Xueyang Tang and Song Guo and Jie ZHANG and Jingcai Guo},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=8FHWkY0SwF}\n}", "github": "", "project": "", "reviewers": "Qvuh;2uxd;zjFS", "pdf_size": 2607041, "rating": "3;6;6", "confidence": "4;3;4", "soundness": "2;3;3", "contribution": "2;3;3", "presentation": "2;3;3", "wc_summary": "39;72;59", "wc_strengths": "33;27;36", "wc_weaknesses": "349;117;47", "wc_questions": "3;66;69", "wc_review": "424;282;211", "wc_reply_reviewers": "0;18;39", "wc_reply_authors": "1091;1171;523", "reply_reviewers": "0;1;1", "reply_authors": "3;3;2", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 56.666666666666664, 13.572848714334887 ], "wc_strengths_avg": [ 32.0, 3.7416573867739413 ], "wc_weaknesses_avg": [ 171.0, 129.0684572878543 ], "wc_questions_avg": [ 46.0, 30.430248109405877 ], "wc_review_avg": [ 305.6666666666667, 88.55255815364994 ], "wc_reply_reviewers_avg": [ 19.0, 15.937377450509228 ], "wc_reply_authors_avg": [ 928.3333333333334, 288.46875432570204 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12102156348141143717&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "openreview": "https://openreview.net/forum?id=8FHWkY0SwF", "pdf": "https://openreview.net/pdf?id=8FHWkY0SwF", "email": "polyu.edu.hk;cse.ust.hk;polyu.edu.hk;polyu.edu.hk", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Hong Kong Polytechnic University;Hong Kong University of Science and Technology", "aff_unique_dep": 
";Department of Computer Science and Engineering", "aff_unique_url": "https://www.polyu.edu.hk;https://www.ust.hk", "aff_unique_abbr": "PolyU;HKUST", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "8FP6eJsVCv", "title": "Explanation Shift: How Did the Distribution Shift Impact the Model?", "track": "main", "status": "Reject", "tldr": "", "abstract": "The performance of machine learning models on new data is critical for their success in real-world applications. However, the model's performance may deteriorate if the new data is sampled from a different distribution than the training data. Current methods to detect shifts in the input or output data distributions have limitations in identifying model behavior changes. In this paper, we define \\emph{explanation shift} as the statistical comparison between how predictions from training data are explained and how predictions on new data are explained. We propose explanation shift as a key indicator to investigate the interaction between distribution shifts and learned models. We introduce an Explanation Shift Detector that operates on the explanation distributions, providing more sensitive and explainable changes in interactions between distribution shifts and learned models. We compare explanation shifts with other methods that are based on distribution shifts, showing that monitoring for explanation shifts results in more sensitive indicators for varying model behavior. We provide theoretical and experimental evidence and demonstrate the effectiveness of our approach on synthetic and real data. Additionally, we release an open-source Python package, \\texttt{skshift}, which implements our method and provides usage tutorials for further reproducibility.", "keywords": "Model Monitoring;Distribution Shift;Explainable AI", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "", "author": "Carlos Mougan;Klaus Broelemann;David Masip;Gjergji Kasneci;Thanassis Tiropanis;Steffen Staab", "authorids": "~Carlos_Mougan1;~Klaus_Broelemann1;~David_Masip4;~Gjergji_Kasneci2;~Thanassis_Tiropanis1;~Steffen_Staab2", "gender": "M;;M;M;;M", "homepage": "https://cmougan.eu;;http://github.com/david26694/;https://www.gov.sot.tum.de/rds/prof-dr-gjergji-kasneci/;https://www.southampton.ac.uk/people/5x5rrv/professor-thanassis-tiropanis;https://www.ki.uni-stuttgart.de/de/institut/team/Staab-00004/", "dblp": "293/7915;00/7271.html;;69/3216;64/6175;s/SteffenStaab", "google_scholar": "dQ5WrokAAAAJ;;;Zbc8GK4AAAAJ;T0MK3pUAAAAJ;https://scholar.google.com/citations?hl=de", "orcid": ";;;0000-0002-3123-7268;0000-0002-6195-2852;0000-0002-0780-4154", "linkedin": "carlosmougan/;;;;tiropanis/;", "or_profile": "~Carlos_Mougan1;~Klaus_Broelemann1;~David_Masip4;~Gjergji_Kasneci2;~Thanassis_Tiropanis1;~Steffen_Staab2", "aff": "University of Southampton;SCHUFA;;University of Tuebingen;University of Southampton;University of Southampton", "aff_domain": "soton.ac.uk;schufa.de;;uni-tuebingen.de;ecs.soton.ac.uk;soton.ac.uk", "position": "PhD student;Principal Researcher;;Professor;Full Professor;Full Professor", "bibtex": "@misc{\nmougan2024explanation,\ntitle={Explanation Shift: How Did the Distribution Shift Impact the Model?},\nauthor={Carlos Mougan and Klaus Broelemann and David Masip and Gjergji Kasneci and Thanassis Tiropanis and Steffen Staab},\nyear={2024},\nurl={https://openreview.net/forum?id=8FP6eJsVCv}\n}", "github": 
"", "project": "", "reviewers": "QGE3;Kdfx;hRuz;3HUZ", "site": "https://openreview.net/forum?id=8FP6eJsVCv", "pdf_size": 1303631, "rating": "3;5;5;8", "confidence": "4;4;4;3", "soundness": "3;2;3;4", "contribution": "2;2;2;3", "presentation": "2;3;2;4", "wc_summary": "110;236;72;90", "wc_strengths": "40;51;66;99", "wc_weaknesses": "364;173;367;41", "wc_questions": "61;159;254;149", "wc_review": "575;619;759;379", "wc_reply_reviewers": "0;0;137;0", "wc_reply_authors": "770;787;732;320", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.25, 1.7853571071357126 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 127.0, 64.35060217278468 ], "wc_strengths_avg": [ 64.0, 22.214859891523062 ], "wc_weaknesses_avg": [ 236.25, 137.42156854002212 ], "wc_questions_avg": [ 155.75, 68.34974396440707 ], "wc_review_avg": [ 583.0, 135.97058505426827 ], "wc_reply_reviewers_avg": [ 34.25, 59.322740159234044 ], "wc_reply_authors_avg": [ 652.25, 192.85535382768091 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.8892972917998875, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9342819475098152131&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "University of Southampton;SCHUFA Holding AG;University of Tuebingen", "aff_unique_dep": ";;", "aff_unique_url": "https://www.southampton.ac.uk;https://www.schufa.de;https://www.uni-tuebingen.de/", "aff_unique_abbr": "Southampton;SCHUFA;Uni T\u00fcbingen", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;0", "aff_country_unique": "United Kingdom;Germany" }, { "id": "8FhwHJGUPZ", "title": "Dual-Balancing for Multi-Task Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Multi-task learning (MTL), a learning paradigm to learn multiple related tasks simultaneously, has achieved great success in various fields. However, task balancing problem remains a significant challenge in MTL, with the disparity in loss/gradient scales often leading to performance compromises. In this paper, we propose a Dual-Balancing Multi-Task Learning (DB-MTL) method to alleviate the task balancing problem from both loss and gradient perspectives. Specifically, DB-MTL ensures loss-scale balancing by performing a logarithm transformation on each task loss, and guarantees gradient-magnitude balancing via normalizing all task gradients to the same magnitude as the maximum gradient norm. 
Extensive experiments conducted on several benchmark datasets consistently demonstrate the state-of-the-art performance of DB-MTL.", "keywords": "multi-task learning", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Baijiong Lin;Weisen Jiang;Feiyang Ye;Yu Zhang;Pengguang Chen;Ying-Cong Chen;Shu Liu;James Kwok", "authorids": "~Baijiong_Lin1;~Weisen_Jiang1;~Feiyang_Ye4;~Yu_Zhang3;~Pengguang_Chen1;~Ying-Cong_Chen1;~Shu_Liu4;~James_Kwok1", "gender": "M;M;M;M;M;M;M;", "homepage": "https://baijiong-lin.github.io/;https://wayson-ust.github.io/;https://feiyang-ye.github.io/;http://cse.sustech.edu.cn/faculty/~zhangy/;https://github.com/akuxcw;https://www.yingcong.me/;https://shuliu1993.github.io/;", "dblp": "279/2950;302/7625;285/4704;50/671-6;189/7442.html;137/6578;57/1180-5;", "google_scholar": "KVdbYTYAAAAJ;https://scholar.google.com/citations?hl=en;3EX25cAAAAAJ;https://scholar.google.com.hk/citations?user=jaRS5w4AAAAJ;https://scholar.google.com.hk/citations?user=lMnVrgIAAAAJ;https://scholar.google.com.hk/citations?user=n7j4bJUAAAAJ;BUEDUFkAAAAJ;", "orcid": "0000-0002-4257-0226;;;;;;;", "linkedin": ";;;;;;;", "or_profile": "~Baijiong_Lin1;~Weisen_Jiang1;~Feiyang_Ye4;~Yu_Zhang3;~Pengguang_Chen1;~Ying-Cong_Chen1;~Shu_Liu4;~James_Kwok1", "aff": "The Hong Kong University of Science and Technology (Guangzhou);Hong Kong University of Science and Technology;University of Technology Sydney;Southern University of Science and Technology;SmartMore;Hong Kong University of Science and Technology;SmartMore Ltd.;", "aff_domain": "connect.hkust-gz.edu.cn;ust.hk;uts.edu.au;sustc.edu.cn;smartmore.com;hkust-gz.edu.cn;smartmore.com;", "position": "PhD student;PhD student;PhD student;Associate Professor;Researcher;Assistant Professor;Principal Researcher;", "bibtex": "@misc{\nlin2024dualbalancing,\ntitle={Dual-Balancing for Multi-Task Learning},\nauthor={Baijiong Lin and Weisen Jiang and Feiyang Ye and Yu Zhang and Pengguang Chen and Ying-Cong Chen and Shu Liu and James Kwok},\nyear={2024},\nurl={https://openreview.net/forum?id=8FhwHJGUPZ}\n}", "github": "", "project": "", "reviewers": "gC4S;H1EY;PmP7;SooC;8dJR", "site": "https://openreview.net/forum?id=8FhwHJGUPZ", "pdf_size": 484187, "rating": "3;5;5;6;8", "confidence": "4;4;4;5;4", "soundness": "2;2;2;3;3", "contribution": "2;2;2;3;3", "presentation": "2;2;2;3;3", "wc_summary": "106;40;164;76;74", "wc_strengths": "54;49;42;46;75", "wc_weaknesses": "87;360;238;5;40", "wc_questions": "49;5;30;134;25", "wc_review": "296;454;474;261;214", "wc_reply_reviewers": "0;0;0;33;12", "wc_reply_authors": "1180;1356;994;300;251", "reply_reviewers": "0;0;0;1;1", "reply_authors": "3;3;3;2;2", "rating_avg": [ 5.4, 1.624807680927192 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "contribution_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 92.0, 41.626914370392626 ], "wc_strengths_avg": [ 53.2, 11.582745788456206 ], "wc_weaknesses_avg": [ 146.0, 133.31016465371272 ], "wc_questions_avg": [ 48.6, 44.93817975841923 ], "wc_review_avg": [ 339.8, 104.88546133759436 ], "wc_reply_reviewers_avg": [ 9.0, 12.86856635371633 ], "wc_reply_authors_avg": [ 816.2, 456.3465349928714 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 2.6, 0.4898979485566356 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.1846372364689991, "gs_citation": 15, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=16214532746501801863&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1;2;3;0;3", "aff_unique_norm": "Hong Kong University of Science and Technology;University of Technology Sydney;Southern University of Science and Technology;SmartMore", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ust.hk;https://www.uts.edu.au;https://www.sustech.edu.cn;", "aff_unique_abbr": "HKUST;UTS;SUSTech;", "aff_campus_unique_index": "0;1;1", "aff_campus_unique": "Guangzhou;Hong Kong SAR;", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "China;Australia;" }, { "id": "8GCcSXlkZN", "title": "Dense Representation Learning for a Joint-Embedding Predictive Architecture", "track": "main", "status": "Reject", "tldr": "", "abstract": "The joint-embedding predictive architecture (JEPA) recently has shown impressive results in extracting visual representations from unlabeled imagery under a masking strategy.\nHowever, we reveal its disadvantage lies in the inadequate grasp of local semantics for dense representations, a shortfall stemming from its \nmasked modeling on the embedding space and the consequent in less discriminative or even missing local semantics.\nTo bridge this gap, we introduce Dense-JEPA, a novel masked modeling objective rooted in JEPA, tailored for enhanced dense representation learning.\nOur key idea is simple: we consider a set of semantically similar neighboring patches as a target of a masked patch.\nTo be specific, the proposed Dense-JEPA (a) computes feature similarities between each masked patch and its corresponding neighboring patches to select patches having semantically meaningful relations, and (b) employs lightweight cross-attention heads to aggregate features of neighboring patches as the masked targets.\nConsequently, Dense-JEPA learns better dense representations, which can be beneficial to a wide range of downstream tasks.\nThrough extensive experiments, we demonstrate our effectiveness across various visual benchmarks, including ImageNet-1K image classification, ADE20K semantic segmentation, and COCO object detection tasks.", "keywords": "Self-supervised Learning;Joint-Embedding Predictive Architecture;Masked Image Modeling", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Shentong Mo;Sukmin Yun", "authorids": "~Shentong_Mo1;~Sukmin_Yun1", "gender": ";", "homepage": ";https://sites.google.com/view/sukmin-yun", "dblp": ";234/9078", "google_scholar": ";fQcZ_hQAAAAJ", "orcid": ";", "linkedin": ";sukmin-yun-975b67129/", "or_profile": "~Shentong_Mo1;~Sukmin_Yun1", "aff": ";Mohamed bin Zayed University of Artificial Intelligence", "aff_domain": ";mbzuai.ac.ae", "position": ";Postdoc", "bibtex": "@misc{\nmo2024dense,\ntitle={Dense Representation Learning for a Joint-Embedding Predictive Architecture},\nauthor={Shentong Mo and Sukmin Yun},\nyear={2024},\nurl={https://openreview.net/forum?id=8GCcSXlkZN}\n}", "github": "", "project": "", "reviewers": "wqKm;NVsT;KGbU;RoAg", "site": "https://openreview.net/forum?id=8GCcSXlkZN", "pdf_size": 40485812, "rating": "5;5;6;8", "confidence": "4;5;5;4", "soundness": "2;3;3;3", "contribution": "2;2;3;3", "presentation": "3;2;2;3", "wc_summary": "49;28;55;173", "wc_strengths": "61;9;34;61", "wc_weaknesses": "419;88;151;144", "wc_questions": "79;14;98;2", "wc_review": "608;139;338;380", "wc_reply_reviewers": "82;0;0;65", "wc_reply_authors": "1267;520;432;356", 
"reply_reviewers": "1;0;0;1", "reply_authors": "4;2;2;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 76.25, 56.751101310899685 ], "wc_strengths_avg": [ 41.25, 21.637640814099857 ], "wc_weaknesses_avg": [ 200.5, 128.4922176631721 ], "wc_questions_avg": [ 48.25, 41.026668156212736 ], "wc_review_avg": [ 366.25, 166.637893349622 ], "wc_reply_reviewers_avg": [ 36.75, 37.238253181372514 ], "wc_reply_authors_avg": [ 643.75, 364.4834529851801 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.40824829046386296, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:wwgTT6p-FuIJ:scholar.google.com/&scioq=Dense+Representation+Learning+for+a+Joint-Embedding+Predictive+Architecture&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": "", "aff_unique_url": "https://mbzuai.ac.ae", "aff_unique_abbr": "MBZUAI", "aff_country_unique_index": "0", "aff_country_unique": "United Arab Emirates" }, { "id": "8GmPLkO0oR", "title": "NeRFuser: Diffusion Guided Multi-Task 3D Policy Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "This paper presents NeRFuser, a language-conditioned multi-task policy framework that integrates neural rendering pre-training and diffusion training to enforce multi-modality learning in action sequence spaces. To learn a generalizable multi-task policy with few demonstrations, the pre-training phase of NeRFuser leverages neural rendering to distill 2D semantic features from foundation models such as Stable Diffusion to a 3D space, which provides a comprehensive semantic understanding regarding the scene. Consequently, it allows various applications to challenging robotic tasks requiring rich 3D semantics and accurate geometry. Furthermore, we introduce a novel approach utilizing diffusion training to learn a vision and language feature that encapsulates the inherent multi-modality in the multi-task demonstrations. By reconstructing the action sequences from different tasks via the diffusion process, the model is capable of distinguishing different modalities and thus improving the robustness and the generalizability of the learned representation. NeRFuser significantly surpasses SOTA NeRF-based multi-task manipulation approaches with over 30\\% improvement in success rate. 
Videos: https://nerfuser.github.io/.", "keywords": "Robotic Manipulation;Neural Radiance Field;Pre-Training;Diffusion", "primary_area": "applications to robotics, autonomy, planning", "supplementary_material": "/attachment/cfdcf956dff0183739f7a1634e3e9d52d7f4eec9.zip", "author": "Ge Yan;Yueh-Hua Wu;Xiaolong Wang", "authorids": "~Ge_Yan3;~Yueh-Hua_Wu1;~Xiaolong_Wang3", "gender": "Not Specified;;M", "homepage": "https://geyan21.github.io/;;https://xiaolonw.github.io/", "dblp": "169/8155-6;;91/952-4", "google_scholar": "ma7qW2kAAAAJ;;Y8O9N_0AAAAJ", "orcid": ";;", "linkedin": "ge-yan/;;", "or_profile": "~Ge_Yan3;~Yueh-Hua_Wu1;~Xiaolong_Wang3", "aff": "University of California, San Diego;;University of California, San Diego", "aff_domain": "ucsd.edu;;ucsd.edu", "position": "MS student;;Assistant Professor", "bibtex": "@misc{\nyan2024nerfuser,\ntitle={Ne{RF}user: Diffusion Guided Multi-Task 3D Policy Learning},\nauthor={Ge Yan and Yueh-Hua Wu and Xiaolong Wang},\nyear={2024},\nurl={https://openreview.net/forum?id=8GmPLkO0oR}\n}", "github": "", "project": "", "reviewers": "ui5H;bCxa;3vtu;Pcik", "site": "https://openreview.net/forum?id=8GmPLkO0oR", "pdf_size": 20086596, "rating": "3;5;6;8", "confidence": "4;3;4;4", "soundness": "3;3;3;4", "contribution": "2;2;3;3", "presentation": "2;3;2;4", "wc_summary": "127;72;69;142", "wc_strengths": "54;19;73;94", "wc_weaknesses": "191;102;162;276", "wc_questions": "91;95;57;152", "wc_review": "463;288;361;664", "wc_reply_reviewers": "465;0;262;50", "wc_reply_authors": "1948;1354;606;1279", "reply_reviewers": "1;0;2;1", "reply_authors": "6;5;5;3", "rating_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 102.5, 32.45381333526154 ], "wc_strengths_avg": [ 60.0, 27.577164466275352 ], "wc_weaknesses_avg": [ 182.75, 62.67924297564545 ], "wc_questions_avg": [ 98.75, 34.10553474144629 ], "wc_review_avg": [ 444.0, 141.40898132721273 ], "wc_reply_reviewers_avg": [ 194.25, 184.6867280017706 ], "wc_reply_authors_avg": [ 1296.75, 475.6192673767538 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 4.75, 1.0897247358851685 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.16012815380508713, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:39_eYI8LQSAJ:scholar.google.com/&scioq=NeRFuser:+Diffusion+Guided+Multi-Task+3D+Policy+Learning&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "University of California, San Diego", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsd.edu", "aff_unique_abbr": "UCSD", "aff_campus_unique_index": "0;0", "aff_campus_unique": "San Diego", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Learning with a Mole: Transferable latent spatial representations for navigation without reconstruction", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19332", "id": "8HCARN2hhw", "author_site": "Guillaume Bono, Leonid Antsfeld, Assem Sadek, Gianluca Monaci, Christian Wolf", "tldr": "", "abstract": "Agents navigating in 3D environments require some form of memory, which should hold a compact and actionable representation of the history of observations useful for decision taking and planning. 
In most end-to-end learning approaches the representation is latent and usually does not have a clearly defined interpretation, whereas classical robotics addresses this with scene reconstruction resulting in some form of map, usually estimated with geometry and sensor models and/or learning. In this work we propose to learn an actionable representation of the scene independently of the targeted downstream task and without explicitly optimizing reconstruction. The learned representation is optimized by a blind auxiliary agent trained to navigate with it on multiple short sub episodes branching out from a waypoint and, most importantly, without any direct visual observation. We argue and show that the blindness property is important and forces the (trained) latent representation to be the only means for planning. With probing experiments we show that the learned representation optimizes navigability and not reconstruction. On downstream tasks we show that it is robust to changes in distribution, in particular the sim2real gap, which we evaluate with a real physical robot in a real office building, significantly improving performance.", "keywords": "Navigation;Embodied AI;Perception", "primary_area": "applications to robotics, autonomy, planning", "supplementary_material": "/attachment/f84a134b21d0b098ddf46d3a14fdee05504d4afc.zip", "author": "Guillaume Bono;Leonid Antsfeld;Assem Sadek;Gianluca Monaci;Christian Wolf", "authorids": "~Guillaume_Bono1;~Leonid_Antsfeld1;~Assem_Sadek1;~Gianluca_Monaci1;~Christian_Wolf5", "gender": "M;M;M;;M", "homepage": ";;https://www.assemsadek.com;;https://chriswolfvision.github.io/www/", "dblp": "225/4959;;;23/903;38/2606-1.html", "google_scholar": "MkNg3H0AAAAJ;;JC9TIAwAAAAJ;https://scholar.google.co.uk/citations?user=t7MzKqIAAAAJ;idYS1AIAAAAJ", "orcid": "0000-0003-3001-2857;;;;", "linkedin": ";leonid-antsfeld-8bb3615;assemsadek/;gmonaci;christian-wolf-522761249/", "or_profile": "~Guillaume_Bono1;~Leonid_Antsfeld1;~Assem_Sadek1;~Gianluca_Monaci1;~Christian_Wolf5", "aff": "Naver Labs Europe;Naver Labs Europe;;Naver Labs Europe;Naver Labs Europe", "aff_domain": "naverlabs.com;naverlabs.com;;naverlabs.com;naverlabs.com", "position": "Researcher;Researcher;;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nbono2024learning,\ntitle={Learning with a Mole: Transferable latent spatial representations for navigation without reconstruction},\nauthor={Guillaume Bono and Leonid Antsfeld and Assem Sadek and Gianluca Monaci and Christian Wolf},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=8HCARN2hhw}\n}", "github": "", "project": "", "reviewers": "SbcL;sYMN;hBzC", "pdf_size": 1779340, "rating": "6;6;8", "confidence": "4;3;4", "soundness": "3;3;3", "contribution": "2;2;3", "presentation": "3;3;3", "wc_summary": "167;238;80", "wc_strengths": "112;43;161", "wc_weaknesses": "529;187;169", "wc_questions": "85;81;36", "wc_review": "893;549;446", "wc_reply_reviewers": "61;13;12", "wc_reply_authors": "802;714;748", "reply_reviewers": "1;1;1", "reply_authors": "1;2;1", "rating_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 161.66666666666666, 64.61338010728807 ], "wc_strengths_avg": [ 105.33333333333333, 48.40339749324306 ], "wc_weaknesses_avg": [ 295.0, 165.62608490210712 ], 
"wc_questions_avg": [ 67.33333333333333, 22.216110270602176 ], "wc_review_avg": [ 629.3333333333334, 191.12357840471233 ], "wc_reply_reviewers_avg": [ 28.666666666666668, 22.866763848189994 ], "wc_reply_authors_avg": [ 754.6666666666666, 36.23380864453651 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.49999999999999983, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10585416350282073239&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=8HCARN2hhw", "pdf": "https://openreview.net/pdf?id=8HCARN2hhw", "email": "naverlabs.com;naverlabs.com;;naverlabs.com;naverlabs.com", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "NAVER LABS", "aff_unique_dep": "", "aff_unique_url": "https://labs.naver.com", "aff_unique_abbr": "NLE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Unknown" }, { "id": "8HG2QrtXXB", "title": "HelmSim: Learning Helmholtz Dynamics for Interpretable Fluid Simulation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Fluid simulation is a long-standing challenge due to the intrinsic high-dimensional non-linear dynamics. Previous methods usually utilize the non-linear modeling capability of deep models to directly estimate velocity fields for future prediction. However, skipping over inherent physical properties but directly learning superficial velocity fields will overwhelm the model from generating precise or physics reliable results. In this paper, we propose the HelmSim toward an accurate and interpretable simulator for fluid. Inspired by Helmholtz theorem, we design a HelmDynamic block to learn the Helmholtz dynamics, which decomposes fluid dynamics into more solvable curl-free and divergence-free parts, physically corresponding to potential and stream functions of fluid. By embedding the HelmDynamic block into a Multiscale Intergation Network, HelmSim can integrate learned Helmholtz dynamics along temporal dimension in multiple spatial scales to yield future fluid. Comparing with previous velocity estimating methods, HelmSim is faithfully derived from Helmholtz theorem and ravels out complex fluid dynamics with physically interpretable evidence. 
Experimentally, our proposed HelmSim achieves the consistent state-of-the-art in both numerical simulated and real-world observed benchmarks, even for scenarios with complex boundaries.", "keywords": "Interpretable fluid simulation;Helmholtz decomposition", "primary_area": "neurosymbolic & hybrid AI systems (physics-informed, logic & formal reasoning, etc.)", "supplementary_material": "/attachment/e0cb9b76d4ec4e489adfb3588548f9b766677853.zip", "author": "Lanxiang Xing;Haixu Wu;Yuezhou Ma;Jianmin Wang;Mingsheng Long", "authorids": "~Lanxiang_Xing2;~Haixu_Wu1;~Yuezhou_Ma1;~Jianmin_Wang1;~Mingsheng_Long5", "gender": "M;M;M;M;M", "homepage": "https://github.com/BluesCrossing;;https://github.com/mayz20;https://www.thss.tsinghua.edu.cn/en/faculty/jianminwang.htm;http://ise.thss.tsinghua.edu.cn/~mlong", "dblp": ";286/8115;359/0553;06/3456-1.html;74/9023", "google_scholar": ";oLL_x0wAAAAJ;;https://scholar.google.com.tw/citations?user=MiovcboAAAAJ;_MjXpXkAAAAJ", "orcid": "0000-0001-5928-3242;;;0000-0001-6841-7943;0000-0002-5412-9120", "linkedin": ";;;;", "or_profile": "~Lanxiang_Xing2;~Haixu_Wu1;~Yuezhou_Ma1;~Jianmin_Wang1;~Mingsheng_Long2", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "MS student;PhD student;Undergrad student;Full Professor;Associate Professor", "bibtex": "@misc{\nxing2024helmsim,\ntitle={HelmSim: Learning Helmholtz Dynamics for Interpretable Fluid Simulation},\nauthor={Lanxiang Xing and Haixu Wu and Yuezhou Ma and Jianmin Wang and Mingsheng Long},\nyear={2024},\nurl={https://openreview.net/forum?id=8HG2QrtXXB}\n}", "github": "", "project": "", "reviewers": "rfHe;2nz9;Mqos;eKS7", "site": "https://openreview.net/forum?id=8HG2QrtXXB", "pdf_size": 23713244, "rating": "3;5;6;6", "confidence": "4;3;3;5", "soundness": "2;2;2;3", "contribution": "1;2;2;2", "presentation": "2;3;2;2", "wc_summary": "80;46;67;52", "wc_strengths": "46;49;76;81", "wc_weaknesses": "379;244;377;17", "wc_questions": "375;141;75;199", "wc_review": "880;480;595;349", "wc_reply_reviewers": "0;309;39;7", "wc_reply_authors": "1206;901;1367;1122", "reply_reviewers": "0;1;1;1", "reply_authors": "5;3;4;4", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 61.25, 13.254716141811564 ], "wc_strengths_avg": [ 63.0, 15.636495771111889 ], "wc_weaknesses_avg": [ 254.25, 147.49809320801404 ], "wc_questions_avg": [ 197.5, 111.4753335944773 ], "wc_review_avg": [ 576.0, 195.90941784406385 ], "wc_reply_reviewers_avg": [ 88.75, 128.0085446366765 ], "wc_reply_authors_avg": [ 1149.0, 168.08182531136436 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 4.0, 0.7071067811865476 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:HdqK5jlVER8J:scholar.google.com/&scioq=HelmSim:+Learning+Helmholtz+Dynamics+for+Interpretable+Fluid+Simulation&hl=en&as_sdt=0,14", "gs_version_total": 2, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "CoBIT: A Contrastive Bi-directional Image-Text Generation Model", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19331", "id": "8ISRqgtjPc", "author_site": "Haoxuan You, Xiaoyue Guo, Zhecan Wang, Kai-Wei Chang, Jason Baldridge, Jiahui Yu", "tldr": "", "abstract": "The field of Vision-and-Language (VL) has witnessed a proliferation of pretrained foundation models. Current techniques typically employ only one type of training objective, whether it's (1) contrastive objectives (like CLIP), (2) image-to-text generative objectives (like PaLI), or (3) text-to-image generative objectives (like Parti). However, all these three objectives are mutually relevant and are all based on image-text pairs. Intuitively, the first two objectives can be considered as complementary projections between two modalities, and contrastive learning can preserve global alignment and generations facilitate fine-grained understanding. Inspired by this, we present a Contrastive Bi-directional Image-Text generation model (CoBIT) to first time unify the three pre-training objectives in one framework. Specifically, CoBIT employs a novel unicoder-decoder structure consisting of an image unicoder, a text unicoder, and a cross-modal decoder. The image/text unicoders can switch between encoding and decoding in different tasks, enabling flexibility and shared knowledge that benefits both image-to-text and text-to-image generations. CoBIT achieves superior performance in image understanding, image-text understanding (Retrieval, Captioning, VQA, SNLI-VE), and text-based content creation, particularly in zero-shot scenarios.", "keywords": "Contrastive Pre-training;Vision-Language;Text-to-Image Generation;Auto-regressive Model.", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Haoxuan You;Mandy Guo;Zhecan Wang;Kai-Wei Chang;Jason Michael Baldridge;Jiahui Yu", "authorids": "~Haoxuan_You1;~Mandy_Guo2;~Zhecan_Wang2;~Kai-Wei_Chang1;~Jason_Michael_Baldridge1;~Jiahui_Yu1", "gender": "M;M;M;M;M;F", "homepage": "https://hxyou.github.io/;https://www.zhecanwang.com/;http://kwchang.net;https://research.google/people/jasonbaldridge/?&type=google;http://jiahuiyu.com/;", "dblp": "210/2628;167/4251;18/2428;90/6617;185/1060;", "google_scholar": "BhysChMAAAAJ;uqHPnmgAAAAJ;fqDBtzYAAAAJ;TP_JZm8AAAAJ;-CLCMk4AAAAJ;qOiCKewAAAAJ", "orcid": ";0009-0003-7785-4637;0000-0001-5365-0072;;;", "linkedin": ";jameszhecanwang/;kai-wei-chang-41239040;jason-baldridge-9b26295/;jiahuiyuu/;", "or_profile": "~Haoxuan_You1;~Zhecan_Wang2;~Kai-Wei_Chang1;~Jason_Michael_Baldridge1;~Jiahui_Yu1;~Xiaoyue_Guo1", "aff": "Columbia University;Columbia University;Amazon;Google;Google Brain;", "aff_domain": "columbia.edu;columbia.edu;amazon.com;google.com;google.com;", "position": "PhD student;PhD student;Researcher;Research Scientist;Research Scientist;", "bibtex": "@inproceedings{\nyou2024cobit,\ntitle={Co{BIT}: A Contrastive Bi-directional Image-Text Generation Model},\nauthor={Haoxuan You and Mandy Guo and Zhecan Wang and Kai-Wei Chang and Jason Michael Baldridge and Jiahui Yu},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=8ISRqgtjPc}\n}", "github": "", "project": "", "reviewers": "GAkS;vaSs;PXu6;wSdD;Ui8A", "pdf_size": 3360100, "rating": "5;6;6;8;8", "confidence": "4;4;5;5;4", 
"soundness": "3;3;2;4;4", "contribution": "2;3;3;2;3", "presentation": "3;3;3;4;2", "wc_summary": "70;140;47;68;123", "wc_strengths": "18;111;72;66;148", "wc_weaknesses": "192;150;476;107;198", "wc_questions": "51;82;19;20;1", "wc_review": "331;483;614;261;470", "wc_reply_reviewers": "0;0;15;0;248", "wc_reply_authors": "1132;683;845;571;874", "reply_reviewers": "0;0;1;0;1", "reply_authors": "3;2;3;2;2", "rating_avg": [ 6.6, 1.2 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "contribution_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 89.6, 35.556152772762125 ], "wc_strengths_avg": [ 83.0, 43.91810560577494 ], "wc_weaknesses_avg": [ 224.6, 129.89780598609045 ], "wc_questions_avg": [ 34.6, 28.639832401744254 ], "wc_review_avg": [ 431.8, 123.7827128479579 ], "wc_reply_reviewers_avg": [ 52.6, 97.87257021249621 ], "wc_reply_authors_avg": [ 821.0, 190.60430215501432 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 2.4, 0.4898979485566356 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.2721655269759087, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14505239298930891113&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=8ISRqgtjPc", "pdf": "https://openreview.net/pdf?id=8ISRqgtjPc", "email": "columbia.edu;columbia.edu;amazon.com;google.com;google.com;", "author_num": 6, "aff_unique_index": "0;0;1;2;2", "aff_unique_norm": "Columbia University;Amazon;Google", "aff_unique_dep": ";Amazon.com, Inc.;Google", "aff_unique_url": "https://www.columbia.edu;https://www.amazon.com;https://www.google.com", "aff_unique_abbr": "Columbia;Amazon;Google", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "8Itp6Axs9Z", "title": "SelfDreamer: Dual-Prototypical Regularization for Frame-masked Model-based Reinforcement Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "In the realm of reinforcement learning (RL), the conventional approach involves\ntraining agents in unknown environments using extensive experiences comprising\nhigh-dimensional state representations (typically images), actions, and rewards.\nHowever, this standard setup imposes substantial data transmission overhead in\nscenarios where edge devices are employed for data collection, and cloud servers\nare utilized for model training. This paper introduces a novel paradigm termed\n\u201dframe-masked RL,\u201d which is devised to enhance data efficiency while examining the impact on existing methods. Concurrently, we introduce a model-based\nalgorithm, \u201dSelfDreamer,\u201d tailored to mitigate the information loss incurred due\nto frame masking. SelfDreamer leverages action-transition dual prototypes to embed action information within the world model and align the hidden states in the\nrepresentation space. 
Empirical evaluations reveal that SelfDreamer consistently\noutperforms state-of-the-art methods across six continuous control tasks sourced\nfrom the DeepMind Control Suite, demonstrating superior or comparable performance while utilizing only half of the observations from the environment.", "keywords": "reinforcement learning;prototypical learning;deep learning", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/d3a5594c17942719049a62ab3dcc99bd2be9bbb2.zip", "author": "Wei Ding;Ming-Syan Chen", "authorids": "~Wei_Ding6;~Ming-Syan_Chen2", "gender": "M;M", "homepage": ";https://arbor.ee.ntu.edu.tw/~mschen", "dblp": ";c/MingSyanChen", "google_scholar": "https://scholar.google.com.tw/citations?user=DS7DBXgAAAAJ;KTmCrFkAAAAJ", "orcid": ";0000-0002-0711-8197", "linkedin": "wei-ding-7b0829260/;", "or_profile": "~Wei_Ding6;~Ming-Syan_Chen2", "aff": "National Taiwan University;National Taiwan University", "aff_domain": "ntu.edu.tw;ntu.edu", "position": "PhD student;Full Professor", "bibtex": "@misc{\nding2024selfdreamer,\ntitle={SelfDreamer: Dual-Prototypical Regularization for Frame-masked Model-based Reinforcement Learning},\nauthor={Wei Ding and Ming-Syan Chen},\nyear={2024},\nurl={https://openreview.net/forum?id=8Itp6Axs9Z}\n}", "github": "", "project": "", "reviewers": "Ngz6;GHX9;Euds;CGK5", "site": "https://openreview.net/forum?id=8Itp6Axs9Z", "pdf_size": 411206, "rating": "3;3;5;5", "confidence": "5;4;4;5", "soundness": "2;2;2;3", "contribution": "2;2;2;2", "presentation": "2;1;3;3", "wc_summary": "105;143;47;70", "wc_strengths": "46;78;48;110", "wc_weaknesses": "196;518;465;271", "wc_questions": "258;347;78;95", "wc_review": "605;1086;638;546", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 91.25, 36.32062086473743 ], "wc_strengths_avg": [ 70.5, 26.091186251299497 ], "wc_weaknesses_avg": [ 362.5, 133.02349416550447 ], "wc_questions_avg": [ 194.5, 112.65100976023251 ], "wc_review_avg": [ 718.75, 214.57792873452757 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:DgXQSSOGPG8J:scholar.google.com/&scioq=SelfDreamer:+Dual-Prototypical+Regularization+for+Frame-masked+Model-based+Reinforcement+Learning&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "National Taiwan University", "aff_unique_dep": "", "aff_unique_url": "https://www.ntu.edu.tw", "aff_unique_abbr": "NTU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Taiwan", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "8JCn0kmS8W", "title": "WavJourney: Compositional Audio Creation with Large Language Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "Despite breakthroughs in audio generation models, their capabilities are often confined to domain-specific conditions such as speech transcriptions and audio captions. 
However, real-world audio creation aims to generate harmonious audio containing various elements such as speech, music, and sound effects with controllable conditions, which is challenging to address using existing audio generation systems. We present WavJourney, a novel framework that leverages Large Language Models (LLMs) to connect various audio models for audio creation. WavJourney allows users to create storytelling audio content with diverse audio elements simply from textual descriptions. Specifically, given a text instruction, WavJourney first prompts LLMs to generate an audio script that serves as a structured semantic representation of audio elements. The audio script is then converted into a computer program, where each line of the program calls a task-specific audio generation model or computational operation function. The computer program is then executed to obtain a compositional and interpretable solution for audio creation. Experimental results suggest that WavJourney is capable of synthesizing realistic audio aligned with textually-described semantic, spatial and temporal conditions, achieving state-of-the-art results on text-to-audio generation benchmarks. Additionally, we introduce a new multi-genre story benchmark. Subjective evaluations demonstrate the potential of WavJourney in crafting engaging storytelling audio content from text. We further demonstrate that WavJourney can facilitate human-machine co-creation in multi-round dialogues. To foster future research, the code and synthesized audio are available at: https://anonymous.4open.science/w/WavJourney_Anonymous/.", "keywords": "Audio Generation;Audio Synthesis;Large Language Models (LLMs);AIGC;Computational Creativity", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Xubo Liu;Zhongkai Zhu;Haohe Liu;Yi Yuan;Meng Cui;Qiushi Huang;Jinhua Liang;Yin Cao;Qiuqiang Kong;Mark D Plumbley;Wenwu Wang", "authorids": "~Xubo_Liu1;~Zhongkai_Zhu1;~Haohe_Liu2;~Yi_Yuan2;~Meng_Cui1;~Qiushi_Huang1;~Jinhua_Liang1;~Yin_Cao1;~Qiuqiang_Kong1;~Mark_D_Plumbley1;~Wenwu_Wang1", "gender": "M;M;M;M;M;M;M;M;M;;M", "homepage": "https://liuxubo717.github.io/;;https://haoheliu.github.io/;https://www.surrey.ac.uk/people/yi-yuan;https://www.surrey.ac.uk/people/meng-cui;;https://jinhualiang.github.io/;;https://qiuqiangkong.github.io/;https://www.surrey.ac.uk/people/mark-plumbley;http://personal.ee.surrey.ac.uk/Personal/W.Wang/", "dblp": "235/1970/;;272/5570;;35/4727;204/2933;;;;84/1168;https://dblp.org/pers/hd/w/Wang:Wenwu", "google_scholar": "-OlNYSgAAAAJ;CcqM9L4AAAAJ;g3O4lJMAAAAJ;;https://scholar.google.com/citations?view_op=list_works;F_yGB9sAAAAJ;mDVXe7sAAAAJ;J9edRm4AAAAJ;;28TCymYAAAAJ;https://scholar.google.co.uk/citations?user=JQFnV5IAAAAJ", "orcid": ";;0000-0003-1036-7888;;;;0000-0002-4570-0735;;;0000-0002-9708-1075;", "linkedin": ";;haohe-liu-4483a71a4/;;;;jinhua-l1ang/;;;;https://uk.linkedin.com/in/wenwu", "or_profile": "~Xubo_Liu1;~Zhongkai_Zhu1;~Haohe_Liu2;~Yi_Yuan2;~Meng_Cui1;~Qiushi_Huang1;~Jinhua_Liang1;~Yin_Cao1;~Qiuqiang_Kong1;~Mark_D_Plumbley1;~Wenwu_Wang1", "aff": "University of Surrey;;Meta Facebook;University of Surrey;University of Surrey;University of Surrey;Queen Mary University of London;;;University of Surrey;University of Surrey", "aff_domain": "surrey.ac.uk;;meta.com;surrey.ac.uk;surrey.ac.uk;surrey.ac.uk;qmul.ac.uk;;;surrey.ac.uk;surrey.ac.uk", "position": "PhD student;;Intern;PhD student;PhD student;PhD student;PhD student;;;Full Professor;Full Professor", 
"bibtex": "@misc{\nliu2024wavjourney,\ntitle={WavJourney: Compositional Audio Creation with Large Language Models},\nauthor={Xubo Liu and Zhongkai Zhu and Haohe Liu and Yi Yuan and Meng Cui and Qiushi Huang and Jinhua Liang and Yin Cao and Qiuqiang Kong and Mark D Plumbley and Wenwu Wang},\nyear={2024},\nurl={https://openreview.net/forum?id=8JCn0kmS8W}\n}", "github": "", "project": "", "reviewers": "xGho;HBpU;koFK;jF76;iM1V", "site": "https://openreview.net/forum?id=8JCn0kmS8W", "pdf_size": 13661954, "rating": "3;5;5;6;6", "confidence": "3;3;4;3;3", "soundness": "2;2;3;2;2", "contribution": "2;2;2;2;3", "presentation": "3;3;3;3;2", "wc_summary": "86;225;83;163;226", "wc_strengths": "4;107;33;61;76", "wc_weaknesses": "185;337;149;125;157", "wc_questions": "100;29;30;2;114", "wc_review": "375;698;295;351;573", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "679;718;514;538;436", "reply_reviewers": "0;0;0;0;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.0, 1.0954451150103321 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 2.2, 0.39999999999999997 ], "contribution_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 156.6, 63.14617961523879 ], "wc_strengths_avg": [ 56.2, 35.38022046285184 ], "wc_weaknesses_avg": [ 190.6, 75.66663729808535 ], "wc_questions_avg": [ 55.0, 43.85430423573039 ], "wc_review_avg": [ 458.4, 152.13888391860905 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 577.0, 105.50450227359968 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3696469140543305659&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;0;0;0;2;0;0", "aff_unique_norm": "University of Surrey;Meta;Queen Mary University of London", "aff_unique_dep": ";Meta Platforms, Inc.;", "aff_unique_url": "https://www.surrey.ac.uk;https://meta.com;https://www.qmul.ac.uk", "aff_unique_abbr": "Surrey;Meta;QMUL", "aff_campus_unique_index": "1", "aff_campus_unique": ";London", "aff_country_unique_index": "0;1;0;0;0;0;0;0", "aff_country_unique": "United Kingdom;United States" }, { "id": "8JKZZxJAZ3", "title": "Nonnegative Matrix Factorization through Canonical Edges", "track": "main", "status": "Reject", "tldr": "", "abstract": "In this paper we present a novel approach to nonnegative matrix factorization (NMF) by introducing the concept of nonnegative canonical edges (NCEs). These NCEs are intersections of the principal subspace containing the data to be factored with canonical faces of the nonnegative orthant. Through this lens, our approach yields a closed-form solution to the special NMF case where (at least one of) the factors are required to be orthogonal. In the general NMF case, NCEs provide a deterministic optimal solution whenever the data resides within or in proximity to the cone formed by the NCEs. Furthermore, NCEs provide an improved initialization for classical NMF methods in general. 
Despite these advancements, numerous fundamental questions regarding NCEs in the context of NMF remain unexplored, offering exciting avenues for future research.", "keywords": "nonnegative matrix factorization;orthogonal", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/741f884deb358c33aa8f1fdfa8cd4d5edf39c9c8.zip", "author": "Daniel L. Pimentel-Alarc\u00f3n", "authorids": "~Daniel_L._Pimentel-Alarc\u00f3n1", "gender": "", "homepage": "https://danielpimentel.github.io/", "dblp": "150/6256", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "~Daniel_L._Pimentel-Alarc\u00f3n1", "aff": "University of Wisconsin, Madison", "aff_domain": "wisc.edu", "position": "Assistant Professor", "bibtex": "@misc{\npimentel-alarc{\\'o}n2024nonnegative,\ntitle={Nonnegative Matrix Factorization through Canonical Edges},\nauthor={Daniel L. Pimentel-Alarc{\\'o}n},\nyear={2024},\nurl={https://openreview.net/forum?id=8JKZZxJAZ3}\n}", "github": "", "project": "", "reviewers": "qZPt;bRYi;VKXr", "site": "https://openreview.net/forum?id=8JKZZxJAZ3", "pdf_size": 3373057, "rating": "3;5;5", "confidence": "4;3;3", "soundness": "2;2;2", "contribution": "1;2;2", "presentation": "1;2;1", "wc_summary": "78;106;33", "wc_strengths": "13;51;15", "wc_weaknesses": "331;121;127", "wc_questions": "3;26;54", "wc_review": "425;304;229", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.0, 0.0 ], "contribution_avg": [ 1.6666666666666667, 0.4714045207910317 ], "presentation_avg": [ 1.3333333333333333, 0.4714045207910317 ], "wc_summary_avg": [ 72.33333333333333, 30.070288030250428 ], "wc_strengths_avg": [ 26.333333333333332, 17.46106780494506 ], "wc_weaknesses_avg": [ 193.0, 97.61147473529944 ], "wc_questions_avg": [ 27.666666666666668, 20.8539897594894 ], "wc_review_avg": [ 319.3333333333333, 80.74789298936675 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.9999999999999998, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:BR1mSqziwU8J:scholar.google.com/&scioq=Nonnegative+Matrix+Factorization+through+Canonical+Edges&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "University of Wisconsin", "aff_unique_dep": "", "aff_unique_url": "https://www.wisc.edu", "aff_unique_abbr": "UW", "aff_campus_unique_index": "0", "aff_campus_unique": "Madison", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "lpNTK: Better Generalisation with Less Data via Sample Interaction During Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19330", "id": "8Ju0VmvMCW", "author_site": "Shangmin Guo, YI REN, Stefano Albrecht, Kenny Smith", "tldr": "", "abstract": "Although much research has been done on proposing new models or loss functions to improve the generalisation of artificial neural networks (ANNs), less attention has been directed to the impact of the training data on generalisation. In this work, we start from approximating the interaction between samples, i.e. 
how learning one sample would modify the model's prediction on other samples. Through analysing the terms involved in weight updates in supervised learning, we find that labels influence the interaction between samples. Therefore, we propose the labelled pseudo Neural Tangent Kernel (lpNTK) which takes label information into consideration when measuring the interactions between samples. We first prove that lpNTK asymptotically converges to the empirical neural tangent kernel in terms of the Frobenius norm under certain assumptions. Secondly, we illustrate how lpNTK helps to understand learning phenomena identified in previous work, specifically the learning difficulty of samples and forgetting events during learning. Moreover, we also show that using lpNTK to identify and remove poisoning training samples does not hurt the generalisation performance of ANNs.", "keywords": "generalisation;data selection;neural tangent kernel;sample interaction;learning dynamics", "primary_area": "learning theory", "supplementary_material": "", "author": "Shangmin Guo;Yi Ren;Stefano V Albrecht;Kenny Smith", "authorids": "~Shangmin_Guo1;~Yi_Ren6;~Stefano_V_Albrecht1;~Kenny_Smith1", "gender": "M;M;;M", "homepage": ";https://joshua-ren.github.io/;https://agents-lab.org/stefano-albrecht/;http://www.ling.ed.ac.uk/~kenny", "dblp": "183/0949;;118/3975;58/6224", "google_scholar": "cpOrbSoAAAAJ;5QNce38AAAAJ;https://scholar.google.co.uk/citations?user=ceSFqCcAAAAJ;", "orcid": "0000-0003-1716-0994;;0000-0002-8735-1465;0000-0002-4530-6914", "linkedin": ";;;", "or_profile": "~Shangmin_Guo1;~Yi_Ren6;~Stefano_V_Albrecht1;~Kenny_Smith1", "aff": "University of Edinburgh;University of British Columbia;University of Edinburgh;University of Edinburgh", "aff_domain": "ed.ac.uk;ubc.ca;ed.ac.uk;ed.ac.uk", "position": "PhD student;PhD student;Associate Professor;Professor", "bibtex": "@inproceedings{\nguo2024lpntk,\ntitle={lp{NTK}: Better Generalisation with Less Data via Sample Interaction During Learning},\nauthor={Shangmin Guo and Yi Ren and Stefano V Albrecht and Kenny Smith},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=8Ju0VmvMCW}\n}", "github": "", "project": "", "reviewers": "4TQg;3vgc;9biR", "pdf_size": 3163503, "rating": "6;6;8", "confidence": "4;2;3", "soundness": "2;3;3", "contribution": "2;3;3", "presentation": "2;2;3", "wc_summary": "73;93;106", "wc_strengths": "58;139;80", "wc_weaknesses": "254;172;202", "wc_questions": "4;55;87", "wc_review": "389;459;475", "wc_reply_reviewers": "31;0;55", "wc_reply_authors": "938;907;957", "reply_reviewers": "1;0;1", "reply_authors": "3;2;2", "rating_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 90.66666666666667, 13.572848714334887 ], "wc_strengths_avg": [ 92.33333333333333, 34.198765409814946 ], "wc_weaknesses_avg": [ 209.33333333333334, 33.875589375766666 ], "wc_questions_avg": [ 48.666666666666664, 34.179265969622904 ], "wc_review_avg": [ 441.0, 37.345236197762446 ], "wc_reply_reviewers_avg": [ 28.666666666666668, 22.51419305435771 ], "wc_reply_authors_avg": [ 934.0, 20.607442021431645 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 20, 0 
], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11154955795369655285&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "openreview": "https://openreview.net/forum?id=8Ju0VmvMCW", "pdf": "https://openreview.net/pdf?id=8Ju0VmvMCW", "email": "ed.ac.uk;ubc.ca;ed.ac.uk;ed.ac.uk", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Edinburgh;University of British Columbia", "aff_unique_dep": ";", "aff_unique_url": "https://www.ed.ac.uk;https://www.ubc.ca", "aff_unique_abbr": "Edinburgh;UBC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United Kingdom;Canada" }, { "id": "8LBS1nixTJ", "title": "HashOrder: Accelerating Graph Processing Through Hashing-based Reordering", "track": "main", "status": "Reject", "tldr": "", "abstract": "Graph processing systems are a fundamental tool across various domains such as machine learning, and their efficiency has become increasingly crucial due to the rapid growth in data volume. A major bottleneck in graph processing systems is poor cache utilization. Graph reordering techniques can mitigate this bottleneck and significantly speed up graph workloads by improving the data locality of the graph memory layout. However, since existing approaches use greedy algorithms or simple heuristics to find good orderings, they suffer from either high computational overhead or suboptimal ordering quality. To this end, we propose HashOrder, a probabilistic algorithm for graph reordering based on randomized hashing. We theoretically show that hashing-based orderings have quality guarantees under reasonable assumptions. HashOrder produces high-quality orderings while being lightweight and parallelizable. We empirically show that HashOrder beats the efficiency-quality tradeoff curve of existing algorithms. 
Evaluations on various graph processing workloads and GNN data loaders reveal that HashOrder is competitive with or outperforms the existing best method while being 592$\\times$ more efficient in reordering, speeding up PageRank by up to 2.49$\\times$ and GNN data loaders by up to 2.33$\\times$.", "keywords": "graph processing;graph reordering;efficiency;hashing", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Tianyi Zhang;Aditya Desai;Gaurav Gupta;Anshumali Shrivastava", "authorids": "~Tianyi_Zhang6;~Aditya_Desai1;~Gaurav_Gupta6;~Anshumali_Shrivastava1", "gender": "M;M;M;M", "homepage": "https://github.com/tonyzhang617;https://gaurav16gupta.github.io/;https://www.cs.rice.edu/~as143/;https://apd10.github.io/", "dblp": "17/322-11.html;;63/9828;18/8339", "google_scholar": "ekRl428AAAAJ;;https://scholar.google.com.tw/citations?user=SGT23RAAAAAJ;ymdbDZwAAAAJ", "orcid": ";;;0009-0002-9111-9391", "linkedin": ";;;aditya-desai-ai/", "or_profile": "~Tianyi_Zhang6;~Gaurav_Gupta6;~Anshumali_Shrivastava1;~Adity_Desai1", "aff": "Rice University;;ThirdAI Corp.;Rice University", "aff_domain": "rice.edu;;thirdai.com;rice.edu", "position": "PhD student;;CEO;PhD student", "bibtex": "@misc{\nzhang2024hashorder,\ntitle={HashOrder: Accelerating Graph Processing Through Hashing-based Reordering},\nauthor={Tianyi Zhang and Aditya Desai and Gaurav Gupta and Anshumali Shrivastava},\nyear={2024},\nurl={https://openreview.net/forum?id=8LBS1nixTJ}\n}", "github": "", "project": "", "reviewers": "EBBV;KkKE;3s87;L3q7;M1A7;n1d2", "site": "https://openreview.net/forum?id=8LBS1nixTJ", "pdf_size": 2025142, "rating": "3;5;5;6;6;8", "confidence": "4;3;4;3;3;4", "soundness": "2;3;2;3;2;3", "contribution": "1;2;3;3;2;3", "presentation": "3;3;3;3;2;3", "wc_summary": "133;50;58;75;40;80", "wc_strengths": "65;90;77;26;28;62", "wc_weaknesses": "147;58;352;103;188;46", "wc_questions": "79;20;13;29;18;41", "wc_review": "424;218;500;233;274;229", "wc_reply_reviewers": "297;198;0;0;65;29", "wc_reply_authors": "505;437;538;476;1055;381", "reply_reviewers": "1;2;0;0;1;1", "reply_authors": "2;3;1;1;2;2", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.3333333333333335, 0.7453559924999298 ], "presentation_avg": [ 2.8333333333333335, 0.3726779962499649 ], "wc_summary_avg": [ 72.66666666666667, 30.263656237951302 ], "wc_strengths_avg": [ 58.0, 23.713568549109883 ], "wc_weaknesses_avg": [ 149.0, 103.06955580254207 ], "wc_questions_avg": [ 33.333333333333336, 22.320892057044276 ], "wc_review_avg": [ 313.0, 109.00458705944443 ], "wc_reply_reviewers_avg": [ 98.16666666666667, 111.54881841099389 ], "wc_reply_authors_avg": [ 565.3333333333334, 224.5697120173501 ], "reply_reviewers_avg": [ 0.8333333333333334, 0.6871842709362768 ], "reply_authors_avg": [ 1.8333333333333333, 0.6871842709362768 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.11111111111111109, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6449314624127255120&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "Rice University;ThirdAI Corp.", "aff_unique_dep": ";", "aff_unique_url": "https://www.rice.edu;", "aff_unique_abbr": "Rice;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Fast-ELECTRA for Efficient Pre-training", "status": 
"Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19329", "id": "8OBuqbLb8h", "author_site": "Chengyu Dong, Liyuan Liu, Hao Cheng, Jingbo Shang, Jianfeng Gao, Xiaodong Liu", "tldr": "", "abstract": "ELECTRA pre-trains language models by detecting tokens in a sequence that have been replaced by an auxiliary model. Although ELECTRA offers a significant boost in efficiency, its potential is constrained by the training cost brought by the auxiliary model. Notably, this model, which is jointly trained with the main model, only serves to assist the training of the main model and is discarded post-training. This results in a substantial amount of training cost being expended in vain. To mitigate this issue, we propose Fast-ELECTRA, which leverages an existing language model as the auxiliary model. To construct a learning curriculum for the main model, we smooth its output distribution via temperature scaling following a descending schedule. Our approach rivals the performance of state-of-the-art ELECTRA-style pre-training methods, while significantly eliminating the computation and memory cost brought by the joint training of the auxiliary model. Our method also reduces the sensitivity to hyper-parameters and enhances the pre-training stability.", "keywords": "Language model Pre-training;ELECTRA;Efficiency", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Chengyu Dong;Liyuan Liu;Hao Cheng;Jingbo Shang;Jianfeng Gao;Xiaodong Liu", "authorids": "~Chengyu_Dong1;~Liyuan_Liu3;~Hao_Cheng4;~Jingbo_Shang2;~Jianfeng_Gao1;~Xiaodong_Liu1", "gender": ";M;M;M;;M", "homepage": "https://www.chengyu-dong.me/;https://sites.google.com/site/hcheng2site/Home;https://shangjingbo1226.github.io/;https://www.microsoft.com/en-us/research/people/jfgao/;;https://liyuanlucasliu.github.io/", "dblp": "14/3155;09/5158-2;151/3145.html;92/5339;65/622;06/1624", "google_scholar": "Ppfi7j0AAAAJ;https://scholar.google.com/citations?hl=en;0SkFI4MAAAAJ;https://scholar.google.com/citations?hl=en;NIewcxMAAAAJ;RmvbkzYAAAAJ", "orcid": ";0000-0001-7988-3149;;;;", "linkedin": ";;;;;", "or_profile": "~Chengyu_Dong1;~Hao_Cheng4;~Jingbo_Shang2;~Jianfeng_Gao1;~Xiaodong_Liu1;~Liyuan_Liu1", "aff": "University of California, San Diego;Microsoft Research;University of California, San Diego;Microsoft Research;Microsoft Research;University of Illinois, Urbana Champaign", "aff_domain": "ucsd.edu;microsoft.com;ucsd.edu;microsoft.com;microsoft.com;illinois.edu", "position": "PhD student;Researcher;Assistant Professor;Principal Researcher;Researcher;PhD student", "bibtex": "@inproceedings{\ndong2024fastelectra,\ntitle={Fast-{ELECTRA} for Efficient Pre-training},\nauthor={Chengyu Dong and Liyuan Liu and Hao Cheng and Jingbo Shang and Jianfeng Gao and Xiaodong Liu},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=8OBuqbLb8h}\n}", "github": "", "project": "", "reviewers": "QMNg;rs11;dT26", "pdf_size": 418386, "rating": "6;6;6", "confidence": "3;4;4", "soundness": "2;3;3", "contribution": "2;3;3", "presentation": "2;4;3", "wc_summary": "30;47;209", "wc_strengths": "16;12;85", "wc_weaknesses": "46;19;235", "wc_questions": "2;2;76", "wc_review": "94;80;605", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "322;125;1156", "reply_reviewers": "0;0;0", "reply_authors": "1;1;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], 
"soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 95.33333333333333, 80.67355342503652 ], "wc_strengths_avg": [ 37.666666666666664, 33.50953429829918 ], "wc_weaknesses_avg": [ 100.0, 96.0937042682818 ], "wc_questions_avg": [ 26.666666666666668, 34.883934538536344 ], "wc_review_avg": [ 259.6666666666667, 244.2544210904323 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 534.3333333333334, 446.88129171950004 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6732446403804894749&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=8OBuqbLb8h", "pdf": "https://openreview.net/pdf?id=8OBuqbLb8h", "email": "ucsd.edu;microsoft.com;ucsd.edu;microsoft.com;microsoft.com;illinois.edu", "author_num": 6, "aff_unique_index": "0;1;0;1;1;2", "aff_unique_norm": "University of California, San Diego;Microsoft;University of Illinois Urbana-Champaign", "aff_unique_dep": ";Microsoft Research;", "aff_unique_url": "https://www.ucsd.edu;https://www.microsoft.com/en-us/research;https://illinois.edu", "aff_unique_abbr": "UCSD;MSR;UIUC", "aff_campus_unique_index": "0;0;2", "aff_campus_unique": "San Diego;;Urbana-Champaign", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "MgNO: Efficient Parameterization of Linear Operators via Multigrid", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19328", "id": "8OxL034uEr", "author_site": "Juncai He, Xinliang Liu, Jinchao Xu", "tldr": "", "abstract": "In this work, we propose a concise neural operator architecture for operator learning. Drawing an analogy with a conventional fully connected neural network, we define the neural operator as follows: the output of the $i$-th neuron in a nonlinear operator layer is defined by $\\mathcal O_i(u) = \\sigma\\left( \\sum_j \\mathcal W_{ij} u + \\mathcal B_{ij}\\right)$. Here, $\\mathcal W_{ij}$ denotes the bounded linear operator connecting $j$-th input neuron to $i$-th output neuron, and the bias $\\mathcal B_{ij}$ takes the form of a function rather than a scalar. Given its new universal approximation property, the efficient parameterization of the bounded linear operators between two neurons (Banach spaces) plays a critical role. As a result, we introduce MgNO, utilizing multigrid structures to parameterize these linear operators between neurons. This approach offers both mathematical rigor and practical expressivity. Additionally, MgNO obviates the need for conventional lifting and projecting operators typically required in previous neural operators. Moreover, it seamlessly accommodates diverse boundary conditions. Our empirical observations reveal that MgNO exhibits superior ease of training compared to CNN-based models, while also displaying a reduced susceptibility to overfitting when contrasted with spectral-type neural operators. 
We demonstrate the efficiency and accuracy of our method with consistently state-of-the-art performance on different types of partial differential equations (PDEs).", "keywords": "neural operator; multigrid; universal approximation; boundary condition", "primary_area": "applications to physical sciences (physics, chemistry, biology, etc.)", "supplementary_material": "/attachment/a7015fd3efce1ca846b507d9c03a7562ff5a1fce.zip", "author": "Juncai He;Xinliang Liu;Jinchao Xu", "authorids": "~Juncai_He1;~Xinliang_Liu1;~Jinchao_Xu1", "gender": "M;M;M", "homepage": "https://juncaihe.github.io;https://cemse.kaust.edu.sa/scml/people/person/xinliang-liu;https://www.personal.psu.edu/jxx1/", "dblp": "223/4286;67/10364;", "google_scholar": "CG5GBW0AAAAJ;9AsSTc4AAAAJ;pBHiYxcAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Juncai_He1;~Xinliang_Liu1;~Jinchao_Xu1", "aff": "King Abdullah University of Science and Technology;King Abdullah University of Science and Technology;Pennsylvania State University", "aff_domain": "kaust.edu.sa;kaust.edu.sa;psu.edu", "position": "Researcher;Postdoc;Full Professor", "bibtex": "@inproceedings{\nhe2024mgno,\ntitle={Mg{NO}: Efficient Parameterization of Linear Operators via Multigrid},\nauthor={Juncai He and Xinliang Liu and Jinchao Xu},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=8OxL034uEr}\n}", "github": "", "project": "", "reviewers": "4o6c;RPM7;H6UK;5jaY", "pdf_size": 3022526, "rating": "6;6;6;8", "confidence": "3;2;2;4", "soundness": "3;3;3;4", "contribution": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "75;52;120;73", "wc_strengths": "24;57;92;91", "wc_weaknesses": "14;91;12;100", "wc_questions": "57;56;38;36", "wc_review": "170;256;262;300", "wc_reply_reviewers": "0;12;0;0", "wc_reply_authors": "321;1095;650;954", "reply_reviewers": "0;1;0;0", "reply_authors": "1;2;1;2", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 80.0, 24.78911051248108 ], "wc_strengths_avg": [ 66.0, 28.044607324760317 ], "wc_weaknesses_avg": [ 54.25, 41.37858745776612 ], "wc_questions_avg": [ 46.75, 9.781998773256925 ], "wc_review_avg": [ 247.0, 47.54997371187496 ], "wc_reply_reviewers_avg": [ 3.0, 5.196152422706632 ], "wc_reply_authors_avg": [ 755.0, 297.73394163245814 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4854488223594303538&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "openreview": "https://openreview.net/forum?id=8OxL034uEr", "pdf": "https://openreview.net/pdf?id=8OxL034uEr", "email": "kaust.edu.sa;kaust.edu.sa;psu.edu", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "King Abdullah University of Science and Technology;Pennsylvania State University", "aff_unique_dep": ";", "aff_unique_url": "https://www.kast.kau.edu.sa;https://www.psu.edu", "aff_unique_abbr": "KAUST;PSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Saudi Arabia;United States" }, { "title": "R&B: Region and Boundary Aware Zero-shot Grounded Text-to-image Generation", "status": "Poster", "track": "main", "site": 
"https://iclr.cc/virtual/2024/poster/19327", "id": "8Q4uVOJ5bX", "author_site": "Jiayu Xiao, Henglei Lv, Liang Li, Shuhui Wang, Qingming Huang", "tldr": "", "abstract": "Recent text-to-image (T2I) diffusion models have achieved remarkable progress in generating high-quality images given text-prompts as input. However, these models fail to convey appropriate spatial composition specified by a layout instruction. In this work, we probe into zero-shot grounded T2I generation with diffusion models, that is, generating images corresponding to the input layout information without training auxiliary modules or finetuning diffusion models. We propose a **R**egion and **B**oundary (R&B) aware cross-attention guidance approach that gradually modulates the attention maps of diffusion model during generative process, and assists the model to synthesize images (1) with high fidelity, (2) highly compatible with textual input, and (3) interpreting layout instructions accurately. Specifically, we leverage the discrete sampling to bridge the gap between consecutive attention maps and discrete layout constraints, and design a region-aware loss to refine the generative layout during diffusion process. We further propose a boundary-aware loss to strengthen object discriminability within the corresponding regions. Experimental results show that our method outperforms existing state-of-the-art zero-shot grounded T2I generation methods by a large margin both qualitatively and quantitatively on several benchmarks. \nProject page: https://sagileo.github.io/Region-and-Boundary.", "keywords": "Stable Diffusion;training-free;grounded text-to-image generation;controllable generation", "primary_area": "generative models", "supplementary_material": "/attachment/c8db5295ecda49880047fa64f1d9cfd900352249.zip", "author": "Jiayu Xiao;Henglei Lv;Liang Li;Shuhui Wang;Qingming Huang", "authorids": "~Jiayu_Xiao1;~Henglei_Lv1;~Liang_Li3;~Shuhui_Wang1;~Qingming_Huang1", "gender": "M;M;M;M;", "homepage": ";https://github.com/sagileo;http://www.ict.cas.cn/sourcedb_2018_ict_cas/cn/jssrck/201711/t20171114_4894220.html;https://vipl.ict.ac.cn/people/shwang/;https://qmhuang-ucas.github.io/", "dblp": ";;14/1395-3.html;37/2537;68/4388", "google_scholar": "8P4k11gAAAAJ;;Q-4mZnQAAAAJ;h-JxBSYAAAAJ;https://scholar.google.com.hk/citations?user=J1vMnRgAAAAJ", "orcid": ";;;0000-0002-5931-0527;", "linkedin": ";;;;", "or_profile": "~Jiayu_Xiao1;~Henglei_Lv1;~Liang_Li3;~Shuhui_Wang1;~Qingming_Huang2", "aff": "Institute of Computing Technology, University of Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;University of Chinese Academy of Sciences", "aff_domain": "ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn;ucas.ac.cn", "position": "PhD student;MS student;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nxiao2024rb,\ntitle={R\\&B: Region and Boundary Aware Zero-shot Grounded Text-to-image Generation},\nauthor={Jiayu Xiao and Henglei Lv and Liang Li and Shuhui Wang and Qingming Huang},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=8Q4uVOJ5bX}\n}", "github": "", "project": "", "reviewers": "F2Yp;cFE6;KFuf;Nptk", "pdf_size": 16579484, "rating": "6;6;6;6", "confidence": "3;4;4;4", "soundness": "3;3;3;3", "contribution": "3;3;2;3", "presentation": "3;3;2;3", "wc_summary": 
"116;78;78;113", "wc_strengths": "19;84;71;142", "wc_weaknesses": "110;214;280;109", "wc_questions": "67;64;21;124", "wc_review": "312;440;450;488", "wc_reply_reviewers": "57;0;101;30", "wc_reply_authors": "449;1113;1392;1379", "reply_reviewers": "1;0;1;1", "reply_authors": "2;2;3;3", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 96.25, 18.280795934531955 ], "wc_strengths_avg": [ 79.0, 43.754999714318366 ], "wc_weaknesses_avg": [ 178.25, 72.60294415517872 ], "wc_questions_avg": [ 69.0, 36.5991803186902 ], "wc_review_avg": [ 422.5, 66.26273462512697 ], "wc_reply_reviewers_avg": [ 47.0, 37.12815643147395 ], "wc_reply_authors_avg": [ 1083.25, 382.73775290660836 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6353490822039373341&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=8Q4uVOJ5bX", "pdf": "https://openreview.net/pdf?id=8Q4uVOJ5bX", "email": "ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn;ucas.ac.cn", "author_num": 5, "aff_unique_index": "0;1;1;1;0", "aff_unique_norm": "University of Chinese Academy of Sciences;Chinese Academy of Sciences", "aff_unique_dep": "Institute of Computing Technology;Institute of Computing Technology", "aff_unique_url": "http://www.ict.ac.cn;http://www.ict.ac.cn", "aff_unique_abbr": "UCAS;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "8Q6UmFhhQS", "title": "Neural Network Diffusion", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Diffusion models have achieved remarkable success in image and video generation. In this work, we demonstrate that diffusion models can also generate high-performing neural network parameters. Our approach is simple, utilizing an autoencoder and a standard latent diffusion model. The autoencoder extracts the latent representation of trained model parameters. A diffusion model is then trained to synthesize these latent parameter representations from random noise. It then generates new representations that are passed through the autoencoder\u2019s decoder, whose outputs are ready to use as new sets of network parameters. Across various tasks and datasets, our diffusion process consistently generates models of comparable or improved performance over SGD-trained models, with minimal additional cost. 
Our results encourage more exploration on the versatile use of diffusion models.", "keywords": "Parameter Generation;Diffusion Model", "primary_area": "generative models", "supplementary_material": "/attachment/3f160407ba5c0438d03f45552e75133ba90740b8.zip", "author": "Kai Wang;xu Zhao Pan;Zhuang Liu;Zelin Zang;Trevor Darrell;Yang You", "authorids": "~Kai_Wang8;~xu_Zhao_Pan1;~Zhuang_Liu1;~Zelin_Zang2;~Trevor_Darrell2;~Yang_You1", "gender": "M;M;M;M;M;M", "homepage": "https://kaiwang960112.github.io/;;;https://www.comp.nus.edu.sg/~youy/;https://people.eecs.berkeley.edu/~trevor/;https://liuzhuang13.github.io/", "dblp": "78/2022-36;278/2033.html;226/7615;33/8167-1.html;d/TrevorDarrell;56/11346-3", "google_scholar": "i2II0XIAAAAJ;;foERjnQAAAAJ;jF4dPZwAAAAJ;https://scholar.google.com.tw/citations?user=bh-uRFMAAAAJ;7OTD-LEAAAAJ", "orcid": "0000-0002-1154-5175;;;;;", "linkedin": ";;;yang-you-0b92914b/;;zhuang-liu-19306b1b1/", "or_profile": "~Kai_Wang8;~xu_Zhao_Pan1;~Zelin_Zang2;~Yang_You1;~trevor_darrell1;~Zhuang_Liu2", "aff": "National University of Singapore;Harbin Institute of Technology;National University of Singapore;National University of Singapore;Electrical Engineering & Computer Science Department;FAIR, Meta", "aff_domain": "u.nus.edu;hit.edu.cn;nus.edu.sg;nus.edu.sg;eecs.berkeley.edu;meta.com", "position": "PhD student;PhD student;Intern;Professor;Professor;Research Scientist", "bibtex": "@misc{\nwang2024neural,\ntitle={Neural Network Diffusion},\nauthor={Kai Wang and xu Zhao Pan and Zhuang Liu and Zelin Zang and Trevor Darrell and Yang You},\nyear={2024},\nurl={https://openreview.net/forum?id=8Q6UmFhhQS}\n}", "github": "", "project": "", "reviewers": "jEgU;GLkA;aG24", "site": "https://openreview.net/forum?id=8Q6UmFhhQS", "pdf_size": 6427964, "rating": "3;3;6", "confidence": "3;4;3", "soundness": "2;3;3", "contribution": "2;2;3", "presentation": "1;3;3", "wc_summary": "48;111;62", "wc_strengths": "33;60;43", "wc_weaknesses": "55;243;100", "wc_questions": "180;160;5", "wc_review": "316;574;210", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 4.0, 1.4142135623730951 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 73.66666666666667, 27.010286106510527 ], "wc_strengths_avg": [ 45.333333333333336, 11.145502331533658 ], "wc_weaknesses_avg": [ 132.66666666666666, 80.15124591809051 ], "wc_questions_avg": [ 115.0, 78.20912137766712 ], "wc_review_avg": [ 366.6666666666667, 152.86013941581442 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 3, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 69, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10519989663900027339&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;0;0;2;3", "aff_unique_norm": "National University of Singapore;Harbin Institute of Technology;Electrical Engineering & Computer Science Department;Meta", "aff_unique_dep": ";;Electrical Engineering & Computer Science;Facebook AI Research (FAIR)", "aff_unique_url": "https://www.nus.edu.sg;http://www.hit.edu.cn/;;https://meta.com", "aff_unique_abbr": "NUS;HIT;;Meta", "aff_campus_unique_index": "1", 
"aff_campus_unique": ";Harbin", "aff_country_unique_index": "0;1;0;0;3", "aff_country_unique": "Singapore;China;;United States" }, { "title": "Class Incremental Learning via Likelihood Ratio Based Task Prediction", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19326", "id": "8QfK9Dq4q0", "author_site": "Haowei Lin, Yijia Shao, Weinan Qian, Ningxin Pan, Yiduo Guo, Bing Liu", "tldr": "", "abstract": "Class incremental learning (CIL) is a challenging setting of continual learning, which learns a series of tasks sequentially. Each task consists of a set of unique classes. The key feature of CIL is that no task identifier (or task-id) is provided at test time. Predicting the task-id for each test sample is a challenging problem. An emerging theory-guided approach (called TIL+OOD) is to train a task-specific model for each task in a shared network for all tasks based on a task-incremental learning (TIL) method to deal with catastrophic forgetting. The model for each task is an out-of-distribution (OOD) detector rather than a conventional classifier. The OOD detector can perform both within-task (in-distribution (IND)) class prediction and OOD detection. The OOD detection capability is the key to task-id prediction during inference. However, this paper argues that using a traditional OOD detector for task-id prediction is sub-optimal because additional information (e.g., the replay data and the learned tasks) available in CIL can be exploited to design a better and principled method for task-id prediction. We call the new method TPL (Task-id Prediction based on Likelihood Ratio). TPL markedly outperforms strong CIL baselines and has negligible catastrophic forgetting. The code of TPL is publicly available at https://github.com/linhaowei1/TPL.", "keywords": "Class Incremental Learning;Continual Learning", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "", "author": "Haowei Lin;Yijia Shao;Weinan Qian;Ningxin Pan;Yiduo Guo;Bing Liu", "authorids": "~Haowei_Lin1;~Yijia_Shao1;~Weinan_Qian1;~Ningxin_Pan1;~Yiduo_Guo2;~Bing_Liu1", "gender": "M;F;M;F;M;M", "homepage": "https://linhaowei1.github.io/;https://cs.stanford.edu/~shaoyj/;https://github.com/SouthwestWindQ;https://github.com/pnx2003;https://www.cs.uic.edu/~liub/;https://github.com/gydpku", "dblp": "235/2798;329/4063;;;l/BingLiu1.html;196/5954.html", "google_scholar": "Ng-DmJgAAAAJ;H0zcQh4AAAAJ;;;Kt1bjZoAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0009-0006-9809-4835;;;;;", "linkedin": ";;;;;", "or_profile": "~Haowei_Lin1;~Yijia_Shao1;~Weinan_Qian1;~Ningxin_Pan1;~Bing_Liu1;~Yiduo_GUO1", "aff": "Peking University;Computer Science Department, Stanford University;Peking University;Peking University;University of Illinois at Chicago;Peking University", "aff_domain": "pku.edu.cn;cs.stanford.edu;pku.edu.cn;pku.edu.cn;uic.edu;pku.edu.cn", "position": "PhD student;PhD student;Undergrad student;Undergrad student;Full Professor;PhD student", "bibtex": "@inproceedings{\nlin2024class,\ntitle={Class Incremental Learning via Likelihood Ratio Based Task Prediction},\nauthor={Haowei Lin and Yijia Shao and Weinan Qian and Ningxin Pan and Yiduo Guo and Bing Liu},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=8QfK9Dq4q0}\n}", "github": "", "project": "", "reviewers": "aMSi;SU84;fLUE;h9ds", "pdf_size": 2115954, "rating": "5;5;6;8", "confidence": "4;4;4;3", "soundness": 
"3;3;3;4", "contribution": "2;3;2;3", "presentation": "1;3;3;3", "wc_summary": "67;99;71;185", "wc_strengths": "71;52;58;165", "wc_weaknesses": "359;103;80;40", "wc_questions": "4;6;371;92", "wc_review": "501;260;580;482", "wc_reply_reviewers": "297;0;201;0", "wc_reply_authors": "1802;1052;2042;353", "reply_reviewers": "1;0;1;0", "reply_authors": "3;3;4;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 105.5, 47.52630850381713 ], "wc_strengths_avg": [ 86.5, 45.83939353874569 ], "wc_weaknesses_avg": [ 145.5, 125.30861901720887 ], "wc_questions_avg": [ 118.25, 150.18717488520784 ], "wc_review_avg": [ 455.75, 118.84101775060662 ], "wc_reply_reviewers_avg": [ 124.5, 129.04359728401872 ], "wc_reply_authors_avg": [ 1312.25, 663.3778617801471 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.75, 1.0897247358851685 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.9428090415820632, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6922209428065594215&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=8QfK9Dq4q0", "pdf": "https://openreview.net/pdf?id=8QfK9Dq4q0", "email": "pku.edu.cn;cs.stanford.edu;pku.edu.cn;pku.edu.cn;uic.edu;pku.edu.cn", "author_num": 6, "aff_unique_index": "0;1;0;0;2;0", "aff_unique_norm": "Peking University;Stanford University;University of Illinois at Chicago", "aff_unique_dep": ";Computer Science Department;", "aff_unique_url": "http://www.pku.edu.cn;https://www.stanford.edu;https://www.uic.edu", "aff_unique_abbr": "Peking U;Stanford;UIC", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Stanford;Chicago", "aff_country_unique_index": "0;1;0;0;1;0", "aff_country_unique": "China;United States" }, { "id": "8S14xeFQAY", "title": "Segmenting the Unknown: Discrete Diffusion Models for Non-Deterministic Segmentation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Safety critical applications of deep-learning require models able to handle ambiguity and uncertainty.\nWe introduce discrete diffusion models to capture uncertainty in semantic segmentation, with application in both oncology and autonomous driving.\nUnlike prior approaches that tackle these tasks in distinct ways, we formulate both as estimating a complex posterior distribution over images, and present a unified solution that leverages the discrete diffusion framework.\nOur contributions include the adaptation of discrete diffusion for semantic segmentation to model uncertainty and the introduction of an auto-regressive diffusion framework for future forecasting.\nExperimental evaluation on medical imaging data and real-world future prediction tasks demonstrates the superiority of our generative framework over deterministic models and its competitive performance compared to methods specific to these domains separately.", "keywords": "segmentation;diffusion;future-prediction", "primary_area": "generative models", "supplementary_material": "", "author": "Evann COURDIER;Angelos Katharopoulos;Fran\u00e7ois Fleuret", "authorids": "~Evann_COURDIER1;~Angelos_Katharopoulos1;~Fran\u00e7ois_Fleuret2", "gender": "M;;M", "homepage": "https://evannc.notion.site/Evann-Courdier-70ea6da3f1fb43c6add377ffb5ec9e1c?pvs=4;https://angeloskath.github.io;https://fleuret.org/francois/", "dblp": 
";188/1159;90/5265", "google_scholar": "q4Bfz-4AAAAJ;CNSO4uIAAAAJ;https://scholar.google.ch/citations?user=Bj1tRlsAAAAJ", "orcid": ";;0000-0001-9457-7393", "linkedin": "evann-courdier/;;francois-fleuret/", "or_profile": "~Evann_COURDIER1;~Angelos_Katharopoulos1;~Francois_Fleuret1", "aff": "EPFL - EPF Lausanne;Apple;University of Geneva", "aff_domain": "epfl.ch;apple.com;unige.ch", "position": "PhD student;Researcher;Full Professor", "bibtex": "@misc{\ncourdier2024segmenting,\ntitle={Segmenting the Unknown: Discrete Diffusion Models for Non-Deterministic Segmentation},\nauthor={Evann COURDIER and Angelos Katharopoulos and Fran{\\c{c}}ois Fleuret},\nyear={2024},\nurl={https://openreview.net/forum?id=8S14xeFQAY}\n}", "github": "", "project": "", "reviewers": "ofDT;27GV;qH1j", "site": "https://openreview.net/forum?id=8S14xeFQAY", "pdf_size": 13811735, "rating": "3;5;6", "confidence": "5;4;3", "soundness": "2;3;3", "contribution": "2;2;2", "presentation": "2;2;3", "wc_summary": "85;117;38", "wc_strengths": "60;51;62", "wc_weaknesses": "115;209;200", "wc_questions": "140;1;278", "wc_review": "400;378;578", "wc_reply_reviewers": "41;49;182", "wc_reply_authors": "527;466;808", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 80.0, 32.44482495971687 ], "wc_strengths_avg": [ 57.666666666666664, 4.784233364802441 ], "wc_weaknesses_avg": [ 174.66666666666666, 42.35039026450117 ], "wc_questions_avg": [ 139.66666666666666, 113.08502209498047 ], "wc_review_avg": [ 452.0, 89.54700813911465 ], "wc_reply_reviewers_avg": [ 90.66666666666667, 64.66494843078092 ], "wc_reply_authors_avg": [ 600.3333333333334, 148.9392120594469 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.9819805060619659, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:txilMNL7L0UJ:scholar.google.com/&scioq=Segmenting+the+Unknown:+Discrete+Diffusion+Models+for+Non-Deterministic+Segmentation&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "EPFL;Apple;University of Geneva", "aff_unique_dep": ";Apple Inc.;", "aff_unique_url": "https://www.epfl.ch;https://www.apple.com;https://www.unige.ch", "aff_unique_abbr": "EPFL;Apple;UNIGE", "aff_campus_unique_index": "0", "aff_campus_unique": "Lausanne;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Switzerland;United States" }, { "id": "8S7eGD15b6", "title": "Subspace Grid-sweep: ML Defense Evaluation via Constrained Brute-force Search", "track": "main", "status": "Reject", "tldr": "", "abstract": "It is becoming increasingly imperative to design robust ML defenses. However, recent work has found that many defenses that initially resist state-of-the-art attacks can be broken by an adaptive adversary. Attacks can initially make defenses look strong by not finding potential adversarial examples due to obfuscated gradients, limited compute, unlucky initialization, etc. In this work, we make steps towards more reliable defense evaluation by introducing a new defense evaluation tool, Subspace Grid-sweep, that leverages deterministic inference to more simply evaluate adversarial robustness. 
We use Subspace Grid-sweep to show that a previously published, but now broken, defense could have been known to be broken without performing a fully adaptive attack. In order to make Subspace Grid-sweep applicable to random defenses, we show how to make deterministic variants of random defenses while retaining similar empirical effectiveness. As a result, we show that randomness may not be necessary for these defense\u2019s robustness.", "keywords": "artificial intelligence;machine learning;robustness;adversarial machine learning", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/b49118d34683604988200ad7d1b37406792df7d3.zip", "author": "Keane Lucas;Matthew Jagielski;Florian Tram\u00e8r;Lujo Bauer;Nicholas Carlini", "authorids": "~Keane_Lucas1;~Matthew_Jagielski1;~Florian_Tram\u00e8r1;~Lujo_Bauer1;~Nicholas_Carlini1", "gender": "M;M;;;", "homepage": "https://keanelucas.com;https://jagielski.github.io/;;;http://nicholas.carlini.com", "dblp": "250/5769;218/5156;;;145/1806", "google_scholar": "vJEa5voAAAAJ;_8rw_GMAAAAJ;;;", "orcid": "0000-0002-4705-3412;;;;", "linkedin": "keane-lucas/;;;;", "or_profile": "~Keane_Lucas1;~Matthew_Jagielski1;~Florian_Tram\u00e8r1;~Lujo_Bauer1;~Nicholas_Carlini1", "aff": "Carnegie Mellon University;Google;;;Google", "aff_domain": "cmu.edu;google.com;;;google.com", "position": "PhD student;Researcher;;;Researcher", "bibtex": "@misc{\nlucas2024subspace,\ntitle={Subspace Grid-sweep: {ML} Defense Evaluation via Constrained Brute-force Search},\nauthor={Keane Lucas and Matthew Jagielski and Florian Tram{\\`e}r and Lujo Bauer and Nicholas Carlini},\nyear={2024},\nurl={https://openreview.net/forum?id=8S7eGD15b6}\n}", "github": "", "project": "", "reviewers": "2Hxt;LfCf;EJoi;YZ44", "site": "https://openreview.net/forum?id=8S7eGD15b6", "pdf_size": 1990390, "rating": "5;5;5;6", "confidence": "3;4;4;3", "soundness": "3;2;2;3", "contribution": "3;3;2;3", "presentation": "3;2;3;3", "wc_summary": "42;47;75;153", "wc_strengths": "56;108;35;48", "wc_weaknesses": "55;218;277;74", "wc_questions": "12;25;32;6", "wc_review": "165;398;419;281", "wc_reply_reviewers": "9;219;45;12", "wc_reply_authors": "450;1971;1165;377", "reply_reviewers": "1;2;2;1", "reply_authors": "1;3;4;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 79.25, 44.39805738993543 ], "wc_strengths_avg": [ 61.75, 27.73422975314079 ], "wc_weaknesses_avg": [ 156.0, 94.08772502298055 ], "wc_questions_avg": [ 18.75, 10.280442597476044 ], "wc_review_avg": [ 315.75, 101.68425394327285 ], "wc_reply_reviewers_avg": [ 71.25, 86.46494954604438 ], "wc_reply_authors_avg": [ 990.75, 644.2733794748934 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.25, 1.299038105676658 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:BLd-hQcysvYJ:scholar.google.com/&scioq=Subspace+Grid-sweep:+ML+Defense+Evaluation+via+Constrained+Brute-force+Search&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;1;1", "aff_unique_norm": "Carnegie Mellon University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.cmu.edu;https://www.google.com", "aff_unique_abbr": "CMU;Google", "aff_campus_unique_index": "1;1", "aff_campus_unique": 
";Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "8SPSIfR2e0", "title": "Dissecting Language Models: Machine Unlearning via Selective Pruning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Understanding and shaping the behaviour of Large Language Models (LLMs) is increasingly important as applications become more powerful and more frequently adopted.\nThis paper introduces a machine unlearning method specifically designed for LLMs. \nWe introduce a selective pruning method for LLMs that removes neurons based on their relative importance on a targeted capability compared to overall network performance. \nThis approach is a compute- and data-efficient method for identifying and removing neurons that enable specific behaviours.\nOur findings reveal that both feed-forward and attention neurons in LLMs are specialized; \nthat is, for specific tasks, certain neurons are more crucial than others.", "keywords": "language models;pruning;machine unlearning;capability removal;intepretability;modularity", "primary_area": "visualization or interpretation of learned representations", "supplementary_material": "", "author": "Nicky Pochinkov;Nandi Schoots", "authorids": "~Nicky_Pochinkov1;~Nandi_Schoots1", "gender": "M;F", "homepage": "https://nicky.pro;https://safeandtrustedai.org/person/nandi-schoots/", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": "https://linkedin.com/in/nicky-pochinkov;", "or_profile": "~Nicky_Pochinkov1;~Nandi_Schoots1", "aff": "Independant;Imperial College London", "aff_domain": "nicky.pro;ic.ac.uk", "position": "Researcher;PhD student", "bibtex": "@misc{\npochinkov2024dissecting,\ntitle={Dissecting Language Models: Machine Unlearning via Selective Pruning},\nauthor={Nicky Pochinkov and Nandi Schoots},\nyear={2024},\nurl={https://openreview.net/forum?id=8SPSIfR2e0}\n}", "github": "", "project": "", "reviewers": "sDUD;Q3q4;9FMg;q42J", "site": "https://openreview.net/forum?id=8SPSIfR2e0", "pdf_size": 4096323, "rating": "5;6;6;6", "confidence": "4;4;3;4", "soundness": "2;3;3;3", "contribution": "2;3;3;3", "presentation": "3;2;3;3", "wc_summary": "238;71;84;118", "wc_strengths": "45;88;45;90", "wc_weaknesses": "817;353;229;91", "wc_questions": "209;114;192;10", "wc_review": "1309;626;550;309", "wc_reply_reviewers": "367;12;0;0", "wc_reply_authors": "1379;755;600;294", "reply_reviewers": "1;1;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 127.75, 65.9256209678756 ], "wc_strengths_avg": [ 67.0, 22.01136070305514 ], "wc_weaknesses_avg": [ 372.5, 272.8529823916169 ], "wc_questions_avg": [ 131.25, 78.63642603781024 ], "wc_review_avg": [ 698.5, 371.39231279066615 ], "wc_reply_reviewers_avg": [ 94.75, 157.25993609308125 ], "wc_reply_authors_avg": [ 757.0, 395.5711061237916 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2293050757298981416&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Independent;Imperial College London", "aff_unique_dep": ";", "aff_unique_url": 
";https://www.imperial.ac.uk", "aff_unique_abbr": ";ICL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1", "aff_country_unique": ";United Kingdom" }, { "id": "8T7m27VC3S", "title": "3D Dense Captioning beyond Nouns: A Middleware for Autonomous Driving", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recently, language foundation models have revolutionized many fields and how they could enable smarter and safer autonomous vehicles remains elusive. We believe one major obstacle is the lack of a comprehensive and standard middleware representation that links perception and planning. We rethink the limitations of existing middleware (e.g., 3D boxes or occupancy) and propose 3\\textbf{D} d\\textbf{e}n\\textbf{s}e capt\\textbf{i}onin\\textbf{g} beyond \\textbf{n}ouns (or abbreviated as DESIGN). For each input scenario, DESIGN refers to a set of 3D bounding boxes with a language description for each. Notably, the \\textbf{comprehensive} description involves not only what the box is (noun) but also its attribute (adjective), location (preposition) and moving status (adverb). We design a scalable rule-based auto-labelling methodology to generate DESIGN ground truth, guaranteeing that the middleware is \\textbf{standard}. Using this methodology, we construct a large-scale dataset nuDesign based upon nuScenes, which consists of an unprecedented number of 2300k sentences. We also present an extensive benchmarking on nuDesign, featuring a model named DESIGN-former that takes multi-modal inputs and predicts reliable DESIGN outputs. Through qualitative visualizations, we demonstrate that DEISGN, as a novel 3D scene understanding middleware, has good interpretability. We release our code, data and models, hoping this middleware could trigger better autonomous driving algorithms and systems that benefit from the power of language foundation models.", "keywords": "Autonomous Driving;Dense Captioning;Foundation model", "primary_area": "datasets and benchmarks", "supplementary_material": "", "author": "Bu Jin;Yupeng Zheng;Pengfei Li;Sujie Hu;Zhijie Yan;Xinyu Liu;Yuhang Zheng;Jingjing Huang;Jinwei Zhu;Guyue Zhou;Yilun Chen;Hao Zhao", "authorids": "~Bu_Jin1;~Yupeng_Zheng1;~Pengfei_Li6;~Sujie_Hu1;~Zhijie_Yan3;~Xinyu_Liu9;~Yuhang_Zheng2;~Jingjing_Huang2;~Jinwei_Zhu1;~Guyue_Zhou2;~Yilun_Chen3;~Hao_Zhao1", "gender": ";;M;F;M;F;M;F;M;M;M;M", "homepage": ";;https://github.com/Philipflyg;https://husujie.github.io/;;https://liuxinyv.github.io/;;https://github.com//JJingH;http://hao.360.com/?a1004;https://air.tsinghua.edu.cn/en/info/1046/1196.htm;https://air.tsinghua.edu.cn/info/1046/1769.htm;https://sites.google.com/view/fromandto", "dblp": ";;;;;;;;;133/4199;;08/3737-2.html", "google_scholar": ";;https://scholar.google.com/citations?view_op=list_works;;4PXGeaYAAAAJ;kgRjFN8AAAAJ;;;;;XGnsL5MAAAAJ;ygQznUQAAAAJ", "orcid": ";;;;;;0000-0001-8215-6962;;;;;", "linkedin": ";;;;;%E6%98%95%E7%85%9C-%E5%88%98-847812247/;;;;;;", "or_profile": "~Bu_Jin1;~Yupeng_Zheng1;~Pengfei_Li6;~Sujie_Hu1;~Zhijie_Yan3;~Xinyu_Liu9;~Yuhang_Zheng2;~Jingjing_Huang2;~Jinwei_Zhu1;~Guyue_Zhou2;~Yilun_Chen3;~Hao_Zhao1", "aff": ";;Tsinghua University;Minzu University of China;Beihang University;Hong Kong University of Science and Technology;Beihang University;Shanghai University;Beijing Jiaotong University;Tsinghua University;;Peking University", "aff_domain": ";;tsinghua.edu.cn;muc.edu.cn;buaa.edu.cn;connect.ust.hk;buaa.edu.cn;shu.edu.cn;bjtu.edu.cn;tsinghua.edu.cn;;pku.edu.cn", "position": ";;PhD 
student;Undergrad student;MS student;PhD student;MS student;Undergrad student;Undergrad student;Associate Professor;;Postdoc", "bibtex": "@misc{\njin2024d,\ntitle={3D Dense Captioning beyond Nouns: A Middleware for Autonomous Driving},\nauthor={Bu Jin and Yupeng Zheng and Pengfei Li and Sujie Hu and Zhijie Yan and Xinyu Liu and Yuhang Zheng and Jingjing Huang and Jinwei Zhu and Guyue Zhou and Yilun Chen and Hao Zhao},\nyear={2024},\nurl={https://openreview.net/forum?id=8T7m27VC3S}\n}", "github": "", "project": "", "reviewers": "rzAd;a5o2;R3EY", "site": "https://openreview.net/forum?id=8T7m27VC3S", "pdf_size": 11612601, "rating": "3;3;6", "confidence": "4;5;5", "soundness": "2;2;2", "contribution": "2;2;3", "presentation": "2;2;2", "wc_summary": "106;142;73", "wc_strengths": "66;267;43", "wc_weaknesses": "358;357;169", "wc_questions": "32;42;3", "wc_review": "562;808;288", "wc_reply_reviewers": "264;203;0", "wc_reply_authors": "1712;762;340", "reply_reviewers": "1;1;0", "reply_authors": "3;1;1", "rating_avg": [ 4.0, 1.4142135623730951 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "soundness_avg": [ 2.0, 0.0 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 107.0, 28.178005607210743 ], "wc_strengths_avg": [ 125.33333333333333, 100.6125682451695 ], "wc_weaknesses_avg": [ 294.6666666666667, 88.86068997156292 ], "wc_questions_avg": [ 25.666666666666668, 16.539514973407037 ], "wc_review_avg": [ 552.6666666666666, 212.39167173460973 ], "wc_reply_reviewers_avg": [ 155.66666666666666, 112.85487239026158 ], "wc_reply_authors_avg": [ 938.0, 573.7757982580537 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:jFZz7htIdqEJ:scholar.google.com/&scioq=3D+Dense+Captioning+beyond+Nouns:+A+Middleware+for+Autonomous+Driving&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;2;4;5;0;6", "aff_unique_norm": "Tsinghua University;Minzu University of China;Beihang University;Hong Kong University of Science and Technology;Shanghai University;Beijing Jiao Tong University;Peking University", "aff_unique_dep": ";;;;;;", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.muc.edu.cn/;http://www.buaa.edu.cn/;https://www.ust.hk;https://www.shu.edu.cn;http://www.njtu.edu.cn/en;http://www.pku.edu.cn", "aff_unique_abbr": "THU;MUC;BUAA;HKUST;SHU;BJTU;Peking U", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "8TAGx549Ns", "title": "REX: Rapid Exploration and eXploitation for AI agents", "track": "main", "status": "Reject", "tldr": "", "abstract": "AI agents leveraging the capabilities of Large Language Models (LLMs) and Reinforcement Learning (RL) techniques have garnered growing attention due to their commendable performance in autonomously executing real-world tasks. Effective exploration of the action space is paramount for the successful accomplishment of diverse tasks by these AI agents. In this paper, we propose an enhanced approach for $\\textbf{R}$apid $\\textbf{E}$xploration and e$\\textbf{X}$ploitation of action space for LLM-based AI agents, called $\\textbf{REX}$. 
Existing LLM-driven agents have inherent limitations, such as a heavy reliance on precise descriptions for decision-making, and the lack of a systematic approach to leverage try-and-fail procedures akin to traditional RL. REX introduces an additional layer of rewards and integrates concepts similar to Upper Confidence Bound (UCB) scores, leading to more robust and efficient AI agent performance. This approach has the advantage of enabling the utilization of offline behaviors from logs and allowing seamless integration with existing foundation models while it does not require any model fine-tuning. Through comparative analysis with existing methods such as Chain-of-Thought(CoT) and Reflexion, REX-based methods demonstrate comparable performance and, in certain cases, even surpass the results achieved by these existing techniques. Notably, REX-based methods exhibit remarkable reductions in execution time while systematically exploring the action space of AI agents, enhancing their practical applicability across a diverse set of scenarios.", "keywords": "AI agent;Large Language Models;Upper Confidence Bound", "primary_area": "generative models", "supplementary_material": "/attachment/3976967bb4823484c0b7b2480d8a7c08e6e43a11.zip", "author": "Rithesh R N;Shelby Heinecke;Juan Carlos Niebles;Zhiwei Liu;Le Xue;Weiran Yao;Yihao Feng;Zeyuan Chen;Akash Gokul;Devansh Arpit;Ran Xu;Phil L Mui;Huan Wang;Caiming Xiong;Silvio Savarese", "authorids": "~Rithesh_R_N1;~Shelby_Heinecke1;~Juan_Carlos_Niebles1;~Zhiwei_Liu3;~Le_Xue1;~Weiran_Yao1;~Yihao_Feng1;~Zeyuan_Chen1;~Akash_Gokul1;~Devansh_Arpit2;~Ran_Xu1;~Phil_L_Mui1;~Huan_Wang1;~Caiming_Xiong1;~Silvio_Savarese1", "gender": "M;F;M;;M;M;M;M;;M;M;;M;M;M", "homepage": "https://ritheshrn.github.io/;http://www.shelbyh.ai;http://www.niebles.net/;https://sites.google.com/view/zhiwei-jim;;;;https://www.linkedin.com/in/zeyuan-chen-0253b6141/;;;;;http://www.cs.yale.edu/homes/wang-huan/;http://cmxiong.com/;", "dblp": "352/4275;;26/647;90/9499-1.html;304/2195;192/3295;204/3696;191/1578-1.html;;120/8494;;;70/6155-16.html;80/7282;50/3578", "google_scholar": "https://scholar.google.ca/citations?user=Y1XpJucAAAAJ;tS937l8AAAAJ;hqNhUCYAAAAJ;https://scholar.google.com/citations?;https://scholar.google.com/citations?view_op=list_works;rr_leUAAAAAJ;uqnNle0AAAAJ;znf-4mgAAAAJ;;https://scholar.google.ca/citations?hl=en;sgBB2sUAAAAJ;;7NpTttkAAAAJ;vaSdahkAAAAJ;ImpbxLsAAAAJ", "orcid": ";;;0000-0003-1525-1067;0000-0003-2810-770X;;;0009-0003-2471-5449;;;;;;;", "linkedin": "rithesh-r-n/;shelbyheinecke;;;le-tycho-xue-5abbb9157/;;;zeyuan-chen-/;;;;;huanwangyale/;caiming-xiong-150a1417;", "or_profile": "~Rithesh_R_N1;~Shelby_Heinecke1;~Juan_Carlos_Niebles1;~Zhiwei_Liu3;~Le_Xue1;~Weiran_Yao1;~Yihao_Feng1;~Zeyuan_Chen1;~Akash_Gokul1;~Devansh_Arpit2;~Ran_Xu1;~Phil_L_Mui1;~Huan_Wang1;~Caiming_Xiong1;~Silvio_Savarese1", "aff": "SalesForce.com;Salesforce Research;Stanford University;Salesforce AI Research;Salesforce;SalesForce.com;Salesforce AI Research;Salesforce Inc;;VaniLabs;SalesForce.com;;Salesforce.com;Salesforce Research;Stanford University", "aff_domain": "salesforce.com;salesforce.com;stanford.edu;salesforce.com;salesforce.com;salesforce.com;salesforce.com;salesforce.com;;vanilabs.com;salesforce.com;;salesforce.com;salesforce.com;stanford.edu", "position": "Researcher;Researcher;Adjunct Professor;Researcher;Researcher;Researcher;Researcher;Researcher;;Principal Researcher;senior manager;;Researcher;Research Scientist;Adjunct Professor", "bibtex": "@misc{\nn2024rex,\ntitle={{REX}: Rapid 
Exploration and eXploitation for {AI} agents},\nauthor={Rithesh R N and Shelby Heinecke and Juan Carlos Niebles and Zhiwei Liu and Le Xue and Weiran Yao and Yihao Feng and Zeyuan Chen and Akash Gokul and Devansh Arpit and Ran Xu and Phil L Mui and Huan Wang and Caiming Xiong and Silvio Savarese},\nyear={2024},\nurl={https://openreview.net/forum?id=8TAGx549Ns}\n}", "github": "", "project": "", "reviewers": "b79r;3Nop;1Dq3;up34", "site": "https://openreview.net/forum?id=8TAGx549Ns", "pdf_size": 642008, "rating": "3;3;5;5", "confidence": "4;4;2;4", "soundness": "2;2;2;3", "contribution": "2;2;2;2", "presentation": "2;2;3;2", "wc_summary": "186;62;50;49", "wc_strengths": "134;40;51;21", "wc_weaknesses": "244;284;73;26", "wc_questions": "355;119;127;219", "wc_review": "919;505;301;315", "wc_reply_reviewers": "71;17;12;13", "wc_reply_authors": "894;916;768;745", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 86.75, 57.52988353890524 ], "wc_strengths_avg": [ 61.5, 43.21168823362494 ], "wc_weaknesses_avg": [ 156.75, 109.4471904618844 ], "wc_questions_avg": [ 205.0, 95.09994742374992 ], "wc_review_avg": [ 510.0, 249.505510961181 ], "wc_reply_reviewers_avg": [ 28.25, 24.752525123712125 ], "wc_reply_authors_avg": [ 830.75, 75.09785283215493 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 15, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15336770226670021155&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;1;0;0;0;0;0;2;0;0;0;1", "aff_unique_norm": "Salesforce;Stanford University;VaniLabs", "aff_unique_dep": ";;", "aff_unique_url": "https://www.salesforce.com;https://www.stanford.edu;", "aff_unique_abbr": "Salesforce;Stanford;", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States;" }, { "id": "8TyGCAuCGd", "title": "Look Ma, No Training! Observation Space Design for Reinforcement Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Many scientific communities agree on the potential of reinforcement learning (RL) agents to solve real-world problems, yet such consensus does not extend to how these agents should be designed. In some practical applications, the increasing literature on RL does not shed light on which RL components work better for a particular problem, they are usually treated just as configuration elements to be reported. One of these components is the choice of observation space, which in some cases entails dealing with tens of thousands of observable features. Choosing a rich yet efficient observation space is key to encoding useful information while limiting the tangible implications of adding extra features. Gaining understanding of feature relevance has already been studied for RL. In comparison to supervised learning, the effect of dependencies across states adds a layer of complexity to the structure of the problem. Many of the proposed methods require training RL agents from scratch several times, which is costly in real-world applications. 
In this paper, we propose a simple and cost-efficient way to find good observation spaces that does not require training. Specifically, we propose leveraging multiple random policies when comparing candidate spaces for the same problem. By conducting rollouts with different random policies for each candidate space, we are able to identify statistically-significant signals that indicate which features are better suited for the application considered. We demonstrate the usefulness of our approach in different RL problems, including Traffic Signal Control. By combining random policy sampling with the Hill Climbing search algorithm, we find observation spaces that use fewer features and achieve comparable or greater return. Overall, this work suggests a straightforward and inexpensive approach to an important aspect of RL design that is often overlooked and is crucial for applied problems.", "keywords": "observation space design;real-world reinforcement learning", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/bc80203cac72704cc955fd8185b5448ce4560428.zip", "author": "Juan Jose Garau-Luis;Ram Krishna Goel;Edward Crawley;Cathy Wu", "authorids": "~Juan_Jose_Garau-Luis1;~Ram_Krishna_Goel1;~Edward_Crawley1;~Cathy_Wu1", "gender": "M;M;;F", "homepage": ";;https://aeroastro.mit.edu/people/edward-f-crawley/;http://wucathy.com", "dblp": ";;;155/3740", "google_scholar": "FpaKuysAAAAJ;;;https://scholar.google.com/citations?hl=en", "orcid": ";;;0000-0001-8594-303X", "linkedin": "juanjosegarau/;ram-krishna-goel-5a662320a/;;cathywu/", "or_profile": "~Juan_Jose_Garau-Luis1;~Ram_Krishna_Goel1;~Edward_Crawley1;~Cathy_Wu1", "aff": ";Massachusetts Institute of Technology;;Massachusetts Institute of Technology", "aff_domain": ";mit.edu;;mit.edu", "position": ";Undergrad student;;Assistant Professor", "bibtex": "@misc{\ngarau-luis2024look,\ntitle={Look Ma, No Training! 
Observation Space Design for Reinforcement Learning},\nauthor={Juan Jose Garau-Luis and Ram Krishna Goel and Edward Crawley and Cathy Wu},\nyear={2024},\nurl={https://openreview.net/forum?id=8TyGCAuCGd}\n}", "github": "", "project": "", "reviewers": "NkaL;CJ7V;N2rj;fGnS", "site": "https://openreview.net/forum?id=8TyGCAuCGd", "pdf_size": 1037059, "rating": "1;3;5;5", "confidence": "4;4;4;5", "soundness": "1;1;2;2", "contribution": "2;2;3;1", "presentation": "3;3;3;3", "wc_summary": "169;38;107;34", "wc_strengths": "51;134;51;31", "wc_weaknesses": "395;500;271;257", "wc_questions": "34;56;52;18", "wc_review": "649;728;481;340", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 3.5, 1.6583123951777 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 1.5, 0.5 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 87.0, 55.52927156014204 ], "wc_strengths_avg": [ 66.75, 39.67603180762915 ], "wc_weaknesses_avg": [ 355.75, 99.09938193550957 ], "wc_questions_avg": [ 40.0, 15.165750888103101 ], "wc_review_avg": [ 549.5, 150.28722500598644 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:RErkHn1mq6sJ:scholar.google.com/&scioq=Look+Ma,+No+Training!+Observation+Space+Design+for+Reinforcement+Learning&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Revisiting Link Prediction: a data perspective", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19325", "id": "8Ur2xmuw7w", "author_site": "Haitao Mao, Juanhui Li, Harry Shomer, Bingheng Li, Wenqi Fan, Yao Ma, Tong Zhao, Neil Shah, Jiliang Tang", "tldr": "", "abstract": "Link prediction, a fundamental task on graphs, has proven indispensable in various applications, e.g., friend recommendation, protein analysis, and drug interaction prediction. However, since datasets span a multitude of domains, they could have distinct underlying mechanisms of link formation. Evidence in existing literature underscores the absence of a universally best algorithm suitable for all datasets. In this paper, we endeavor to explore principles of link prediction across diverse datasets from a data-centric perspective. We recognize three fundamental factors critical to link prediction: local structural proximity, global structural proximity, and feature proximity. We then unearth relationships among those factors where (i) global structural proximity only shows effectiveness when local structural proximity is deficient. (ii) The incompatibility can be found between feature and structural proximity. Such incompatibility leads to GNNs for Link Prediction (GNN4LP) consistently underperforming on edges where the feature proximity factor dominates. 
Inspired by these new insights from a data perspective, we offer practical instruction for GNN4LP model design and guidelines for selecting appropriate benchmark datasets for more comprehensive evaluations.", "keywords": "Link Prediction;Graph Neural Network", "primary_area": "learning on graphs and other geometries & topologies", "supplementary_material": "/attachment/7f292b427499ef2c6a3ca14529b1b63f683de205.zip", "author": "Haitao Mao;Juanhui Li;Harry Shomer;Bingheng Li;Wenqi Fan;Yao Ma;Tong Zhao;Neil Shah;Jiliang Tang", "authorids": "~Haitao_Mao1;~Juanhui_Li1;~Harry_Shomer1;~Bingheng_Li1;~Wenqi_Fan1;~Yao_Ma3;~Tong_Zhao3;~Neil_Shah2;~Jiliang_Tang1", "gender": "F;;M;M;M;M;M;M;M", "homepage": "https://juanhui28.github.io/;https://www.cse.msu.edu/~shomerha/;https://github.com/uestclbh;https://wenqifan03.github.io;https://yaoma24.github.io/;https://tzhao.io/;http://nshah.net;https://www.cse.msu.edu/~tangjili/;", "dblp": "313/9527.html;;;218/7410;212/7871.html;94/6503-3;71/7771;64/10812;", "google_scholar": "5J0dd-sAAAAJ;_6eE2vsAAAAJ;;https://scholar.google.com/citations?hl=en;wf9TTOIAAAAJ;05cRc-MAAAAJ;Qut69OgAAAAJ;WtzKMWAAAAAJ;3GmlKM4AAAAJ", "orcid": "0000-0003-4909-1778;0000-0001-5081-1870;0009-0000-0950-9012;0000-0002-4049-1233;;0000-0001-7660-1732;0000-0003-3261-8430;0000-0001-7125-3898;", "linkedin": ";;;wenqi-fan-a425a7196/;;;;;", "or_profile": "~Juanhui_Li1;~Harry_Shomer1;~Bingheng_Li1;~Wenqi_Fan1;~Yao_Ma3;~Tong_Zhao3;~Neil_Shah2;~Jiliang_Tang1;~Mao_Haitao1", "aff": "Amazon;Michigan State University;University of Electronic Science and Technology of China;The Hong Kong Polytechnic University;Rensselaer Polytechnic Institute;Snap Inc.;Snap Inc.;Michigan State University;Michigan State University", "aff_domain": "amazon.com;msu.edu;uestc.edu.cn;polyu.edu.hk;rpi.edu;snap.com;snap.com;msu.edu;msu.edu", "position": "Intern;PhD student;Undergrad student;Assistant Professor;Assistant Professor;Researcher;Research Scientist;Full Professor;PhD student", "bibtex": "@inproceedings{\nmao2024revisiting,\ntitle={Revisiting Link Prediction: a data perspective},\nauthor={Haitao Mao and Juanhui Li and Harry Shomer and Bingheng Li and Wenqi Fan and Yao Ma and Tong Zhao and Neil Shah and Jiliang Tang},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=8Ur2xmuw7w}\n}", "github": "", "project": "", "reviewers": "zuoh;cwp2;wtEd;Rht2", "pdf_size": 842601, "rating": "5;6;6;8", "confidence": "3;4;5;5", "soundness": "3;3;3;4", "contribution": "1;3;2;4", "presentation": "3;1;3;3", "wc_summary": "114;128;98;83", "wc_strengths": "83;120;30;146", "wc_weaknesses": "87;246;11;80", "wc_questions": "14;18;73;3", "wc_review": "298;512;212;312", "wc_reply_reviewers": "0;0;0;42", "wc_reply_authors": "1118;1808;887;959", "reply_reviewers": "0;0;0;1", "reply_authors": "4;5;3;4", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 105.75, 16.887495373796554 ], "wc_strengths_avg": [ 94.75, 43.573931426943794 ], "wc_weaknesses_avg": [ 106.0, 86.1132974632838 ], "wc_questions_avg": [ 27.0, 27.120103244641236 ], "wc_review_avg": [ 333.5, 109.93975623040102 ], "wc_reply_reviewers_avg": [ 10.5, 18.186533479473212 ], "wc_reply_authors_avg": [ 1193.0, 364.7745879306836 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 4.0, 
0.7071067811865476 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.7608859102526822, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2697033961358863084&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=8Ur2xmuw7w", "pdf": "https://openreview.net/pdf?id=8Ur2xmuw7w", "email": "amazon.com;msu.edu;uestc.edu.cn;polyu.edu.hk;rpi.edu;snap.com;snap.com;msu.edu;msu.edu", "author_num": 9, "aff_unique_index": "0;1;2;3;4;5;5;1;1", "aff_unique_norm": "Amazon;Michigan State University;University of Electronic Science and Technology of China;Hong Kong Polytechnic University;Rensselaer Polytechnic Institute;Snap Inc.", "aff_unique_dep": "Amazon.com, Inc.;;;;;", "aff_unique_url": "https://www.amazon.com;https://www.msu.edu;https://www.uestc.edu.cn;https://www.polyu.edu.hk;https://www.rpi.edu;https://www.snapinc.com", "aff_unique_abbr": "Amazon;MSU;UESTC;PolyU;RPI;Snap", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;1;1;0;0;0;0;0", "aff_country_unique": "United States;China" }, { "id": "8V3C2ijJ33", "title": "Lightweight In-Context Tuning for Multimodal Unified Models", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "In-context learning (ICL) involves reasoning from given contextual examples. As more modalities come, this procedure is becoming more challenging as the interleaved input modalities convolute the understanding process. This is exemplified by the observation that multimodal models often struggle to effectively extrapolate from contextual examples to perform ICL. To address these challenges, we introduce Multimodal In-context Tuning (M$^{2}$IXT), a lightweight module to enhance the ICL capabilities of multimodal unified models. The proposed M$^{2}$IXT module perceives an expandable context window to incorporate various labeled examples of multiple modalities (e.g., text, image, and coordinates). It can be prepended to various multimodal unified models (e.g., OFA, Unival, LLaVA) of different architectures and trained via a mixed-tasks strategy to enable rapid few-shot adaptation on multiple tasks and datasets. 
When tuned on as little as 50K multimodal data, M$^{2}$IXT can boost the few-shot ICL performance significantly (e.g., 18% relative increase for OFA), and obtained state-of-the-art results across an array of tasks including visual question answering, image captioning, visual grounding, and visual entailment, while being considerably small in terms of model parameters (e.g., ~20x smaller than Flamingo or MMICL), highlighting the flexibility and effectiveness of M$^{2}$IXT as a multimodal in-context learner.", "keywords": "Multi-modality;Unified Autoregressive Model;Multi-tasking;In-Context Tuning;Few-shot Adaptation", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Yixin Chen;Shuai Zhang;Boran Han;Jiaya Jia", "authorids": "~Yixin_Chen4;~Shuai_Zhang7;~Boran_Han1;~Jiaya_Jia1", "gender": "M;;;M", "homepage": "https://yix-chen.github.io/;;;https://jiaya.me", "dblp": "59/983;;;31/5649", "google_scholar": "tEWGP3sAAAAJ;;;https://scholar.google.com.tw/citations?user=XPAkzTEAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Yixin_Chen4;~Shuai_Zhang7;~Boran_Han1;~Jiaya_Jia1", "aff": "Department of Computer Science and Engineering, The Chinese University of Hong Kong;;;Department of Computer Science and Engineering, Hong Kong University of Science and Technology", "aff_domain": "cse.cuhk.edu.hk;;;cse.ust.hk", "position": "PhD student;;;Full Professor", "bibtex": "@misc{\nchen2024lightweight,\ntitle={Lightweight In-Context Tuning for Multimodal Unified Models},\nauthor={Yixin Chen and Shuai Zhang and Boran Han and Jiaya Jia},\nyear={2024},\nurl={https://openreview.net/forum?id=8V3C2ijJ33}\n}", "github": "", "project": "", "reviewers": "rfnh;1Zj1;kbgq;bTML", "site": "https://openreview.net/forum?id=8V3C2ijJ33", "pdf_size": 3375440, "rating": "3;5;5;6", "confidence": "4;4;4;3", "soundness": "2;2;3;3", "contribution": "2;2;3;3", "presentation": "3;3;2;3", "wc_summary": "19;129;78;49", "wc_strengths": "33;200;94;37", "wc_weaknesses": "378;437;120;6", "wc_questions": "21;4;35;119", "wc_review": "451;770;327;211", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 68.75, 40.56091098582476 ], "wc_strengths_avg": [ 91.0, 67.39807118901845 ], "wc_weaknesses_avg": [ 235.25, 178.12828944331105 ], "wc_questions_avg": [ 44.75, 44.25141240683737 ], "wc_review_avg": [ 439.75, 208.70478552251743 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6622661785325219, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2956087388086531757&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Chinese University of Hong Kong;Hong Kong University of Science and Technology", "aff_unique_dep": "Department of Computer Science and Engineering;Department of Computer Science and Engineering", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.ust.hk", "aff_unique_abbr": "CUHK;HKUST", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0", 
"aff_country_unique": "China" }, { "id": "8VHCeoBGxB", "title": "Revisiting the Temporal Modeling in Spatio-Temporal Predictive Learning under A Unified View", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Spatio-temporal predictive learning plays a crucial role in self-supervised learning, with wide-ranging applications across a diverse range of fields. Previous approaches for temporal modeling fall into two categories: recurrent-based and recurrent-free methods. The former, while meticulously processing frames one by one, neglect short-term spatio-temporal information redundancies, leading to inefficiencies. The latter naively stack frames sequentially, overlooking the inherent temporal dependencies. In this paper, we re-examine the two dominant temporal modeling approaches within the realm of spatio-temporal predictive learning, offering a unified perspective. Building upon this analysis, we introduce USTEP (Unified Spatio-TEmporal Predictive learning), an innovative framework that reconciles the recurrent-based and recurrent-free methods by integrating both micro-temporal and macro-temporal scales. Extensive experiments on a wide range of spatio-temporal predictive learning demonstrate that USTEP achieves significant improvements over existing temporal modeling approaches, thereby establishing it as a robust solution for a wide range of spatio-temporal applications.", "keywords": "self-supervised learning", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/65f8f7dbbb41ca55d8a80cd4687076b81425c700.zip", "author": "Cheng Tan;Jue Wang;Zhangyang Gao;Siyuan Li;Lirong Wu;Jun Xia;Stan Z. Li", "authorids": "~Cheng_Tan1;~Jue_Wang9;~Zhangyang_Gao1;~Siyuan_Li6;~Lirong_Wu1;~Jun_Xia1;~Stan_Z._Li2", "gender": "M;;M;M;;M;M", "homepage": "https://chengtan9907.github.io/;https://scholar.google.com.hk/citations?hl=zh-CN&pli=1&user=NjYyuQQAAAAJ;;https://lupin1998.github.io/;;http://junxia97.github.io/;https://en.westlake.edu.cn/academics/School_of_Engineering/About/Our_People/Faculty/201912/t20191206_2497.shtml", "dblp": "70/1533-12.html;;275/3266;63/9705-2;15/10330;;l/StanZLi", "google_scholar": "6kTV6aMAAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN;4SclT-QAAAAJ;https://scholar.google.com/citations?hl=zh-CN;Tk7TrCoAAAAJ;aPKKpSYAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";;0000-0003-1026-6083;0000-0001-6806-2468;;;", "linkedin": ";;;https://www.linkedin.cn/incareer/in/siyuan-li-lupin1998/;;;stan-z-li-%E6%9D%8E%E5%AD%90%E9%9D%92-55753224/", "or_profile": "~Cheng_Tan1;~Jue_Wang9;~Zhangyang_Gao1;~Siyuan_Li6;~Lirong_Wu1;~Jun_Xia1;~Stan_Z._Li1", "aff": "Zhejiang University & Westlake University;Zhongnan University of Economics and Law;Westlake University, China;Alibaba Group;Westlake University;Westlake University, China;Westlake University", "aff_domain": "westlake.edu.cn;zuel.edu.cn;westlake.edu.cn;alibaba-inc.com;westlake.edu.cn;westlake.edu.cn;westlake.edu.cn", "position": "PhD student;Undergrad student;PhD student;Intern;PhD student;PhD student;Chair Professor", "bibtex": "@misc{\ntan2024revisiting,\ntitle={Revisiting the Temporal Modeling in Spatio-Temporal Predictive Learning under A Unified View},\nauthor={Cheng Tan and Jue Wang and Zhangyang Gao and Siyuan Li and Lirong Wu and Jun Xia and Stan Z. 
Li},\nyear={2024},\nurl={https://openreview.net/forum?id=8VHCeoBGxB}\n}", "github": "", "project": "", "reviewers": "ygZp;nrq1;sHRf;8Yx7", "site": "https://openreview.net/forum?id=8VHCeoBGxB", "pdf_size": 942443, "rating": "3;3;5;6", "confidence": "4;3;3;3", "soundness": "3;3;2;2", "contribution": "3;2;2;2", "presentation": "3;2;3;3", "wc_summary": "91;60;62;89", "wc_strengths": "20;80;28;175", "wc_weaknesses": "276;295;141;110", "wc_questions": "2;62;152;5", "wc_review": "389;497;383;379", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 75.5, 14.534441853748634 ], "wc_strengths_avg": [ 75.75, 61.75910863994071 ], "wc_weaknesses_avg": [ 205.5, 81.02623032080415 ], "wc_questions_avg": [ 55.25, 60.75925855373813 ], "wc_review_avg": [ 412.0, 49.20365840057018 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5555555555555555, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3352675855393393460&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2;3;2;2;2", "aff_unique_norm": "Zhejiang University;Zhongnan University of Economics and Law;Westlake University;Alibaba Group", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.zju.edu.cn;http://www.zuel.edu.cn/;https://www.westlake.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "ZJU;ZUEL;WU;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Context is Environment", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19324", "id": "8VPWfqtQMX", "author_site": "Sharut Gupta, Stefanie Jegelka, David Lopez-Paz, Kartik Ahuja", "tldr": "", "abstract": "Two lines of work are taking the central stage in AI research. On the one hand, the community is making increasing efforts to build models that discard spurious correlations and generalize better in novel test environments. Unfortunately, the hard lesson so far is that no proposal convincingly outperforms a simple empirical risk minimization baseline. On the other hand, large language models (LLMs) have erupted as algorithms able to learn in-context, generalizing on-the-fly to eclectic contextual circumstances that users enforce by means of prompting. In this paper, we argue that context is environment, and posit that in-context learning holds the key to better domain generalization. Via extensive theory and experiments, we show that paying attention to context$\\unicode{x2013}\\unicode{x2013}$unlabeled examples as they arrive$\\unicode{x2013}\\unicode{x2013}$allows our proposed In-Context Risk Minimization (ICRM) algorithm to zoom-in on the test environment risk minimizer, leading to significant out-of-distribution performance improvements. Furthermore, training with context helps the model learn a better featurizer. From all of this, two messages are worth taking home. Researchers in domain generalization should consider environment as context, and harness the adaptive power of in-context learning. 
Researchers in LLMs should consider context as environment, to better structure data towards generalization. Code is available at https://github.com/facebookresearch/ICRM.", "keywords": "Domain Generalization; In-Context Learning", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Sharut Gupta;Stefanie Jegelka;David Lopez-Paz;Kartik Ahuja", "authorids": "~Sharut_Gupta1;~Stefanie_Jegelka3;~David_Lopez-Paz2;~Kartik_Ahuja1", "gender": "F;F;;", "homepage": "https://www.mit.edu/~sharut/;http://people.csail.mit.edu/stefje/;http://lopezpaz.org;", "dblp": ";38/7003;74/10481;", "google_scholar": "https://scholar.google.com/citations?hl=en;gTWUZlsAAAAJ;;", "orcid": ";;;", "linkedin": "sharut-gupta/;;;", "or_profile": "~Sharut_Gupta1;~Stefanie_Jegelka3;~David_Lopez-Paz2;~Kartik_Ahuja1", "aff": "Google;Massachusetts Institute of Technology;Meta Facebook;", "aff_domain": "google.com;mit.edu;fb.com;", "position": "Student Researcher;Associate Professor;Research Scientist;", "bibtex": "@inproceedings{\ngupta2024context,\ntitle={Context is Environment},\nauthor={Sharut Gupta and Stefanie Jegelka and David Lopez-Paz and Kartik Ahuja},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=8VPWfqtQMX}\n}", "github": "", "project": "", "reviewers": "cBZm;NswY;1GJP;7Bph", "pdf_size": 3283081, "rating": "6;6;6;8", "confidence": "4;3;2;3", "soundness": "2;3;2;3", "contribution": "2;4;2;4", "presentation": "3;3;3;4", "wc_summary": "177;85;25;123", "wc_strengths": "54;120;16;237", "wc_weaknesses": "1496;519;192;443", "wc_questions": "100;84;289;710", "wc_review": "1827;808;522;1513", "wc_reply_reviewers": "1819;174;0;61", "wc_reply_authors": "4824;1304;2420;1488", "reply_reviewers": "4;1;0;1", "reply_authors": "9;3;5;3", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 3.0, 1.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 102.5, 55.414348322433604 ], "wc_strengths_avg": [ 106.75, 83.9028456013263 ], "wc_weaknesses_avg": [ 662.5, 496.20182385799427 ], "wc_questions_avg": [ 295.75, 252.390941794669 ], "wc_review_avg": [ 1167.5, 524.4571002474845 ], "wc_reply_reviewers_avg": [ 513.5, 756.3116090607099 ], "wc_reply_authors_avg": [ 2509.0, 1401.928314857789 ], "reply_reviewers_avg": [ 1.5, 1.5 ], "reply_authors_avg": [ 5.0, 2.449489742783178 ], "replies_avg": [ 33, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "openreview": "https://openreview.net/forum?id=8VPWfqtQMX", "pdf": "https://openreview.net/pdf?id=8VPWfqtQMX", "email": "google.com;mit.edu;fb.com;", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Google;Massachusetts Institute of Technology;Meta", "aff_unique_dep": "Google;;Meta Platforms, Inc.", "aff_unique_url": "https://www.google.com;https://web.mit.edu;https://meta.com", "aff_unique_abbr": "Google;MIT;Meta", "aff_campus_unique_index": "0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "8WH6ZlDad6", "title": "EWoK: Tackling Robust Markov Decision Processes via Estimating Worst Kernel", "track": "main", "status": "Reject", "tldr": "", "abstract": "Robust Markov Decision Processes (RMDPs) provide a framework for sequential decision-making that is robust to perturbations on the 
transition kernel. However, current RMDP methods are often limited to small-scale problems, hindering their use in realistic high-dimensional domains. To bridge this gap, we present **EWoK**, a novel approach for the online RMDP setting that **E**stimates the **Wo**rst transition **K**ernel to learn robust policies. Unlike previous works that regularize the policy or value updates, EWoK achieves robustness by simulating the worst scenarios for the agent while retaining complete flexibility in the learning process. Notably, EWoK can be applied on top of any off-the-shelf *non-robust* RL algorithm, enabling easy scaling to high-dimensional domains. Our experiments, spanning from simple Cartpole to high-dimensional MinAtar and DeepMind Control Suite environments, demonstrate the effectiveness and applicability of the EWoK paradigm as a practical method for learning robust policies.", "keywords": "robust Markov decision process;reinforcement learning", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Kaixin Wang;Uri Gadot;Navdeep Kumar;Kfir Yehuda Levy;Shie Mannor", "authorids": "~Kaixin_Wang1;~Uri_Gadot1;~Navdeep_Kumar1;~Kfir_Yehuda_Levy1;~Shie_Mannor2", "gender": "M;M;M;M;M", "homepage": "https://kaixin96.github.io;;;http://kfiryehud.wixsite.com/kfir-y-levy;https://shie.net.technion.ac.il", "dblp": ";349/0367;;83/11388;20/1669", "google_scholar": "https://scholar.google.com.sg/citations?hl=en;XXolX3MAAAAJ;;;https://scholar.google.com.tw/citations?user=q1HlbIUAAAAJ", "orcid": "0000-0001-8237-9285;;;;", "linkedin": ";;navdeepsjb/;;", "or_profile": "~Kaixin_Wang1;~Uri_Gadot1;~Navdeep_Kumar1;~Kfir_Yehuda_Levy1;~Shie_Mannor2", "aff": "Technion - Israel Institute of Technology, Technion - Israel Institute of Technology;Technion - Israel Institute of Technology, Technion - Israel Institute of Technology;Technion - Israel Institute of Technology, Technion - Israel Institute of Technology;Technion - Israel Institute of Technology, Technion;Technion - Israel Institute of Technology, Technion", "aff_domain": "campus.technion.ac.il;campus.technion.ac.il;campus.technion.ac.il;technion.ac.il;technion.il", "position": "Postdoc;PhD student;PhD student;Assistant Professor;Full Professor", "bibtex": "@misc{\nwang2024ewok,\ntitle={{EW}oK: Tackling Robust Markov Decision Processes via Estimating Worst Kernel},\nauthor={Kaixin Wang and Uri Gadot and Navdeep Kumar and Kfir Yehuda Levy and Shie Mannor},\nyear={2024},\nurl={https://openreview.net/forum?id=8WH6ZlDad6}\n}", "github": "", "project": "", "reviewers": "rzcF;QAuy;qiNv;vpVH", "site": "https://openreview.net/forum?id=8WH6ZlDad6", "pdf_size": 7772774, "rating": "3;6;6;6", "confidence": "4;2;4;3", "soundness": "2;3;3;3", "contribution": "1;3;2;3", "presentation": "2;2;3;2", "wc_summary": "34;86;59;96", "wc_strengths": "25;79;62;113", "wc_weaknesses": "118;80;137;310", "wc_questions": "178;97;148;31", "wc_review": "355;342;406;550", "wc_reply_reviewers": "609;23;0;106", "wc_reply_authors": "1235;259;630;423", "reply_reviewers": "2;1;0;1", "reply_authors": "4;2;2;3", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 68.75, 24.200981385059574 ], "wc_strengths_avg": [ 69.75, 31.696805832764916 ], "wc_weaknesses_avg": [ 161.25, 88.29885333343803 ], "wc_questions_avg": [ 113.5, 55.74271252818614 ], "wc_review_avg": [ 413.25, 
82.49659083865224 ], "wc_reply_reviewers_avg": [ 184.5, 248.23627857345912 ], "wc_reply_authors_avg": [ 636.75, 369.57162702242175 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1383827578533557120&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Technion - Israel Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.technion.ac.il/en/", "aff_unique_abbr": "Technion", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Israel" }, { "title": "OmniQuant: Omnidirectionally Calibrated Quantization for Large Language Models", "status": "Spotlight", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19323", "id": "8Wuvhh0LYW", "author_site": "Wenqi Shao, Mengzhao Chen, Zhaoyang Zhang, Peng Xu, Lirui Zhao, Zhiqian Li, Kaipeng Zhang, Gao Peng, Yu Qiao, Ping Luo", "tldr": "", "abstract": "Large language models (LLMs) have revolutionized natural language processing tasks. However, their practical deployment is hindered by their immense memory and computation requirements. Although recent post-training quantization (PTQ) methods are effective in reducing memory footprint and improving the computational efficiency of LLM, they hand-craft quantization parameters, leading to low performance, especially in extremely low-bit quantization. To tackle this issue, we introduce an Omnidirectionally calibrated Quantization ($\\textbf{OmniQuant}$) technique for LLMs, which achieves good performance in diverse quantization settings while maintaining the computational efficiency of PTQ by efficiently optimizing various quantization parameters. OmniQuant comprises two innovative components including Learnable Weight Clipping (LWC) and Learnable Equivalent Transformation (LET). LWC modulates the extreme values of weights by optimizing the clipping threshold. Meanwhile, LET tackles activation outliers by shifting the challenge of quantization from activations to weights. Operating within a differentiable framework using block-wise error minimization, OmniQuant can optimize the quantization process efficiently for both weight-only and weight-activation quantization. For instance, the LLaMA-2 model family size 7-70B can be processed with OmniQuant on a single A100-40G GPU within 1-16 hours using 128 samples. Extensive experiments validate OmniQuant's superior performance across diverse quantization configurations such as W4A4 (4-bit weight, 4-bit activation), W6A6, W4A16, W3A16, and W2A16. Additionally, OmniQuant demonstrates effectiveness in instruction-tuned models and delivers notable improvements in inference speed and memory reduction on real devices. 
Codes are available at \n\\url{https://github.com/OpenGVLab/OmniQuant}.", "keywords": "Large Language Model Compression;Differentiable Quantization", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/0974848a3a6e6d9966e1d4d1ac74e251b28c3d0c.zip", "author": "Wenqi Shao;Mengzhao Chen;Zhaoyang Zhang;Peng Xu;Lirui Zhao;Zhiqian Li;Kaipeng Zhang;Peng Gao;Yu Qiao;Ping Luo", "authorids": "~Wenqi_Shao2;~Mengzhao_Chen1;~Zhaoyang_Zhang1;~Peng_Xu11;~Lirui_Zhao1;~Zhiqian_Li1;~Kaipeng_Zhang1;~Peng_Gao3;~Yu_Qiao1;~Ping_Luo2", "gender": "M;M;M;M;M;F;M;;;", "homepage": "https://wqshao126.github.io/;https://chenmnz.github.io/;https://zzyfd.github.io/#/;;https://github.com/Lirui-Zhao;;http://kpzhang93.github.io/;;;", "dblp": "227/3122;301/9459;;;;;179/2126;;;", "google_scholar": "Bs9mrwwAAAAJ;https://scholar.google.com.hk/citations?user=dN7UtFkAAAAJ;Pf6o7uAAAAAJ;;;;4OqZBmYAAAAJ;;;", "orcid": ";;;;;;;;;", "linkedin": ";;;https://www.linkedin.cn/incareer/in/peng-xu-250466206;;zhiqian-li-a19727205/;;;;", "or_profile": "~Wenqi_Shao2;~Mengzhao_Chen1;~Zhaoyang_Zhang1;~Peng_Xu11;~Lirui_Zhao1;~Zhiqian_Li1;~Kaipeng_Zhang1;~Peng_Gao3;~Yu_Qiao1;~Ping_Luo2", "aff": "Shanghai AI Laboratory;Xiamen University;The Chinese University of Hong Kong;University of Hong Kong;Xiamen University;University of Hong Kong;Shanghai AI Laboratory;;;", "aff_domain": "pjlab.org.cn;xmu.edu.cn;cuhk.edu.hk;hku.hk;xmu.edu.cn;hku.hk;pjlab.org.cn;;;", "position": "Researcher;MS student;PhD student;PhD student;MS student;Undergrad student;Researcher;;;", "bibtex": "@inproceedings{\nshao2024omniquant,\ntitle={OmniQuant: Omnidirectionally Calibrated Quantization for Large Language Models},\nauthor={Wenqi Shao and Mengzhao Chen and Zhaoyang Zhang and Peng Xu and Lirui Zhao and Zhiqian Li and Kaipeng Zhang and Peng Gao and Yu Qiao and Ping Luo},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=8Wuvhh0LYW}\n}", "github": "", "project": "", "reviewers": "PTbL;WDuc;ZyAK;FuSa;ngyH", "pdf_size": 884767, "rating": "6;6;6;6;8", "confidence": "5;4;5;4;5", "soundness": "3;2;3;3;3", "contribution": "3;2;3;3;3", "presentation": "3;3;3;3;2", "wc_summary": "92;71;47;64;210", "wc_strengths": "53;66;50;69;115", "wc_weaknesses": "67;453;41;191;91", "wc_questions": "394;5;81;6;22", "wc_review": "606;595;219;330;438", "wc_reply_reviewers": "27;229;42;36;23", "wc_reply_authors": "1590;2841;922;513;889", "reply_reviewers": "1;2;1;1;1", "reply_authors": "5;6;3;3;3", "rating_avg": [ 6.4, 0.7999999999999999 ], "confidence_avg": [ 4.6, 0.48989794855663565 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "contribution_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 96.8, 58.410273069041544 ], "wc_strengths_avg": [ 70.6, 23.36321895629966 ], "wc_weaknesses_avg": [ 168.6, 151.00410590444218 ], "wc_questions_avg": [ 101.6, 148.81478421178454 ], "wc_review_avg": [ 437.6, 149.99813332171837 ], "wc_reply_reviewers_avg": [ 71.4, 79.08122406741059 ], "wc_reply_authors_avg": [ 1351.0, 821.8211484258603 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 4.0, 1.2649110640673518 ], "replies_avg": [ 35, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.408248290463863, "gs_citation": 246, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17294293173749479580&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 4, "openreview": 
"https://openreview.net/forum?id=8Wuvhh0LYW", "pdf": "https://openreview.net/pdf?id=8Wuvhh0LYW", "email": "pjlab.org.cn;xmu.edu.cn;cuhk.edu.hk;hku.hk;xmu.edu.cn;hku.hk;pjlab.org.cn;;;", "author_num": 10, "aff_unique_index": "0;1;2;3;1;3;0", "aff_unique_norm": "Shanghai AI Laboratory;Xiamen University;Chinese University of Hong Kong;University of Hong Kong", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.shanghai-ai-lab.com;https://www.xmu.edu.cn;https://www.cuhk.edu.hk;https://www.hku.hk", "aff_unique_abbr": "SAIL;XMU;CUHK;HKU", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "8XgCH9y1Bs", "title": "3D Object Representation Learning for Robust Classification and Pose estimation", "track": "main", "status": "Reject", "tldr": "", "abstract": "In this work, we pioneer a framework for 3D object representation learning that achieves exceptionally robust classification and pose estimation results. In particular, we introduce a 3D representation of object categories using a 3D template mesh composed of feature vectors at each mesh vertex. Our model predicts, for each pixel in a 2D image, a feature vector of the corresponding vertex in each category template mesh, hence establishing dense correspondences between image pixels and the 3D template geometry of all target object categories. The feature vectors on the mesh vertices are trained to be viewpoint invariant by leveraging associated camera poses. During inference, we efficiently estimate the object class and pose by matching the class-specific templates to a target feature map in a two-step process: First, we classify the image by matching the vertex features of each template to an input feature map. Interestingly, we found that image classification can be performed using the vertex features only and without requiring the 3D mesh geometry, hence making the class label inference very efficient. In a second step, the object pose can be inferred using a render-and-compare matching process that ensures spatial consistency between the detected vertices. Our experiments on image classification demonstrate that our proposed 3D object representation has a number of profound advantages over classical image-based representations. First, it is exceptionally robust on a range of real-world and synthetic out-of-distribution shifts while performing on par with state-of-the-art architectures on in-distribution data in terms of accuracy and speed. Second, the estimated object pose is competitive with baseline models that were explicitly designed for pose estimation, but that cannot classify images. 
Finally, we show that our model has an enhanced interpretability by visualizing the individual vertex matches and the ability to perform classification and pose estimation jointly and consistently.", "keywords": "classification;3D-pose estimation;analysis-by-synthesis;render-and-compare", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/d5fb8bce79d4790099daf4e95353a2977d370d9d.zip", "author": "Artur Jesslen;Guofeng Zhang;Angtian Wang;Alan Yuille;Adam Kortylewski", "authorids": "~Artur_Jesslen1;~Guofeng_Zhang4;~Angtian_Wang2;~Alan_Yuille1;~Adam_Kortylewski1", "gender": "M;M;M;M;", "homepage": "https://arturjssln.github.io;https://richard-guofeng-zhang.github.io/;https://angtianwang.github.io/;;https://gvrl.mpi-inf.mpg.de/", "dblp": "345/3057;;;y/AlanLYuille;161/0772", "google_scholar": ";vl0mzhEAAAAJ;YR7re-cAAAAJ;;https://scholar.google.ch/citations?user=tRLUOBIAAAAJ", "orcid": "0000-0002-4837-8163;;;;0000-0002-9146-4403", "linkedin": "artur-jesslen/;;;;", "or_profile": "~Artur_Jesslen1;~Guofeng_Zhang4;~Angtian_Wang2;~Alan_Yuille1;~Adam_Kortylewski1", "aff": "University of Freiburg, Albert-Ludwigs-Universit\u00e4t Freiburg;Johns Hopkins University;Johns Hopkins University;Johns Hopkins University;Albert-Ludwigs-Universit\u00e4t Freiburg", "aff_domain": "cs.uni-freiburg.de;jhu.edu;jhu.edu;johnshopkins.edu;uni-freiburg.de", "position": "PhD student;PhD student;PhD student;Full Professor;Research Group Leader", "bibtex": "@misc{\njesslen2024d,\ntitle={3D Object Representation Learning for Robust Classification and Pose estimation},\nauthor={Artur Jesslen and Guofeng Zhang and Angtian Wang and Alan Yuille and Adam Kortylewski},\nyear={2024},\nurl={https://openreview.net/forum?id=8XgCH9y1Bs}\n}", "github": "", "project": "", "reviewers": "LehM;35dn;Yc1n;wnbh", "site": "https://openreview.net/forum?id=8XgCH9y1Bs", "pdf_size": 10851286, "rating": "3;5;6;6", "confidence": "3;4;4;3", "soundness": "2;2;3;3", "contribution": "1;2;3;2", "presentation": "3;2;3;3", "wc_summary": "244;81;80;94", "wc_strengths": "83;46;71;54", "wc_weaknesses": "228;181;81;98", "wc_questions": "46;5;7;200", "wc_review": "601;313;239;446", "wc_reply_reviewers": "190;49;63;204", "wc_reply_authors": "876;734;313;1458", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;3", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 124.75, 69.07016360194899 ], "wc_strengths_avg": [ 63.5, 14.430869689661812 ], "wc_weaknesses_avg": [ 147.0, 60.153969112603036 ], "wc_questions_avg": [ 64.5, 79.92027277230727 ], "wc_review_avg": [ 399.75, 137.84660859085363 ], "wc_reply_reviewers_avg": [ 126.5, 70.84666541200087 ], "wc_reply_authors_avg": [ 845.25, 409.9008264202452 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.40824829046386296, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:xO1UbHRLSTMJ:scholar.google.com/&scioq=3D+Object+Representation+Learning+for+Robust+Classification+and+Pose+estimation&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;1;1;2", "aff_unique_norm": "University of Freiburg;Johns Hopkins University;Albert-Ludwigs-Universit\u00e4t Freiburg", "aff_unique_dep": ";;", "aff_unique_url": 
"https://www.uni-freiburg.de;https://www.jhu.edu;https://www.uni-freiburg.de", "aff_unique_abbr": "UoF;JHU;Albert-Ludwigs-Universit\u00e4t", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Freiburg;", "aff_country_unique_index": "0;1;1;1;0", "aff_country_unique": "Germany;United States" }, { "id": "8Xx0mKoCMd", "title": "ExoViP: Step-by-step Verification and Exploration with Exoskeleton Modules for Compositional Visual Reasoning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Compositional visual reasoning methods, which translate a complex query into a structured composition of feasible visual tasks, have exhibited a strong potential in complicated multimodal tasks like visual question answering, language-guided image editing, etc. Empowered by recent advances in large language models (LLMs), this multimodal challenge has been brought to a new stage by treating LLMs as few-shot/zero-shot planners, i.e., visual-language programming.\nSuch methods, despite their numerous merits, suffer from challenges due to LLM planning mistakes or inaccuracy of visual execution modules, lagging behind the non-compositional models.\nIn this work, we devise a \"plug-and-play\" method, ExoViP, to correct the errors at both the planning and execution stages through introspective verification. We employ verification modules as \"exoskeletons\" to enhance current vision-language programming schemes. Specifically, our proposed verification module utilizes a mixture of three sub-verifiers to validate predictions after each reasoning step, subsequently calibrating the visual module predictions and refining the reasoning trace planned by LLMs. \nExperimental results on two representative vision-language programming methods showcase consistent improvements on five compositional reasoning tasks on standard benchmarks. 
In light of this, we believe ExoViP can foster better performance and generalization on open-domain multimodal challenges.", "keywords": "Compositional Reasoning;Multimodality", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/35577ec20819897a1094f0e7aaddda3b59f3bc9f.pdf", "author": "Yuxuan Wang;Alan Yuille;Zhuowan Li;Zilong Zheng", "authorids": "~Yuxuan_Wang4;~Alan_Yuille1;~Zhuowan_Li1;~Zilong_Zheng1", "gender": "M;M;F;M", "homepage": "https://github.com/patrick-tssn;;https://lizw14.github.io;http://zilongzheng.github.io", "dblp": ";y/AlanLYuille;228/6963;218/5234", "google_scholar": "jNCX2g0AAAAJ;;Fft1WvwAAAAJ;9sDx70IAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Yuxuan_Wang4;~Alan_Yuille1;~Zhuowan_Li1;~Zilong_Zheng1", "aff": "Beijing Institute for General Artificial Intelligence;Johns Hopkins University;Johns Hopkins University;Beijing Institute for General Artificial Intelligence", "aff_domain": "bigai.ai;johnshopkins.edu;jhu.edu;bigai.ai", "position": "Researcher;Full Professor;PhD student;Researcher", "bibtex": "@misc{\nwang2024exovip,\ntitle={ExoViP: Step-by-step Verification and Exploration with Exoskeleton Modules for Compositional Visual Reasoning},\nauthor={Yuxuan Wang and Alan Yuille and Zhuowan Li and Zilong Zheng},\nyear={2024},\nurl={https://openreview.net/forum?id=8Xx0mKoCMd}\n}", "github": "", "project": "", "reviewers": "atdm;2ndj;rVcU;Tiuv", "site": "https://openreview.net/forum?id=8Xx0mKoCMd", "pdf_size": 1944140, "rating": "5;5;6;6", "confidence": "3;5;2;3", "soundness": "2;4;4;3", "contribution": "2;2;4;3", "presentation": "2;3;3;3", "wc_summary": "81;90;74;113", "wc_strengths": "58;149;86;65", "wc_weaknesses": "299;202;30;134", "wc_questions": "133;162;2;202", "wc_review": "571;603;192;514", "wc_reply_reviewers": "0;66;0;48", "wc_reply_authors": "2112;1350;505;978", "reply_reviewers": "0;1;0;1", "reply_authors": "4;4;2;4", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 1.0897247358851685 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "contribution_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 89.5, 14.705441169852742 ], "wc_strengths_avg": [ 89.5, 35.864327680858594 ], "wc_weaknesses_avg": [ 166.25, 98.11313622548207 ], "wc_questions_avg": [ 124.75, 74.9845817485168 ], "wc_review_avg": [ 470.0, 163.6383206953677 ], "wc_reply_reviewers_avg": [ 28.5, 29.201883500897676 ], "wc_reply_authors_avg": [ 1236.25, 587.64290815086 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.5, 0.8660254037844386 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6882472016116854, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5136323398738501800&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Beijing Institute for General Artificial Intelligence;Johns Hopkins University", "aff_unique_dep": ";", "aff_unique_url": "http://www.bigaiai.org/;https://www.jhu.edu", "aff_unique_abbr": "BIGAI;JHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "China;United States" }, { "id": "8ZW3oLNE0c", "title": "SEArch: A Self-Evolving Framework for Network Architecture Optimization", "track": "main", "status": "Reject", "tldr": "", "abstract": "This paper studies a fundamental network optimization problem that finds a network 
architecture with optimal performance (low losses) under given resource budgets (small parameter size and/or fast inference). Different from existing network optimization approaches such as network pruning, knowledge distillation (KD), and network architecture search (NAS), in this work we introduce a novel self-evolving pipeline to perform network optimization. In this framework, a simple network iteratively and adaptively modifies its structures by using the guidance from the teacher network, until it reaches the resource budget. An attention module is introduced to transfer the knowledge from teacher network to student network. The splitting edge scheme helps the student model find an optimal macro architecture. The proposed framework combines the advantages of pruning, KD, and NAS, and hence, can efficiently generate networks with flexible structure and desirable performance. Extensive experiments on CIFAR-10, CIFAR-100 and ImageNet demonstrated that our framework achieves state-of-the-art performance in this network architecture optimization task.", "keywords": "network architecture optimization;network pruning;knowledge distillation", "primary_area": "optimization", "supplementary_material": "", "author": "Yongqing Liang;Dawei Xiang;Xin Li", "authorids": "~Yongqing_Liang1;xiangdw@tamu.edu;~Xin_Li52", "gender": "M;;M", "homepage": "https://lyq.me/scholar;;https://people.tamu.edu/~xinli/", "dblp": ";;09/1365-3", "google_scholar": "bDo9RPwAAAAJ;;Begpk8wAAAAJ", "orcid": "0000-0002-7282-0476;;0000-0002-0144-9489", "linkedin": ";;xin-shane-li-31427443", "or_profile": "~Yongqing_Liang1;xiangdw@tamu.edu;~Xin_Li52", "aff": "Texas A&M University - College Station;;Texas A&M University - College Station", "aff_domain": "tamu.edu;;tamu.edu", "position": "PhD student;;Full Professor", "bibtex": "@misc{\nliang2024search,\ntitle={{SEA}rch: A Self-Evolving Framework for Network Architecture Optimization},\nauthor={Yongqing Liang and Dawei Xiang and Xin Li},\nyear={2024},\nurl={https://openreview.net/forum?id=8ZW3oLNE0c}\n}", "github": "", "project": "", "reviewers": "rdLr;6dDt;xUVA", "site": "https://openreview.net/forum?id=8ZW3oLNE0c", "pdf_size": 580495, "rating": "3;5;5", "confidence": "3;4;4", "soundness": "3;2;3", "contribution": "1;2;2", "presentation": "3;3;2", "wc_summary": "44;58;96", "wc_strengths": "31;48;29", "wc_weaknesses": "117;81;200", "wc_questions": "65;4;4", "wc_review": "257;191;329", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 1.6666666666666667, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 66.0, 21.96967607104544 ], "wc_strengths_avg": [ 36.0, 8.524474568362947 ], "wc_weaknesses_avg": [ 132.66666666666666, 49.828595092465626 ], "wc_questions_avg": [ 24.333333333333332, 28.755675768252935 ], "wc_review_avg": [ 259.0, 56.356011214421486 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.9999999999999998, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:bTBy1m-mvH8J:scholar.google.com/&scioq=SEArch:+A+Self-Evolving+Framework+for+Network+Architecture+Optimization&hl=en&as_sdt=0,33", "gs_version_total": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Texas A&M University", "aff_unique_dep": "", "aff_unique_url": "https://www.tamu.edu", "aff_unique_abbr": "TAMU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "College Station", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "8ayoKVFmxp", "title": "QualEval: Qualitative Evaluation for Model Improvement", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Quantitative evaluation metrics have played a central role in measuring the progress of natural language systems (NLP) systems like large language models (LLMs) thus far, but they come with their own weaknesses. Given the complex and intricate nature of real-world tasks, a simple scalar to quantify and compare models is a gross trivialization of model behavior that ignores its idiosyncrasies. As a result, scalar evaluation metrics like accuracy make the actual model improvement process an arduous one. It currently involves a lot of manual effort which includes analyzing a large number of data points and making hit-or-miss changes to the training data or setup. This process is even more excruciating when this analysis needs to be performed on a cross-product of multiple models and datasets. In this work, we address the shortcomings of quantitative metrics by proposing our method QualEval, which enables automated qualitative evaluation as a vehicle for model improvement. QualEval provides a comprehensive dashboard with fine-grained analysis and human-readable insights to improve the model. We show that utilizing the dashboard generated by QualEval improves performance by up to 12% relatively on a variety of datasets, thus leading to agile model development cycles both on open-source and closed-source models and on a variety of setups like fine-tuning and in-context learning. In essence, QualEval serves as an automated data-scientist-in-a-box. 
Given the focus on critiquing and improving current evaluation metrics, our method serves as a refreshingly new technique towards both model evaluation and improvement.", "keywords": "qualitative evaluation;evaluation;framework", "primary_area": "visualization or interpretation of learned representations", "supplementary_material": "", "author": "Vishvak Murahari;Ameet Deshpande;Peter Clark;Tanmay Rajpurohit;Ashish Sabharwal;Karthik R Narasimhan;Ashwin Kalyan", "authorids": "~Vishvak_Murahari1;~Ameet_Deshpande1;~Peter_Clark1;~Tanmay_Rajpurohit1;~Ashish_Sabharwal1;~Karthik_R_Narasimhan1;~Ashwin_Kalyan6", "gender": "M;M;M;M;M;M;M", "homepage": "https://vishvakmurahari.com/;https://allenai.org/team/peterc;;;http://www.karthiknarasimhan.com;https://ameet-1997.github.io;http://ashwinkalyan.com/", "dblp": "249/5621;34/1184;;13/154;147/0322;220/4337;173/5217", "google_scholar": "Y_NYX7MAAAAJ;o-5vyEsAAAAJ;B4NztA8AAAAJ;7VspfeAAAAAJ;euc0GX4AAAAJ;332L1coAAAAJ;KYHL9aIAAAAJ", "orcid": ";;;;;;", "linkedin": ";peter-clark-a8b556/;tanmay-rajpurohit-b13942125/;ashish-sabharwal-82a2b661;;;", "or_profile": "~Vishvak_Murahari1;~Peter_Clark1;~Tanmay_Rajpurohit1;~Ashish_Sabharwal1;~Karthik_R_Narasimhan1;~Ameet_S_Deshpande1;~Ashwin_Kalyan_Vijayakumar1", "aff": "Princeton University;Allen Institute for Artificial Intelligence;Independent Researcher;Allen Institute for AI;Princeton University;Princeton University;Allen Institute for Artificial Intelligence", "aff_domain": "princeton.edu;allenai.org;tanmay.one;allenai.org;princeton.edu;princeton.edu;allenai.org", "position": "PhD student;Senior Research Manager;Researcher;Principal Researcher;Assistant Professor;PhD student;Research Scientist", "bibtex": "@misc{\nmurahari2024qualeval,\ntitle={QualEval: Qualitative Evaluation for Model Improvement},\nauthor={Vishvak Murahari and Ameet Deshpande and Peter Clark and Tanmay Rajpurohit and Ashish Sabharwal and Karthik R Narasimhan and Ashwin Kalyan},\nyear={2024},\nurl={https://openreview.net/forum?id=8ayoKVFmxp}\n}", "github": "", "project": "", "reviewers": "BTXw;hZVP;2Tzm", "site": "https://openreview.net/forum?id=8ayoKVFmxp", "pdf_size": 36036938, "rating": "1;3;3", "confidence": "4;2;4", "soundness": "1;3;3", "contribution": "2;3;2", "presentation": "2;2;3", "wc_summary": "46;53;89", "wc_strengths": "35;53;51", "wc_weaknesses": "298;139;151", "wc_questions": "6;30;38", "wc_review": "385;275;329", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 2.3333333333333335, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 2.3333333333333335, 0.9428090415820634 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 62.666666666666664, 18.83849486792639 ], "wc_strengths_avg": [ 46.333333333333336, 8.055363982396383 ], "wc_weaknesses_avg": [ 196.0, 72.29107828771127 ], "wc_questions_avg": [ 24.666666666666668, 13.59738536958076 ], "wc_review_avg": [ 329.6666666666667, 44.90978611493144 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5000000000000001, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16924907597993779907&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": 
"0;1;2;3;0;0;1", "aff_unique_norm": "Princeton University;Allen Institute for Artificial Intelligence;Independent Researcher;Allen Institute for AI", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.princeton.edu;https://allenai.org;;https://allenai.org", "aff_unique_abbr": "Princeton;AI2;;AI2", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States;" }, { "id": "8cNMMrWRbZ", "title": "LMRL Gym: Benchmarks for Multi-Turn Reinforcement Learning with Language Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "Large language models (LLMs) provide excellent text-generation capabilities, but standard prompting and generation methods generally do not lead to intentional or goal-directed agents and might necessitate considerable prompt tuning. This becomes particularly apparent in multi-turn conversations: even the best current LLMs rarely ask clarifying questions, engage in explicit information gathering, or take actions now that lead to better decisions after multiple turns. Reinforcement learning has the potential to leverage the powerful modeling capabilities of LLMs, as well as their internal representation of textual interactions, to create capable goal-directed language agents. This can enable intentional and temporally extended interactions, such as with humans, through coordinated persuasion and carefully crafted questions, or in goal-directed play through text games to bring about desired final outcomes. However, enabling this requires the community to develop stable and reliable reinforcement learning algorithms that can effectively train LLMs. Developing such algorithms requires tasks that can gauge progress on algorithm design, provide accessible and reproducible evaluations for multi-turn interactions, and cover a range of task properties and challenges in improving reinforcement learning algorithms. Our paper introduces the LMRL-Gym benchmark for evaluating multi-turn RL for LLMs, together with an open-source research framework containing a basic toolkit for getting started on multi-turn RL with offline value-based and policy-based RL methods. 
Our benchmark consists of 8 different language tasks, which require multiple rounds of language interaction and cover a range of tasks in open-ended dialogue and text games", "keywords": "benchmarks;RL;LLMs;offline reinforcement learning", "primary_area": "datasets and benchmarks", "supplementary_material": "/attachment/e4ee6cd1641958842d9b76d430bd3ef903f69b0b.zip", "author": "Marwa Abdulhai;Isadora White;Charlie Victor Snell;Charles Sun;Joey Hong;Yuexiang Zhai;Kelvin Xu;Sergey Levine", "authorids": "~Marwa_Abdulhai1;~Isadora_White1;~Charlie_Victor_Snell1;~Charles_Sun1;~Joey_Hong2;~Yuexiang_Zhai1;~Kelvin_Xu2;~Sergey_Levine1", "gender": ";Non-Binary;M;M;M;;Unspecified;M", "homepage": "https://abdulhaim.github.io/;https://icwhite.github.io/website/;https://sea-snell.github.io;https://charlesjsun.github.io/;;;http://kelvinxu.github.io/;https://people.eecs.berkeley.edu/~svlevine/", "dblp": "277/9482;;;;188/6056.html;241/6124.html;159/1894;80/7594", "google_scholar": ";https://scholar.google.com/citations?view_op=list_works;dD7EpwQAAAAJ;;SiBVfPUAAAAJ;78WTKm4AAAAJ;GyoKzFwAAAAJ;8R35rCwAAAAJ", "orcid": ";;;;;;;", "linkedin": ";isadora-c-white/;;;;;;", "or_profile": "~Marwa_Abdulhai1;~Isadora_White1;~Charlie_Victor_Snell1;~Charles_Sun1;~Joey_Hong2;~Yuexiang_Zhai1;~Kelvin_Xu2;~Sergey_Levine1", "aff": "University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;Google", "aff_domain": "berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;google.com", "position": "PhD student;Undergrad student;PhD student;Undergrad student;PhD student;PhD student;PhD student;Research Scientist", "bibtex": "@misc{\nabdulhai2024lmrl,\ntitle={{LMRL} Gym: Benchmarks for Multi-Turn Reinforcement Learning with Language Models},\nauthor={Marwa Abdulhai and Isadora White and Charlie Victor Snell and Charles Sun and Joey Hong and Yuexiang Zhai and Kelvin Xu and Sergey Levine},\nyear={2024},\nurl={https://openreview.net/forum?id=8cNMMrWRbZ}\n}", "github": "", "project": "", "reviewers": "6Fq6;2ASy;fFLM;BtsG", "site": "https://openreview.net/forum?id=8cNMMrWRbZ", "pdf_size": 1601889, "rating": "5;5;6;6", "confidence": "4;4;4;4", "soundness": "2;2;3;2", "contribution": "3;3;2;2", "presentation": "3;2;3;2", "wc_summary": "158;77;108;96", "wc_strengths": "32;129;116;54", "wc_weaknesses": "181;360;99;300", "wc_questions": "79;10;67;230", "wc_review": "450;576;390;680", "wc_reply_reviewers": "36;89;11;105", "wc_reply_authors": "1435;1235;581;925", "reply_reviewers": "1;1;1;1", "reply_authors": "4;4;3;4", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 109.75, 29.969776442275975 ], "wc_strengths_avg": [ 82.75, 40.76380134383936 ], "wc_weaknesses_avg": [ 235.0, 101.56524996277024 ], "wc_questions_avg": [ 96.5, 81.36491873037176 ], "wc_review_avg": [ 524.0, 112.32987136109433 ], "wc_reply_reviewers_avg": [ 60.25, 38.21894163893082 ], "wc_reply_authors_avg": [ 1044.0, 323.2228333518534 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.75, 0.4330127018922193 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 34, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=14634267088299559076&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0;0;0;0;0;1", "aff_unique_norm": "University of California, Berkeley;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.berkeley.edu;https://www.google.com", "aff_unique_abbr": "UC Berkeley;Google", "aff_campus_unique_index": "0;0;0;0;0;0;0;1", "aff_campus_unique": "Berkeley;Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Uncertainty-aware Graph-based Hyperspectral Image Classification", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19322", "id": "8dN7gApKm3", "author_site": "Linlin Yu, Yifei Lou, Feng Chen", "tldr": "", "abstract": "Hyperspectral imaging (HSI) technology captures spectral information across a broad wavelength range, providing richer pixel features compared to traditional color images with only three channels. Although pixel classification in HSI has been extensively studied, especially using graph convolution neural networks (GCNs), quantifying epistemic and aleatoric uncertainties associated with the HSI classification (HSIC) results remains an unexplored area. These two uncertainties are effective for out-of-distribution (OOD) and misclassification detection, respectively. In this paper, we adapt two advanced uncertainty quantification models, evidential GCNs (EGCN) and graph posterior networks (GPN), designed for node classifications in graphs, into the realm of HSIC. We first reveal theoretically that a popular uncertainty cross-entropy (UCE) loss function is insufficient to produce good epistemic uncertainty when learning EGCNs. To mitigate the limitations, we propose two regularization terms. One leverages the inherent property of HSI data where each feature vector is a linear combination of the spectra signatures of the confounding materials, while the other is the total variation (TV) regularization to enforce the spatial smoothness of the evidence with edge-preserving. We demonstrate the effectiveness of the proposed regularization terms on both EGCN and GPN on three real-world HSIC datasets for OOD and misclassification detection tasks. 
The code is available at GitHub.", "keywords": "Uncertainty Quantification;Graph;Hyperspectral Image Classification", "primary_area": "probabilistic methods (Bayesian methods, variational inference, sampling, UQ, etc.)", "supplementary_material": "", "author": "Linlin Yu;Yifei Lou;Feng Chen", "authorids": "~Linlin_Yu1;~Yifei_Lou2;~Feng_Chen7", "gender": "F;F;M", "homepage": ";https://sites.google.com/site/louyifei/;https://personal.utdallas.edu/~fxc190007/", "dblp": "204/9716;;21/3047-1", "google_scholar": "https://scholar.google.com/citations?hl=en;iCiUflEAAAAJ;KOQ-SSYAAAAJ", "orcid": "0009-0001-5690-9905;0000-0003-1973-5704;", "linkedin": "linlin-yu-723884249/;;", "or_profile": "~Linlin_Yu1;~Yifei_Lou2;~Feng_Chen7", "aff": "The University of Texas at Dallas;University of North Carolina at Chapel Hill;University of Texas, Dallas", "aff_domain": "cs.utdallas.edu;unc.edu;utdallas.edu", "position": "PhD student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nyu2024uncertaintyaware,\ntitle={Uncertainty-aware Graph-based Hyperspectral Image Classification},\nauthor={Linlin Yu and Yifei Lou and Feng Chen},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=8dN7gApKm3}\n}", "github": "", "project": "", "reviewers": "6WzU;pixa;NxUY;WkZQ;SyHe", "pdf_size": 1832303, "rating": "5;5;6;6;6", "confidence": "4;2;4;2;3", "soundness": "2;2;2;3;4", "contribution": "2;3;2;3;3", "presentation": "2;2;3;2;2", "wc_summary": "36;54;72;85;35", "wc_strengths": "60;28;69;74;60", "wc_weaknesses": "96;159;148;218;95", "wc_questions": "207;35;5;58;4", "wc_review": "399;276;294;435;194", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "1095;2237;884;698;1126", "reply_reviewers": "0;0;0;0;0", "reply_authors": "2;4;2;1;2", "rating_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 3.0, 0.8944271909999159 ], "soundness_avg": [ 2.6, 0.8 ], "contribution_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.2, 0.39999999999999997 ], "wc_summary_avg": [ 56.4, 19.7038067388005 ], "wc_strengths_avg": [ 58.2, 16.0299719276111 ], "wc_weaknesses_avg": [ 143.2, 45.64822011864208 ], "wc_questions_avg": [ 61.8, 75.34294923879739 ], "wc_review_avg": [ 319.6, 87.12427905010175 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1208.0, 537.2913548532118 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.2, 0.9797958971132712 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7656154241134560566&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=8dN7gApKm3", "pdf": "https://openreview.net/pdf?id=8dN7gApKm3", "email": "cs.utdallas.edu;unc.edu;utdallas.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Texas at Dallas;University of North Carolina", "aff_unique_dep": ";", "aff_unique_url": "https://www.utdallas.edu;https://www.unc.edu", "aff_unique_abbr": "UT Dallas;UNC", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Dallas;Chapel Hill", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "8dkp41et6U", "title": "LongLLMLingua: Accelerating and Enhancing LLMs in Long Context Scenarios via Prompt Compression", "track": "main", "status": "Reject", "tldr": "", "abstract": "In long context scenarios, large language models (LLMs) face three main challenges: 
higher computational/financial cost, longer latency, and inferior performance. Some studies reveal that the performance of LLMs depends on both the density and the position of the key information (question relevant) in the input prompt. Inspired by these findings, we propose LongLLMLingua for prompt compression towards improving LLMs\u2019 perception of the key information to simultaneously address the three challenges. We conduct evaluation on a wide range of long context scenarios including single-/multi-document QA, few-shot learning, summarization, synthetic tasks, and code completion, and experimental results show that the LongLLMLingua-compressed prompt can derive higher performance with much less cost. The latency of the end-to-end system is also reduced. For example, on the NaturalQuestions benchmark, LongLLMLingua gains a performance boost of up to 17.1% over the original prompt with \u223c4x fewer tokens as input to GPT-3.5-Turbo. It can derive cost savings of `$`28.5 and `$`27.4 per 1,000 samples from the LongBench and ZeroScrolls benchmarks, respectively. Additionally, when compressing prompts of \u223c10k tokens at a compression rate of 2x-10x, LongLLMLingua can speed up the end-to-end latency by 1.4x-3.8x.", "keywords": "Prompt Compression;Long Context;LLMs;Black-box LLMs;Efficient Method", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/a6731934d7958d6ae28a710aa9b4f8508ce1a21f.zip", "author": "Huiqiang Jiang;Qianhui Wu;Xufang Luo;Dongsheng Li;Chin-Yew Lin;Yuqing Yang;Lili Qiu", "authorids": "~Huiqiang_Jiang2;~Qianhui_Wu1;~Xufang_Luo1;~Dongsheng_Li2;~Chin-Yew_Lin1;~Yuqing_Yang1;~Lili_Qiu3", "gender": "M;F;F;M;M;;", "homepage": "https://hqjiang.com;https://qianhuiwu.github.io/;;http://recmind.cn;https://www.microsoft.com/en-us/research/people/cyl/;;https://www.microsoft.com/en-us/research/people/liliqiu/", "dblp": "204/2497;204/2307;218/7350;254/0830-2.html;64/6843;91/9064-1.html;", "google_scholar": "99KtvpYAAAAJ;BLZieokAAAAJ;;VNg5rA8AAAAJ;cDF07aYAAAAJ;4BtNQAEAAAAJ;", "orcid": "0000-0002-1327-4882;;;0000-0003-3103-8442;;0000-0003-3518-5212;", "linkedin": ";qianhui-wu-2b1608b7?originalSubdomain=cn;;;chin-yew-lin-32585a4;;", "or_profile": "~Huiqiang_Jiang2;~Qianhui_Wu1;~Xufang_Luo1;~Dongsheng_Li2;~Chin-Yew_Lin1;~Yuqing_Yang1;~Lili_Qiu3", "aff": "Microsoft;Microsoft;Microsoft Research;Microsoft Research Asia;Microsoft;Microsoft Research;University of Texas at Austin", "aff_domain": "microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;research.microsoft.com;utexas.edu", "position": "RSDE;Researcher;Researcher;Principal Researcher;Senior Principal Research Manager;Researcher;Full Professor", "bibtex": "@misc{\njiang2024longllmlingua,\ntitle={Long{LLML}ingua: Accelerating and Enhancing {LLM}s in Long Context Scenarios via Prompt Compression},\nauthor={Huiqiang Jiang and Qianhui Wu and Xufang Luo and Dongsheng Li and Chin-Yew Lin and Yuqing Yang and Lili Qiu},\nyear={2024},\nurl={https://openreview.net/forum?id=8dkp41et6U}\n}", "github": "", "project": "", "reviewers": "iM9S;jRw6;qdfA;PhaL;qC5x", "site": "https://openreview.net/forum?id=8dkp41et6U", "pdf_size": 2349383, "rating": "5;6;6;6;6", "confidence": "4;4;3;5;4", "soundness": "2;2;3;3;3", "contribution": "3;2;3;3;3", "presentation": "3;2;3;3;3", "wc_summary": "124;24;75;61;37", "wc_strengths": "64;56;34;69;61", "wc_weaknesses": "682;41;219;206;70", "wc_questions": "183;5;2;4;86", "wc_review": "1053;126;330;340;254", "wc_reply_reviewers": "0;16;12;15;0", 
"wc_reply_authors": "1186;127;161;373;447", "reply_reviewers": "0;1;1;1;0", "reply_authors": "2;1;1;1;1", "rating_avg": [ 5.8, 0.39999999999999997 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "contribution_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 64.2, 34.81034329046469 ], "wc_strengths_avg": [ 56.8, 12.155657119218196 ], "wc_weaknesses_avg": [ 243.6, 230.3897567167429 ], "wc_questions_avg": [ 56.0, 71.06335201775947 ], "wc_review_avg": [ 420.6, 325.32420752228074 ], "wc_reply_reviewers_avg": [ 8.6, 7.1442284397967 ], "wc_reply_authors_avg": [ 458.8, 383.43103682409435 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 205, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13295521544032790503&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0;0;0;0;1", "aff_unique_norm": "Microsoft;University of Texas at Austin", "aff_unique_dep": "Microsoft Corporation;", "aff_unique_url": "https://www.microsoft.com;https://www.utexas.edu", "aff_unique_abbr": "Microsoft;UT Austin", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Asia;Austin", "aff_country_unique_index": "0;0;0;1;0;0;0", "aff_country_unique": "United States;China" }, { "title": "Prometheus: Inducing Fine-Grained Evaluation Capability in Language Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19321", "id": "8euJaTveKw", "author_site": "Seungone Kim, Jamin Shin, yejin cho, Joel Jang, Shayne Longpre, Hwaran Lee, Sangdoo Yun, Ryan, S Shin, Sungdong Kim, James Thorne, Minjoon Seo", "tldr": "", "abstract": "Recently, GPT-4 has become the de facto evaluator for long-form text generated by large language models (LLMs). However, for practitioners and researchers with large and custom evaluation tasks, GPT-4 is unreliable due to its closed-source nature, uncontrolled versioning, and prohibitive costs. In this work, we propose PROMETHEUS a fully open-source LLM that is on par with GPT-4\u2019s evaluation capabilities when the appropriate reference materials (reference answer, score rubric) are accompanied. For this purpose, we construct a new dataset \u2013 FEEDBACK COLLECTION \u2013 that consists of 1K fine-grained score rubrics, 20K instructions, and 100K natural language feedback generated by GPT-4. Using the FEEDBACK COLLECTION, we train PROMETHEUS, a 13B evaluation-specific LLM that can assess any given response based on novel and unseen score rubrics and reference materials provided by the user. Our dataset\u2019s versatility and diversity make our model generalize to challenging real-world criteria, such as prioritizing conciseness, child-readability, or varying levels of formality. We show that PROMETHEUS shows a stronger correlation with GPT-4 evaluation compared to ChatGPT on seven evaluation benchmarks (Two Feedback Collection testsets, MT Bench, Vicuna Bench, Flask Eval, MT Bench Human Judgment, and HHH Alignment), showing the efficacy of our model and dataset design. During human evaluation with hand-crafted score rubrics, PROMETHEUS shows a Pearson correlation of 0.897 with human evaluators, which is on par with GPT-4-0613 (0.882), and greatly outperforms ChatGPT (0.392). 
Remarkably, when assessing the quality of the generated feedback, PROMETHEUS demonstrates a win rate of 58.62% when compared to GPT-4 evaluation and a win rate of 79.57% when compared to ChatGPT evaluation. Our findings suggest that by adding reference materials and training on GPT-4 feedback, we can obtain effective open-source evaluator LMs.", "keywords": "automatic evaluation;large language models;llm-as-a-judge", "primary_area": "datasets and benchmarks", "supplementary_material": "/attachment/fba32c838c8e8307f5560e1966c2a1c1b9cd1c4e.zip", "author": "Seungone Kim;Jamin Shin;Yejin Cho;Joel Jang;Shayne Longpre;Hwaran Lee;Sangdoo Yun;Seongjin Shin;Sungdong Kim;James Thorne;Minjoon Seo", "authorids": "~Seungone_Kim1;~Jamin_Shin1;~Yejin_Cho2;~Joel_Jang1;~Shayne_Longpre1;~Hwaran_Lee1;~Sangdoo_Yun1;~Seongjin_Shin1;~Sungdong_Kim1;~James_Thorne1;~Minjoon_Seo1", "gender": "M;M;F;M;M;F;M;M;;;M", "homepage": "https://github.com/SeungoneKim;https://jayshin.xyz;https://github.com/bodhitrii?tab=repositories;https://joeljang.github.io/;https://www.shaynelongpre.com;https://hwaranlee.github.io;https://sangdooyun.github.io/;;;https://jamesthorne.com;https://seominjoon.github.io", "dblp": "324/2064.html;225/5387;;;190/7024;127/9475;124/3009.html;277/5169;118/1568;204/1380;149/1367", "google_scholar": "https://scholar.google.co.kr/citations?user=qEf3e3EAAAAJ;GuBHIwsAAAAJ;;xL-7eFEAAAAJ;ADd_YfkAAAAJ;https://scholar.google.co.kr/citations?user=Jf6padoAAAAJ;o0qtjzYAAAAJ;;xKrSnDoAAAAJ;hao9RrgAAAAJ;zYze5fIAAAAJ", "orcid": ";;;;;0000-0002-3773-4871;;;;;", "linkedin": "seungone-kim-09b551264/;jayshin94/;;joel-jang-1289331a5/;shayne-redford-longpre/;hwaranlee/;;businesssavior/;;;minjoon-seo/", "or_profile": "~Seungone_Kim1;~Jamin_Shin1;~Yejin_Cho2;~Joel_Jang1;~Shayne_Longpre1;~Hwaran_Lee1;~Sangdoo_Yun1;~Seongjin_Shin1;~Sungdong_Kim1;~James_Thorne1;~Minjoon_Seo1", "aff": "KAIST;NAVER;Korea Advanced Institute of Science & Technology;Department of Computer Science, University of Washington;Massachusetts Institute of Technology;NAVER AI Lab;NAVER;NAVER;NAVER;KAIST;Twelve Labs", "aff_domain": "ee.kaist.ac.kr;navercorp.com;kaist.edu;cs.washington.edu;mit.edu;navercorp.com;navercorp.com;navercorp.com;navercorp.com;kaist.ac.kr;twelvelabs.io", "position": "MS student;Research Scientist;MS student;PhD student;PhD student;Lead;Research Scientist;Researcher;Researcher;Assistant Professor;Chief Scientist", "bibtex": "@inproceedings{\nkim2024prometheus,\ntitle={Prometheus: Inducing Fine-Grained Evaluation Capability in Language Models},\nauthor={Seungone Kim and Jamin Shin and Yejin Cho and Joel Jang and Shayne Longpre and Hwaran Lee and Sangdoo Yun and Seongjin Shin and Sungdong Kim and James Thorne and Minjoon Seo},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=8euJaTveKw}\n}", "github": "", "project": "", "reviewers": "B7Vr;WpqT;LTFo;6KgK", "pdf_size": 11257046, "rating": "1;5;6;6", "confidence": "4;3;4;4", "soundness": "1;2;4;2", "contribution": "1;3;3;3", "presentation": "1;3;4;3", "wc_summary": "103;126;99;93", "wc_strengths": "5;91;92;33", "wc_weaknesses": "41;221;95;161", "wc_questions": "8;2;2;21", "wc_review": "157;440;288;308", "wc_reply_reviewers": "0;0;15;0", "wc_reply_authors": "1045;1249;1404;0", "reply_reviewers": "0;0;1;0", "reply_authors": "5;4;4;0", "rating_avg": [ 4.5, 2.0615528128088303 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.25, 1.0897247358851685 ], "contribution_avg": [ 2.5, 
0.8660254037844386 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 105.25, 12.497499749949988 ], "wc_strengths_avg": [ 55.25, 37.5790832778023 ], "wc_weaknesses_avg": [ 129.5, 67.79933627993714 ], "wc_questions_avg": [ 8.25, 7.75806032459145 ], "wc_review_avg": [ 298.25, 100.30547093753162 ], "wc_reply_reviewers_avg": [ 3.75, 6.49519052838329 ], "wc_reply_authors_avg": [ 924.5, 548.7351364729618 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 1.920286436967152 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": -0.14002800840280097, "gs_citation": 193, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4752804398290639678&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "openreview": "https://openreview.net/forum?id=8euJaTveKw", "pdf": "https://openreview.net/pdf?id=8euJaTveKw", "email": "ee.kaist.ac.kr;navercorp.com;kaist.edu;cs.washington.edu;mit.edu;navercorp.com;navercorp.com;navercorp.com;navercorp.com;kaist.ac.kr;twelvelabs.io", "author_num": 11, "aff_unique_index": "0;1;0;2;3;1;1;1;1;0;4", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;NAVER Corporation;University of Washington;Massachusetts Institute of Technology;Twelve Labs", "aff_unique_dep": ";;Department of Computer Science;;", "aff_unique_url": "https://www.kaist.ac.kr;https://www.naver.com;https://www.washington.edu;https://web.mit.edu;https://twelvelabs.com", "aff_unique_abbr": "KAIST;NAVER;UW;MIT;", "aff_campus_unique_index": "1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;0;0;1;1;0;0;0;0;0;1", "aff_country_unique": "South Korea;United States" }, { "id": "8fJEOri51F", "title": "SSCBench: Monocular 3D Semantic Scene Completion Benchmark in Street Views", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Monocular scene understanding is a foundational component of autonomous systems. Within the spectrum of monocular perception topics, one crucial and useful task for holistic 3D scene understanding is semantic scene completion (SSC), which jointly completes semantic information and geometric details from RGB input. However, progress in SSC, particularly in large-scale street views, is hindered by the scarcity of high-quality datasets. To address this issue, we introduce SSCBench, a comprehensive benchmark that integrates scenes from widely used automotive datasets (e.g., KITTI-360, nuScenes, and Waymo). SSCBench follows an established setup and format in the community, facilitating the easy exploration of SSC methods in various street views. We benchmark models using monocular, trinocular, and point cloud input to assess the performance gap resulting from sensor coverage and modality. Moreover, we have unified semantic labels across diverse datasets to simplify cross-domain generalization testing. 
We commit to including more datasets and SSC models to drive further advancements in this field.", "keywords": "Semantic Scene Completion;Autonomous Driving", "primary_area": "datasets and benchmarks", "supplementary_material": "", "author": "Yiming Li;Sihang Li;Xinhao Liu;Moonjun Gong;Kenan Li;Nuo Chen;Zijun Wang;Zhiheng Li;Tao Jiang;Fisher Yu;Yue Wang;Hang Zhao;Zhiding Yu;Chen Feng", "authorids": "~Yiming_Li2;~Sihang_Li4;~Xinhao_Liu1;~Moonjun_Gong1;~Kenan_Li1;~Nuo_Chen6;~Zijun_Wang2;~Zhiheng_Li7;~Tao_Jiang11;~Fisher_Yu2;~Yue_Wang2;~Hang_Zhao1;~Zhiding_Yu1;~Chen_Feng2", "gender": "M;M;;;M;M;M;M;;M;M;M;;M", "homepage": "https://yimingli-page.github.io/;https://louis-leee.github.io/;https://gaaaavin.github.io/;;https://connorkevin.github.io/;https://nuochen1203.github.io/;;;;https://www.yf.io/;https://yuewang.xyz;http://www.mit.edu/~hangzhao/;;https://ai4ce.github.io/", "dblp": "l/YimingLi-3;;126/4582-3;;;135/5622-3;;;;117/6314;33/4822-41;;;01/161-2", "google_scholar": "https://scholar.google.com/citations?hl=en;90IoeJsAAAAJ;6pI4Xa4AAAAJ;;;https://scholar.google.com/citations?hl=en;;;;-XCiamcAAAAJ;v-AEFIEAAAAJ;DmahiOYAAAAJ;;YeG8ZM0AAAAJ", "orcid": "0000-0002-0157-6218;0000-0003-3053-5770;0000-0002-3036-0344;;;0009-0007-0581-0327;;;;;;;;0000-0003-3211-1576", "linkedin": "yiming-li-58b519173/;sihang-li-07724b267/;;;;;zijun-wang-083025193/;zhiheng-li-zl3466nyu/;;;;;;simbaforrest/", "or_profile": "~Yiming_Li2;~Sihang_Li4;~Xinhao_Liu1;~Moonjun_Gong1;~Kenan_Li1;~Nuo_Chen6;~Zijun_Wang2;~Zhiheng_Li7;~Tao_Jiang11;~Fisher_Yu2;~Yue_Wang2;~Hang_Zhao1;~Zhiding_Yu1;~Chen_Feng2", "aff": "New York University;New York University;New York University;;;New York University;Duke University;New York University;;Swiss Federal Institute of Technology;NVIDIA;Tsinghua University;;New York University", "aff_domain": "nyu.edu;nyu.edu;nyu.edu;;;nyu.edu;duke.edu;nyu.edu;;ethz.ch;nvidia.com;tsinghua.edu.cn;;nyu.edu", "position": "PhD student;PhD student;PhD student;;;MS student;MS student;Undergrad student;;Assistant Professor;Researcher;Assistant Professor;;Assistant Professor", "bibtex": "@misc{\nli2024sscbench,\ntitle={{SSCB}ench: Monocular 3D Semantic Scene Completion Benchmark in Street Views},\nauthor={Yiming Li and Sihang Li and Xinhao Liu and Moonjun Gong and Kenan Li and Nuo Chen and Zijun Wang and Zhiheng Li and Tao Jiang and Fisher Yu and Yue Wang and Hang Zhao and Zhiding Yu and Chen Feng},\nyear={2024},\nurl={https://openreview.net/forum?id=8fJEOri51F}\n}", "github": "", "project": "", "reviewers": "JQ2D;MTom;drPb;5Bcg", "site": "https://openreview.net/forum?id=8fJEOri51F", "pdf_size": 11668709, "rating": "5;5;5;8", "confidence": "4;4;4;3", "soundness": "3;3;3;3", "contribution": "2;2;3;3", "presentation": "3;3;2;4", "wc_summary": "45;59;33;94", "wc_strengths": "44;65;49;79", "wc_weaknesses": "227;300;230;21", "wc_questions": "63;81;10;84", "wc_review": "379;505;322;278", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 57.75, 22.862359895688808 ], "wc_strengths_avg": [ 59.25, 13.790848414800301 ], "wc_weaknesses_avg": [ 194.5, 104.34198579670601 ], "wc_questions_avg": [ 59.5, 29.685855217594792 ], "wc_review_avg": [ 371.0, 85.24963343029692 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], 
"reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 14, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10903902950978373793&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;0;0;1;0;2;3;4;0", "aff_unique_norm": "New York University;Duke University;Swiss Federal Institute of Technology;NVIDIA;Tsinghua University", "aff_unique_dep": ";;;NVIDIA Corporation;", "aff_unique_url": "https://www.nyu.edu;https://www.duke.edu;https://www.ethz.ch;https://www.nvidia.com;https://www.tsinghua.edu.cn", "aff_unique_abbr": "NYU;Duke;ETH Zurich;NVIDIA;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;1;0;2;0", "aff_country_unique": "United States;Switzerland;China" }, { "id": "8fQlGQkj0S", "title": "A Theoretical Analysis of In-context Task Retrieval and Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "In-context learning (ICL) can be used for two different purposes: task retrieval and task learning.\nTask retrieval focuses on recalling a pre-trained task using examples from the task that closely approximates the target pre-trained task, while task learning involves learning a task using in-context examples.\nTo rigorously analyze these two modes, we propose generative models for both pretraining data and in-context samples.\nAssuming we use our proposed models and consider the mean squared error as a risk measure, we demonstrate that in-context prediction using a Bayes-optimal next-token predictor equates to the posterior mean of the label, conditioned on in-context samples.\nFrom this equivalence, we derive risk upper bounds for in-context learning.\nWe reveal a unique phenomenon in task retrieval: as the number of in-context samples increases, the risk upper bound decreases initially and then increases subsequently.\nThis implies that more in-context examples could potentially worsen task retrieval.\nWe validate our analysis with numerical computations in various scenarios and validate that our findings are replicable in the actual Transformer model implementation.", "keywords": "In-context Learning;Task Learning;Task Retrieval;Bayesian Inference;Noisy Linear Regression", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/2cc5080d6e6e6417844b1b5182590f4a19145080.pdf", "author": "Ziqian Lin;Kangwook Lee", "authorids": "~Ziqian_Lin1;~Kangwook_Lee1", "gender": "M;M", "homepage": "https://myhakureimu.github.io/;http://kangwooklee.com/", "dblp": "245/3453;88/9826-1", "google_scholar": "0nOdbCoAAAAJ;sCEl8r-n5VEC", "orcid": ";", "linkedin": ";", "or_profile": "~Ziqian_Lin1;~Kangwook_Lee1", "aff": "University of Wisconsin - Madison;KRAFTON", "aff_domain": "wisc.edu;krafton.com", "position": "PhD student;Researcher", "bibtex": "@misc{\nlin2024a,\ntitle={A Theoretical Analysis of In-context Task Retrieval and Learning},\nauthor={Ziqian Lin and Kangwook Lee},\nyear={2024},\nurl={https://openreview.net/forum?id=8fQlGQkj0S}\n}", "github": "", "project": "", "reviewers": "v8Fu;KzaL;ZoJM;4TUM;8Pgy", "site": "https://openreview.net/forum?id=8fQlGQkj0S", "pdf_size": 1271038, "rating": "3;5;6;6;6", "confidence": "3;4;3;2;3", "soundness": "2;3;3;3;4", "contribution": "2;2;3;3;3", "presentation": "2;3;3;3;2", "wc_summary": "79;69;81;105;124", "wc_strengths": "34;31;48;71;94", "wc_weaknesses": 
"221;147;61;60;142", "wc_questions": "14;6;1;145;4", "wc_review": "348;253;191;381;364", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "759;686;135;566;382", "reply_reviewers": "0;0;0;0;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.2, 1.16619037896906 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "contribution_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 91.6, 20.05592181875468 ], "wc_strengths_avg": [ 55.6, 23.83778513201258 ], "wc_weaknesses_avg": [ 126.2, 60.50256192922742 ], "wc_questions_avg": [ 34.0, 55.666866267107224 ], "wc_review_avg": [ 307.4, 73.1808718177093 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 505.6, 224.95564007154832 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.2711630722733202, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:VUKWRZU-hyYJ:scholar.google.com/&scioq=A+Theoretical+Analysis+of+In-context+Task+Retrieval+and+Learning&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "University of Wisconsin-Madison;KRAFTON Inc.", "aff_unique_dep": ";", "aff_unique_url": "https://www.wisc.edu;https://www.krafton.com", "aff_unique_abbr": "UW-Madison;KRAFTON", "aff_campus_unique_index": "0", "aff_campus_unique": "Madison;", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;South Korea" }, { "title": "Amortized Network Intervention to Steer the Excitatory Point Processes", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19320", "id": "8g26Yv1EOu", "author_site": "Zitao Song, Wendi Ren, Shuang Li", "tldr": "", "abstract": "Excitatory point processes (i.e., event flows) occurring over dynamic graphs (i.e., evolving topologies) provide a fine-grained model to capture how discrete events may spread over time and space. How to effectively steer the event flows by modifying the dynamic graph structures presents an interesting problem, motivated by curbing the spread of infectious diseases through strategically locking down cities to mitigating traffic congestion via traffic light optimization. To address the intricacies of planning and overcome the high dimensionality inherent to such decision-making problems, we design an Amortized Network Interventions (ANI) framework, allowing for the pooling of optimal policies from history and other contexts while ensuring a permutation equivalent property. This property enables efficient knowledge transfer and sharing across diverse contexts. Each task is solved by an H-step lookahead model-based reinforcement learning, where neural ODEs are introduced to model the dynamics of the excitatory point processes. Instead of simulating rollouts from the dynamics model, we derive an analytical mean-field approximation for the event flows given the dynamics, making the online planning more efficiently solvable. 
We empirically illustrate that this ANI approach substantially enhances policy learning for unseen dynamics and exhibits promising outcomes in steering event flows through network intervention using synthetic and real COVID datasets.", "keywords": "Time series application;Point Process;Amortized Learning", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/3169788ddf13a20386755c976fece7fa1bc6f6ac.pdf", "author": "Zitao Song;Wendi Ren;Shuang Li", "authorids": "~Zitao_Song1;~Wendi_Ren1;~Shuang_Li3", "gender": "M;F;F", "homepage": "https://tsedao.github.io/;;https://shuangli01.github.io", "dblp": ";218/8905;43/6294-2", "google_scholar": "RATrbJUAAAAJ;V0vQt1YAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0003-4646-0339;;", "linkedin": ";wendiren-gatech/;", "or_profile": "~Zitao_Song1;~Wendi_Ren1;~Shuang_Li3", "aff": "Nanyang Technological University;The Chinese University of Hong Kong;The Chinese University of Hong Kong (Shenzhen)", "aff_domain": "ntu.edu.sg;cuhk.edu.cn;cuhk.edu.cn", "position": "Researcher;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nsong2024amortized,\ntitle={Amortized Network Intervention to Steer the Excitatory Point Processes},\nauthor={Zitao Song and Wendi Ren and Shuang Li},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=8g26Yv1EOu}\n}", "github": "", "project": "", "reviewers": "ya7t;9miN;hvRi;YqcV", "pdf_size": 4978040, "rating": "5;5;6;6", "confidence": "3;2;3;4", "soundness": "2;3;3;3", "contribution": "2;2;3;3", "presentation": "3;2;3;3", "wc_summary": "51;132;116;66", "wc_strengths": "83;68;33;14", "wc_weaknesses": "38;157;32;27", "wc_questions": "70;40;12;357", "wc_review": "242;397;193;464", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "896;568;561;690", "reply_reviewers": "0;0;0;0", "reply_authors": "3;2;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 91.25, 33.65542303997975 ], "wc_strengths_avg": [ 49.5, 27.37243138634199 ], "wc_weaknesses_avg": [ 63.5, 54.12254613375095 ], "wc_questions_avg": [ 119.75, 138.50338443518265 ], "wc_review_avg": [ 324.0, 110.46945279125809 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 678.75, 135.51268390818626 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7071067811865475, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:zZ3BY3ZTVrYJ:scholar.google.com/&scioq=Amortized+Network+Intervention+to+Steer+the+Excitatory+Point+Processes&hl=en&as_sdt=0,48", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=8g26Yv1EOu", "pdf": "https://openreview.net/pdf?id=8g26Yv1EOu", "email": "ntu.edu.sg;cuhk.edu.cn;cuhk.edu.cn", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Nanyang Technological University;Chinese University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "https://www.ntu.edu.sg;https://www.cuhk.edu.hk", "aff_unique_abbr": "NTU;CUHK", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Hong Kong SAR;Shenzhen", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Singapore;China" }, { "id": "8gZtt8nrpI", "title": "Diffusion Models With 
Learned Adaptive Noise Processes", "track": "main", "status": "Reject", "tldr": "", "abstract": "Diffusion models have gained traction as powerful algorithms for synthesizing high-quality images. Central to these algorithms is the diffusion process, which maps data to noise according to equations inspired by thermodynamics, and which can significantly impact performance. In this work, we explore whether a diffusion process can be learned from data. We propose multivariate learned adaptive noise (MULAN), a learned diffusion process that applies Gaussian noise at different rates across an image. Our method consists of three components\u2014a multivariate noise schedule, instance-conditional diffusion, and auxiliary variables\u2014which ensure that the learning objective is no longer invariant to the choice of noise schedule as in previous works. Our work is grounded in Bayesian inference and casts the learned diffusion process as an approximate variational posterior that yields a tighter lower bound on marginal likelihood. Empirically, MULAN significantly improves likelihood estimation on CIFAR10 and ImageNet, and achieves ~2x faster convergence to state-of-the-art performance compared to classical diffusion.", "keywords": "Generative Modeling;Diffusion Models;likelihood;Noising Schedule", "primary_area": "generative models", "supplementary_material": "", "author": "Subham Sekhar Sahoo;Aaron Gokaslan;Christopher De Sa;Volodymyr Kuleshov", "authorids": "~Subham_Sekhar_Sahoo1;~Aaron_Gokaslan1;~Christopher_De_Sa2;~Volodymyr_Kuleshov1", "gender": "M;M;;M", "homepage": ";https://skylion007.github.io/;https://www.cs.cornell.edu/~kuleshov/;http://cs.cornell.edu/~cdesa", "dblp": ";220/6816;81/8612;154/6336", "google_scholar": "Z7DoDbAAAAAJ;Mt2wyL4AAAAJ;RY_t8XAAAAAJ;", "orcid": ";0000-0002-3575-2961;;", "linkedin": "shakeh3r/;aarongokaslan/;;", "or_profile": "~Subham_Sekhar_Sahoo1;~Aaron_Gokaslan1;~Volodymyr_Kuleshov1;~Christopher_De_Sa1", "aff": "Department of Computer Science, Cornell University;Cornell University;Cornell University;Cornell University", "aff_domain": "cs.cornell.edu;cornell.edu;cornell.edu;cornell.edu", "position": "PhD student;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@misc{\nsahoo2024diffusion,\ntitle={Diffusion Models With Learned Adaptive Noise Processes},\nauthor={Subham Sekhar Sahoo and Aaron Gokaslan and Christopher De Sa and Volodymyr Kuleshov},\nyear={2024},\nurl={https://openreview.net/forum?id=8gZtt8nrpI}\n}", "github": "", "project": "", "reviewers": "U1Ra;oSn9;aHz6;PeVN", "site": "https://openreview.net/forum?id=8gZtt8nrpI", "pdf_size": 4573487, "rating": "5;5;6;6", "confidence": "3;4;3;4", "soundness": "2;2;3;3", "contribution": "2;3;2;3", "presentation": "2;3;3;3", "wc_summary": "35;75;38;114", "wc_strengths": "124;25;21;149", "wc_weaknesses": "310;111;97;326", "wc_questions": "138;53;48;13", "wc_review": "607;264;204;602", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "976;558;690;638", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 65.5, 32.12864765283469 ], "wc_strengths_avg": [ 79.75, 57.451610073173754 ], "wc_weaknesses_avg": [ 211.0, 107.26369376447933 ], "wc_questions_avg": [ 63.0, 45.96194077712559 ], "wc_review_avg": [ 419.25, 186.4689987638696 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 715.5, 
157.577758582866 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:hTKfdIOIfGAJ:scholar.google.com/&scioq=Diffusion+Models+With+Learned+Adaptive+Noise+Processes&hl=en&as_sdt=0,33", "gs_version_total": 2, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Cornell University", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.cornell.edu", "aff_unique_abbr": "Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "8giiPtg6rw", "title": "DataFreeShield: Defending Adversarial Attacks without Training Data", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recent advances in adversarial robustness rely on an abundant set of training data, where using external or additional datasets has become a common setting.\nHowever, due to security and privacy issues, it is more common that a pretrained model is available while the dataset is not.\nIn such a scenario, existing methods that assume accessibility to the original data become inapplicable.\nFor the first time, we propose a problem of learning *data-free adversarial robustness*, where given only a pretrained model, adversarial robustness should be achieved without accessing the training dataset.\nIn our preliminary study, we identify that robustness without the original dataset is difficult to achieve, even with similar domain datasets.\nWe tackle the task from two perspectives: surrogate dataset generation and adversarial training using the generated data.\nFor dataset generation, we propose diversified sample synthesis, which largely enhances the diversity of synthetic samples that are known to have low coverage. \nFor training, we propose a soft label loss that best learns robustness from noisy synthetic samples and a gradient refinement method toward smoother loss surface. 
\nExtensively validating methods using four datasets, we show that the proposed solution outperforms several baselines, demonstrating that the proposed method sets the first solution for the data-free robustness problem.", "keywords": "Data-free;Adversarial Robustness;Adversarial Training", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/0fc4fd221918f07afe3bc4bc764d6743e67677e1.zip", "author": "Hyeyoon Lee;Kanghyun Choi;Dain Kwon;SunJong Park;Mayoore Selvarasa Jaiswal;Noseong Park;Jonghyun Choi;Jinho Lee", "authorids": "~Hyeyoon_Lee1;~Kanghyun_Choi1;~Dain_Kwon1;~SunJong_Park1;~Mayoore_Selvarasa_Jaiswal1;~Noseong_Park1;~Jonghyun_Choi1;~Jinho_Lee2", "gender": ";M;F;;F;;M;M", "homepage": "https://aisys.snu.ac.kr/members/HyeyoonLee.html;https://aisys.snu.ac.kr/kanghyun.html;https://github.com/meowrowan;;mayoore.github.io;;https://ppolon.github.io/;http://acsys.snu.ac.kr/people.html", "dblp": "276/0074;229/7353;380/6008;;http://dblp.uni-trier.de/pers/hd/j/Jaiswal:Mayoore_S=;;21/11103;", "google_scholar": "lYXg5nsAAAAJ;n9e6qnsAAAAJ;;;IcMxiP4AAAAJ;;uiGWnm4AAAAJ;https://scholar.google.com/citations?hl=ko", "orcid": ";;;;;;0000-0002-7934-8434;", "linkedin": ";;;;;;jonghyun-choi-459bb615/;", "or_profile": "~Hyeyoon_Lee1;~Kanghyun_Choi1;~Dain_Kwon1;~SunJong_Park1;~Mayoore_Selvarasa_Jaiswal1;~Noseong_Park1;~Jonghyun_Choi1;~Jinho_Lee2", "aff": "Seoul National University;Seoul National University;Yonsei University;;University of Washington;;Yonsei University;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr;yonsei.ac.kr;; ;;yonsei.ac.kr;snu.ac.kr", "position": "PhD student;PhD student;Undergrad student;;Graduate Student;;Associate Professor;Associate Professor", "bibtex": "@misc{\nlee2024datafreeshield,\ntitle={DataFreeShield: Defending Adversarial Attacks without Training Data},\nauthor={Hyeyoon Lee and Kanghyun Choi and Dain Kwon and SunJong Park and Mayoore Selvarasa Jaiswal and Noseong Park and Jonghyun Choi and Jinho Lee},\nyear={2024},\nurl={https://openreview.net/forum?id=8giiPtg6rw}\n}", "github": "", "project": "", "reviewers": "F6PX;nfcV;RMmq", "site": "https://openreview.net/forum?id=8giiPtg6rw", "pdf_size": 15521882, "rating": "3;5;5", "confidence": "4;4;4", "soundness": "2;4;2", "contribution": "2;3;2", "presentation": "3;3;2", "wc_summary": "101;48;36", "wc_strengths": "115;42;29", "wc_weaknesses": "712;134;152", "wc_questions": "191;2;2", "wc_review": "1119;226;219", "wc_reply_reviewers": "0;0;43", "wc_reply_authors": "1888;710;1258", "reply_reviewers": "0;0;1", "reply_authors": "4;2;3", "rating_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 0.9428090415820634 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 61.666666666666664, 28.241026106633512 ], "wc_strengths_avg": [ 62.0, 37.85058343892029 ], "wc_weaknesses_avg": [ 332.6666666666667, 268.3298136415126 ], "wc_questions_avg": [ 65.0, 89.09545442950498 ], "wc_review_avg": [ 521.3333333333334, 422.62381486244817 ], "wc_reply_reviewers_avg": [ 14.333333333333334, 20.27039439401436 ], "wc_reply_authors_avg": [ 1285.3333333333333, 481.30470829010414 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.0, 0.816496580927726 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=16114210221793948933&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 9, "aff_unique_index": "0;0;1;2;1;0", "aff_unique_norm": "Seoul National University;Yonsei University;University of Washington", "aff_unique_dep": ";;", "aff_unique_url": "https://www.snu.ac.kr;https://www.yonsei.ac.kr;https://www.washington.edu", "aff_unique_abbr": "SNU;Yonsei;UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "South Korea;United States" }, { "id": "8hc2UvwTaL", "title": "FLAIM: AIM-based Synthetic Data Generation in the Federated Setting", "track": "main", "status": "Reject", "tldr": "", "abstract": "Preserving individual privacy while enabling collaborative data sharing is crucial for organizations. Synthetic data generation is one solution, producing artificial data that mirrors the statistical properties of private data. While numerous techniques have been devised under differential privacy, they predominantly assume data is centralized. However, data is often distributed across multiple clients in a federated manner. In this work, we initiate the study of federated synthetic tabular data generation. Building upon a SOTA central method known as AIM, we present DistAIM and FLAIM. We show it is straightforward to distribute AIM, extending a recent approach based on secure multi-party computation which necessitates additional overhead, making it less suited to federated scenarios. We then demonstrate that naively federating AIM can lead to substantial degradation in utility under the presence of heterogeneity. To mitigate both issues, we propose an augmented FLAIM approach that maintains a private proxy of heterogeneity. We simulate our methods across a range of benchmark datasets under different degrees of heterogeneity and show this can improve utility while reducing overhead.", "keywords": "Synthetic Data;Differential Privacy;Federated Learning", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "", "author": "Samuel Maddock;Graham Cormode;carsten maple", "authorids": "~Samuel_Maddock1;~Graham_Cormode1;~carsten_maple1", "gender": "M;M;", "homepage": "https://warwick.ac.uk/fac/sci/dcs/people/u1714078/;http://dimacs.rutgers.edu/~graham/;https://warwick.ac.uk/fac/sci/wmg/people/profile/?wmgid=1102", "dblp": "289/1670;c/GrahamCormode;05/2263.html", "google_scholar": "ohQy__cAAAAJ;https://scholar.google.co.uk/citations?user=gpLVKmEAAAAJ;8MMdv50AAAAJ", "orcid": ";0000-0002-0698-0922;0000-0002-4715-212X", "linkedin": "samuel-maddock/;;", "or_profile": "~Samuel_Maddock1;~Graham_Cormode1;~carsten_maple1", "aff": "University of Warwick;The university of Warwick;The university of Warwick", "aff_domain": "warwick.ac.uk;warwick.ac.uk;warwick.ac.uk", "position": "PhD student;Full Professor;Full Professor", "bibtex": "@misc{\nmaddock2024flaim,\ntitle={{FLAIM}: {AIM}-based Synthetic Data Generation in the Federated Setting},\nauthor={Samuel Maddock and Graham Cormode and carsten maple},\nyear={2024},\nurl={https://openreview.net/forum?id=8hc2UvwTaL}\n}", "github": "", "project": "", "reviewers": "DXga;B23T;gW6i", "site": "https://openreview.net/forum?id=8hc2UvwTaL", "pdf_size": 7555006, "rating": "3;3;8", "confidence": "3;5;3", "soundness": "2;3;3", "contribution": "2;1;2", "presentation": "2;2;3", "wc_summary": "76;129;150", "wc_strengths": "55;60;19", "wc_weaknesses": "156;64;23", "wc_questions": "149;28;19", "wc_review": "436;281;211", 
"wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1415;760;245", "reply_reviewers": "0;0;0", "reply_authors": "2;1;1", "rating_avg": [ 4.666666666666667, 2.357022603955158 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 1.6666666666666667, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 118.33333333333333, 31.13768706175132 ], "wc_strengths_avg": [ 44.666666666666664, 18.263503375736967 ], "wc_weaknesses_avg": [ 81.0, 55.61174935808679 ], "wc_questions_avg": [ 65.33333333333333, 59.275252471461954 ], "wc_review_avg": [ 309.3333333333333, 94.0153651744697 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 806.6666666666666, 478.78898158676213 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2418353014466619100&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Warwick", "aff_unique_dep": "", "aff_unique_url": "https://www.warwick.ac.uk", "aff_unique_abbr": "Warwick", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Neural Auto-designer for Enhanced Quantum Kernels", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19319", "id": "8htNAnMSyP", "author_site": "Cong Lei, Yuxuan Du, Peng Mi, Jun Yu, Tongliang Liu", "tldr": "", "abstract": "Quantum kernels hold great promise for offering computational advantages over classical learners, with the effectiveness of these kernels closely tied to the design of the feature map. However, the challenge of designing effective quantum feature maps for real-world datasets, particularly in the absence of sufficient prior information, remains a significant obstacle. In this study, we present a data-driven approach that automates the design of problem-specific quantum feature maps. Our approach leverages feature-selection techniques to handle high-dimensional data on near-term quantum machines with limited qubits, and incorporates a deep neural predictor to efficiently evaluate the performance of various candidate quantum kernels. Through extensive numerical simulations on different datasets, we demonstrate the superiority of our proposal over prior methods, especially for the capability of eliminating the kernel concentration issue and identifying the feature map with prediction advantages. 
Our work not only unlocks the potential of quantum kernels for enhancing real-world tasks, but also highlights the substantial role of deep learning in advancing quantum machine learning.", "keywords": "Quantum machine learning;kernel learning;quantum kernels;feature map;quantum circuit design", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Cong Lei;Yuxuan Du;Peng Mi;Jun Yu;Tongliang Liu", "authorids": "~Cong_Lei2;~Yuxuan_Du2;~Peng_Mi1;~Jun_Yu3;~Tongliang_Liu1", "gender": ";M;M;M;M", "homepage": "https://cong-lei.github.io/;https://github.com/yuxuan-du/Yuxuan-Du.github.io;https://www.github.com/Mi-Peng;https://faculty.ustc.edu.cn/yujun_AI/en/index.htm;https://tongliang-liu.github.io/", "dblp": "205/7609;;;50/5754-1.html;150/6667", "google_scholar": ";https://scholar.google.com.au/citations?user=50sFkzIAAAAJ;PTM4HCsAAAAJ;efZyqyQAAAAJ;https://scholar.google.com.au/citations?user=EiLdZ_YAAAAJ", "orcid": "0000-0003-2522-1152;0000-0002-1193-9756;;0000-0002-3197-8103;", "linkedin": ";;;;", "or_profile": "~Cong_Lei2;~Yuxuan_Du2;~Peng_Mi1;~Jun_Yu3;~Tongliang_Liu1", "aff": "University of Sydney;JD.com;;University of Science and Technology of China;Mohamed bin Zayed University of Artificial Intelligence", "aff_domain": "usyd.edu.au;jd.com;;ustc.edu.cn;mbzuai.ac.ae", "position": "PhD student;Researcher;;Associate Professor;Affiliated Associate Professor", "bibtex": "@inproceedings{\nlei2024neural,\ntitle={Neural Auto-designer for Enhanced Quantum Kernels},\nauthor={Cong Lei and Yuxuan Du and Peng Mi and Jun Yu and Tongliang Liu},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=8htNAnMSyP}\n}", "github": "", "project": "", "reviewers": "3hUP;oLos;FSEH;e6Tr", "pdf_size": 4287919, "rating": "3;6;6;6", "confidence": "5;5;4;4", "soundness": "2;2;3;2", "contribution": "1;2;3;2", "presentation": "3;3;3;3", "wc_summary": "85;46;100;51", "wc_strengths": "8;49;103;45", "wc_weaknesses": "242;139;384;163", "wc_questions": "5;53;4;35", "wc_review": "340;287;591;294", "wc_reply_reviewers": "770;53;470;11", "wc_reply_authors": "1773;1890;3165;2050", "reply_reviewers": "2;1;2;1", "reply_authors": "6;7;7;5", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 70.5, 22.699118925632334 ], "wc_strengths_avg": [ 51.25, 33.88491552298751 ], "wc_weaknesses_avg": [ 232.0, 95.67392539244953 ], "wc_questions_avg": [ 24.25, 20.753011829611623 ], "wc_review_avg": [ 378.0, 124.6495086231791 ], "wc_reply_reviewers_avg": [ 326.0, 312.9001438158826 ], "wc_reply_authors_avg": [ 2219.5, 554.6694961866931 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 6.25, 0.82915619758885 ], "replies_avg": [ 37, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2971491082992789812&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=8htNAnMSyP", "pdf": "https://openreview.net/pdf?id=8htNAnMSyP", "email": "usyd.edu.au;jd.com;;ustc.edu.cn;mbzuai.ac.ae", "author_num": 5, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of Sydney;JD.com;University of Science and Technology of China;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": 
";;;", "aff_unique_url": "https://www.sydney.edu.au;https://www.jd.com;http://www.ustc.edu.cn;https://mbzuai.ac.ae", "aff_unique_abbr": "USYD;JD;USTC;MBZUAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;2", "aff_country_unique": "Australia;China;United Arab Emirates" }, { "title": "Poisoned Forgery Face: Towards Backdoor Attacks on Face Forgery Detection", "status": "Spotlight", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19318", "id": "8iTpB4RNvP", "author_site": "Jiawei Liang, Siyuan Liang, Aishan Liu, Xiaojun Jia, Junhao Kuang, Xiaochun Cao", "tldr": "", "abstract": "The proliferation of face forgery techniques has raised significant concerns within society, thereby motivating the development of face forgery detection methods. These methods aim to distinguish forged faces from genuine ones and have proven effective in practical applications. However, this paper introduces a novel and previously unrecognized threat in face forgery detection scenarios caused by backdoor attack. By embedding backdoors into models and incorporating specific trigger patterns into the input, attackers can deceive detectors into producing erroneous predictions for forged faces. To achieve this goal, this paper proposes \\emph{Poisoned Forgery Face} framework, which enables clean-label backdoor attacks on face forgery detectors. Our approach involves constructing a scalable trigger generator and utilizing a novel convolving process to generate translation-sensitive trigger patterns. Moreover, we employ a relative embedding method based on landmark-based regions to enhance the stealthiness of the poisoned samples. Consequently, detectors trained on our poisoned samples are embedded with backdoors. Notably, our approach surpasses SoTA backdoor baselines with a significant improvement in attack success rate (+16.39\\% BD-AUC) and reduction in visibility (-12.65\\% $L_\\infty$). Furthermore, our attack exhibits promising performance against backdoor defenses. We anticipate that this paper will draw greater attention to the potential threats posed by backdoor attacks in face forgery detection scenarios. 
Our codes will be made available at \\url{https://github.com/JWLiang007/PFF}.", "keywords": "Deepfake Detection;Backdoor Attack", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/c9892c29e38f6dc89dbeac92fcf9e72a9c2f5640.zip", "author": "Jiawei Liang;Siyuan Liang;Aishan Liu;Xiaojun Jia;Junhao Kuang;Xiaochun Cao", "authorids": "~Jiawei_Liang1;~Siyuan_Liang1;~Aishan_Liu1;~Xiaojun_Jia1;~Junhao_Kuang1;~Xiaochun_Cao3", "gender": "M;F;M;M;;M", "homepage": "https://www.researchgate.net/profile/Jiawei-Liang-11;https://www.github.com/;https://liuaishan.github.io/;https://jiaxiaojunqaq.github.io/;https://github.com/knight4u13;https://scst.sysu.edu.cn/members/caoxiaochun.htm", "dblp": ";205/8767.html;177/5658;;;39/3695", "google_scholar": "https://scholar.google.com.hk/citations?user=w60LFNwAAAAJ;Hon4nf0AAAAJ;88tzr_sAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;https://scholar.google.com/citations?hl=en", "orcid": "0000-0003-1143-6873;0000-0002-6154-0233;;0000-0002-2018-9344;;0000-0001-7141-708X", "linkedin": ";;;;;", "or_profile": "~Jiawei_Liang1;~Siyuan_Liang1;~Aishan_Liu1;~Xiaojun_Jia1;~Junhao_Kuang1;~Xiaochun_Cao3", "aff": "SUN YAT-SEN UNIVERSITY;National University of Singapore;Beihang University;Nanyang Technological University;SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY", "aff_domain": "sysu.edu.cn;nus.edu;buaa.edu.cn;ntu.edu.sg;sysu.edu.cn;sysu.edu.cn", "position": "MS student;Researcher;Assistant Professor;Postdoc;Undergrad student;Full Professor", "bibtex": "@inproceedings{\nliang2024poisoned,\ntitle={Poisoned Forgery Face: Towards Backdoor Attacks on Face Forgery Detection},\nauthor={Jiawei Liang and Siyuan Liang and Aishan Liu and Xiaojun Jia and Junhao Kuang and Xiaochun Cao},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=8iTpB4RNvP}\n}", "github": "", "project": "", "reviewers": "GcXY;4qiC;uGDS", "pdf_size": 5589900, "rating": "6;8;8", "confidence": "4;5;4", "soundness": "3;4;3", "contribution": "3;4;3", "presentation": "3;4;3", "wc_summary": "82;72;100", "wc_strengths": "71;81;69", "wc_weaknesses": "83;144;216", "wc_questions": "230;4;9", "wc_review": "466;301;394", "wc_reply_reviewers": "97;22;37", "wc_reply_authors": "1246;998;1549", "reply_reviewers": "1;1;1", "reply_authors": "3;3;4", "rating_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 84.66666666666667, 11.585431464655178 ], "wc_strengths_avg": [ 73.66666666666667, 5.2493385826745405 ], "wc_weaknesses_avg": [ 147.66666666666666, 54.35888969514452 ], "wc_questions_avg": [ 81.0, 105.37868222115262 ], "wc_review_avg": [ 387.0, 67.54257916307313 ], "wc_reply_reviewers_avg": [ 52.0, 32.4037034920393 ], "wc_reply_authors_avg": [ 1264.3333333333333, 225.3180468187629 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12263160280159972267&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=8iTpB4RNvP", "pdf": 
"https://openreview.net/pdf?id=8iTpB4RNvP", "email": "sysu.edu.cn;nus.edu;buaa.edu.cn;ntu.edu.sg;sysu.edu.cn;sysu.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2;3;0;0", "aff_unique_norm": "Sun Yat-sen University;National University of Singapore;Beihang University;Nanyang Technological University", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.sysu.edu.cn;https://www.nus.edu.sg;http://www.buaa.edu.cn/;https://www.ntu.edu.sg", "aff_unique_abbr": "SYSU;NUS;BUAA;NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1;0;0", "aff_country_unique": "China;Singapore" }, { "id": "8iojQVLLWb", "title": "Bayesian Knowledge Distillation for Online Action Detection", "track": "main", "status": "Reject", "tldr": "", "abstract": "Online action detection aims at identifying the ongoing action in a streaming video without seeing the future. Timely and accurate response is critical for real-world applications. In this paper, we introduce Bayesian knowledge distillation (BKD), an efficient and generalizable framework for online action detection. Specifically, we adopt a teacher-student architecture. During the training, the teacher model is built with a Bayesian neural network to output both the feature mutual information that measures the informativeness of historical features to ongoing action and the detection uncertainty. For efficient online detection, we also introduce a student model based on the evidential neural network that learns the feature mutual information and predictive uncertainties from the teacher model. In this way, the student model can not only select important features and make fast inference, but also efficiently quantify the prediction uncertainty by a single forward pass. We evaluated our proposed method on three benchmark datasets including THUMOS'14, TVSeries, and HDD. Our method achieves competitive performance with much better computational efficiency and much less model complexity. We also demonstrate that BKD generalizes better and is more data-efficient by extensive ablation studies. 
Finally, we validate the uncertainty quantification of the student model by performing abnormal action detection.", "keywords": "Online action detection;knowledge distillation;mutual information;uncertainty quantification", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/f6e366e0bf22ec1481b420f92501008a709307a6.pdf", "author": "Hongji Guo;Hanjing Wang;Qiang Ji", "authorids": "~Hongji_Guo1;~Hanjing_Wang2;~Qiang_Ji1", "gender": "M;M;M", "homepage": "https://sites.ecse.rpi.edu/~cvrl/Hongji/hongji.html;https://www.ecse.rpi.edu/~cvrl/people_zw.html;https://www.ecse.rpi.edu/~qji/", "dblp": "330/1899;234/8752;", "google_scholar": "I213n_cAAAAJ;;vAXmpVIAAAAJ", "orcid": ";;", "linkedin": "hongji-guo-357124192/;;", "or_profile": "~Hongji_Guo1;~Hanjing_Wang2;~Qiang_Ji1", "aff": "Rensselaer Polytechnic Institute;Rensselaer Polytechnic Institute;Rensselaer Polytechnic Institute", "aff_domain": "rpi.edu;rpi.edu;rpi.edu", "position": "PhD student;PhD student;Professor", "bibtex": "@misc{\nguo2024bayesian,\ntitle={Bayesian Knowledge Distillation for Online Action Detection},\nauthor={Hongji Guo and Hanjing Wang and Qiang Ji},\nyear={2024},\nurl={https://openreview.net/forum?id=8iojQVLLWb}\n}", "github": "", "project": "", "reviewers": "i6Lf;f1zi;kgXL;37DN", "site": "https://openreview.net/forum?id=8iojQVLLWb", "pdf_size": 4195624, "rating": "3;3;5;5", "confidence": "4;4;4;2", "soundness": "2;2;3;2", "contribution": "2;2;2;2", "presentation": "3;2;3;2", "wc_summary": "80;105;64;67", "wc_strengths": "29;90;26;80", "wc_weaknesses": "217;352;112;150", "wc_questions": "61;42;11;12", "wc_review": "387;589;213;309", "wc_reply_reviewers": "48;90;55;69", "wc_reply_authors": "572;611;212;371", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 79.0, 16.170961628796228 ], "wc_strengths_avg": [ 56.25, 28.98598799420161 ], "wc_weaknesses_avg": [ 207.75, 91.37388850213172 ], "wc_questions_avg": [ 31.5, 21.10094784600919 ], "wc_review_avg": [ 374.5, 138.32841356713377 ], "wc_reply_reviewers_avg": [ 65.5, 16.03901493234544 ], "wc_reply_authors_avg": [ 441.5, 160.7801293692725 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9528727725945138740&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Rensselaer Polytechnic Institute", "aff_unique_dep": "", "aff_unique_url": "https://www.rpi.edu", "aff_unique_abbr": "RPI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Finite Scalar Quantization: VQ-VAE Made Simple", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19317", "id": "8ishA3LxN8", "author_site": "Fabian Mentzer, David Minnen, Eirikur Agustsson, Michael Tschannen", "tldr": "", "abstract": "We propose to replace vector quantization (VQ) in the latent representation of VQ-VAEs\nwith a simple scheme termed finite scalar quantization (FSQ), where we project the VAE representation down to a few dimensions (typically less than 10).\nEach 
dimension is quantized to a small set of fixed values, leading to an (implicit) codebook given by the product of these sets.\nBy appropriately choosing the number of dimensions and values each dimension can take, we obtain the same codebook size as in VQ.\nOn top of such discrete representations,\nwe can train the same models that have been trained on VQ-VAE representations. For example, autoregressive and masked transformer models for image generation, multimodal generation, and dense prediction computer vision tasks.\nConcretely, we employ FSQ with MaskGIT for image generation, and with UViM for depth estimation, colorization, and panoptic segmentation.\nDespite the much simpler design of FSQ, we obtain competitive performance in all these tasks.\nWe emphasize that FSQ does not suffer from codebook collapse and does not need the complex machinery employed in VQ (commitment losses, codebook reseeding, code splitting, entropy penalties, etc.) to learn expressive discrete representations.", "keywords": "representation learning;vector quantization;quantization", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/8d534731cef42aa361d20832385f5dc4f32f5537.pdf", "author": "Fabian Mentzer;David Minnen;Eirikur Agustsson;Michael Tschannen", "authorids": "~Fabian_Mentzer2;~David_Minnen1;~Eirikur_Agustsson1;~Michael_Tschannen1", "gender": "M;M;;", "homepage": "https://fmentzer.github.io;http://research.minnen.org;;https://mitscha.github.io/", "dblp": "186/8020;;http://dblp.uni-trier.de/pers/hd/a/Agustsson:Eirikur;134/9824", "google_scholar": "https://scholar.google.ch/citations?user=R80F8XUAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.ch/citations?user=Uhvyua4AAAAJ;https://scholar.google.ch/citations?user=TSj_8nYAAAAJ", "orcid": ";;;", "linkedin": ";;eirikuragustsson/;", "or_profile": "~Fabian_Mentzer2;~David_Minnen1;~Eirikur_Agustsson1;~Michael_Tschannen1", "aff": "Google;Google;Google;Google DeepMind", "aff_domain": "google.com;google.com;google.com;google.com", "position": "Researcher;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nmentzer2024finite,\ntitle={Finite Scalar Quantization: {VQ}-{VAE} Made Simple},\nauthor={Fabian Mentzer and David Minnen and Eirikur Agustsson and Michael Tschannen},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=8ishA3LxN8}\n}", "github": "", "project": "", "reviewers": "zMky;fsTh;Hp3C;HYXB", "pdf_size": 3295764, "rating": "6;6;6;8", "confidence": "4;4;2;3", "soundness": "3;3;3;3", "contribution": "2;2;3;3", "presentation": "3;3;3;2", "wc_summary": "73;50;250;120", "wc_strengths": "62;45;89;47", "wc_weaknesses": "67;84;101;115", "wc_questions": "97;97;37;145", "wc_review": "299;276;477;427", "wc_reply_reviewers": "28;0;17;120", "wc_reply_authors": "456;785;217;385", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 123.25, 77.40599137017755 ], "wc_strengths_avg": [ 60.75, 17.583728273605686 ], "wc_weaknesses_avg": [ 91.75, 18.019087102292392 ], "wc_questions_avg": [ 94.0, 38.301436004411116 ], "wc_review_avg": [ 369.75, 84.52033778919723 ], "wc_reply_reviewers_avg": [ 41.25, 46.54769059792333 ], "wc_reply_authors_avg": [ 
460.75, 206.34482668581734 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 166, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6562591541374820279&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=8ishA3LxN8", "pdf": "https://openreview.net/pdf?id=8ishA3LxN8", "email": "google.com;google.com;google.com;google.com", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "Combining Axes Preconditioners through Kronecker Approximation for Deep Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19316", "id": "8j9hz8DVi8", "author_site": "Venkata Sai Surya Subramanyam Duvvuri, Fnu Devvrit, Rohan Anil, Cho-Jui Hsieh, Inderjit Dhillon", "tldr": "", "abstract": "Adaptive regularization based optimization methods such as full-matrix Adagrad which use gradient second-moment information hold significant potential for fast convergence in deep neural network (DNN) training, but are memory intensive and computationally demanding for large neural nets. We develop a technique called Combining AxeS PReconditioners (CASPR), which optimizes matrix-shaped DNN parameters by finding different preconditioners for each mode/axis of the parameter and combining them using a Kronecker-sum based approximation. We show tighter convergence guarantees in stochastic optimization compared to a Kronecker product based preconditioner, Shampoo, which arises as a special case of CASPR. 
Furthermore, our experiments demonstrates that CASPR approximates the gradient second-moment matrix in full-matrix Adagrad more accurately, and shows significant improvement in training and generalization performance compared to existing practical adaptive regularization based methods such as Shampoo and Adam in a variety of tasks including graph neural network on OGBG-molpcba, Transformer on a universal dependencies dataset and auto-regressive large language modeling on C4 dataset.", "keywords": "Optimization", "primary_area": "optimization", "supplementary_material": "", "author": "Sai Surya Duvvuri;Fnu Devvrit;Rohan Anil;Cho-Jui Hsieh;Inderjit S Dhillon", "authorids": "~Sai_Surya_Duvvuri1;~Fnu_Devvrit1;~Rohan_Anil1;~Cho-Jui_Hsieh1;~Inderjit_S_Dhillon1", "gender": "M;M;M;M;M", "homepage": ";;;http://web.cs.ucla.edu/~chohsieh/index.html;http://www.cs.utexas.edu/users/inderjit/", "dblp": "277/6122;;182/1833;14/2770;d/InderjitSDhillon", "google_scholar": "UL3980gAAAAJ;c86HtPoAAAAJ;;Wy89g4IAAAAJ;xBv5ZfkAAAAJ", "orcid": ";;;;", "linkedin": "sai-surya-duvvuri-79903511b/;devvrit/;;;inderjit-dhillon-a20888b0/", "or_profile": "~Sai_Surya_Duvvuri1;~Fnu_Devvrit1;~Rohan_Anil1;~Cho-Jui_Hsieh1;~Inderjit_S_Dhillon1", "aff": "University of Texas at Austin;, University of Texas at Austin;Google Brain ;University of California, Los Angeles;University of Texas, Austin", "aff_domain": "cs.utexas.edu;cs.utexas.edu;google.com;ucla.edu;utexas.edu", "position": "PhD student;PhD student;Principal Engineer;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nduvvuri2024combining,\ntitle={Combining Axes Preconditioners through Kronecker Approximation for Deep Learning},\nauthor={Sai Surya Duvvuri and Fnu Devvrit and Rohan Anil and Cho-Jui Hsieh and Inderjit S Dhillon},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=8j9hz8DVi8}\n}", "github": "", "project": "", "reviewers": "gGzL;Vz7y;a8ib", "pdf_size": 678541, "rating": "6;8;8", "confidence": "3;5;3", "soundness": "3;4;3", "contribution": "3;4;4", "presentation": "3;4;3", "wc_summary": "48;77;41", "wc_strengths": "37;67;22", "wc_weaknesses": "129;25;81", "wc_questions": "109;108;4", "wc_review": "323;277;148", "wc_reply_reviewers": "0;76;0", "wc_reply_authors": "803;258;271", "reply_reviewers": "0;1;0", "reply_authors": "3;2;1", "rating_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 3.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 55.333333333333336, 15.584892970081281 ], "wc_strengths_avg": [ 42.0, 18.708286933869708 ], "wc_weaknesses_avg": [ 78.33333333333333, 42.49967320135794 ], "wc_questions_avg": [ 73.66666666666667, 49.2634640366356 ], "wc_review_avg": [ 249.33333333333334, 74.07353703509027 ], "wc_reply_reviewers_avg": [ 25.333333333333332, 35.82674358011841 ], "wc_reply_authors_avg": [ 444.0, 253.90680705067098 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12991620137545411675&as_sdt=805&sciodt=0,3&hl=en", "gs_version_total": 2, "openreview": "https://openreview.net/forum?id=8j9hz8DVi8", "pdf": 
"https://openreview.net/pdf?id=8j9hz8DVi8", "email": "cs.utexas.edu;cs.utexas.edu;google.com;ucla.edu;utexas.edu", "author_num": 5, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "University of Texas at Austin;Google;University of California, Los Angeles", "aff_unique_dep": ";Google Brain;", "aff_unique_url": "https://www.utexas.edu;https://brain.google.com;https://www.ucla.edu", "aff_unique_abbr": "UT Austin;Google Brain;UCLA", "aff_campus_unique_index": "0;0;1;2;0", "aff_campus_unique": "Austin;Mountain View;Los Angeles", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "8jKuUHsndT", "title": "Re-evaluating Retrosynthesis Algorithms with Syntheseus", "track": "main", "status": "Reject", "tldr": "", "abstract": "The planning of how to synthesize molecules, also known as retrosynthesis, has been a growing focus of the machine learning and chemistry communities in recent years. Despite the appearance of steady progress, we argue that imperfect benchmarks and inconsistent comparisons mask systematic shortcomings of existing techniques. To remedy this, we present a benchmarking library called syntheseus which promotes best practice by default, enabling consistent meaningful evaluation of single-step and multi-step retrosynthesis algorithms. We use syntheseus to re-evaluate a number of previous retrosynthesis algorithms, and find that the ranking of state-of-the-art models changes when evaluated carefully. We end with guidance for future works in this area.", "keywords": "retrosynthesis;reaction prediction;chemistry;drug design;science", "primary_area": "applications to physical sciences (physics, chemistry, biology, etc.)", "supplementary_material": "/attachment/42f838fa010120ad601b8a33bdb7878062eff97e.zip", "author": "Krzysztof Maziarz;Austin Tripp;Guoqing Liu;Megan Stanley;Shufang Xie;Piotr Gai\u0144ski;Philipp Seidl;Marwin Segler", "authorids": "~Krzysztof_Maziarz1;~Austin_Tripp1;~Guoqing_Liu3;~Megan_Stanley1;~Shufang_Xie1;~Piotr_Gai\u0144ski1;~Philipp_Seidl1;~Marwin_Segler2", "gender": "M;M;M;;M;M;M;", "homepage": ";https://www.austintripp.ca/;https://www.microsoft.com/en-us/research/people/guoqingliu/;;;https://github.com/panpiort8/;;", "dblp": "194/2971;267/5455;;;https://dblp.uni-trier.de/pid/163/2704-3;;262/3456;185/0993", "google_scholar": "BA8bBVkAAAAJ;WAvRaxMAAAAJ;h-eHvyoAAAAJ;;;;WmyltwcAAAAJ;imsL94QAAAAJ", "orcid": ";0000-0002-0138-7740;;;;;;", "linkedin": ";;;megan-jane-stanley/;;;phseidl/;", "or_profile": "~Krzysztof_Maziarz1;~Austin_Tripp1;~Guoqing_Liu3;~Megan_Stanley1;~Shufang_Xie1;~Piotr_Gai\u0144ski1;~Philipp_Seidl1;~Marwin_Segler1", "aff": "Microsoft Research;University of Cambridge;Microsoft Research ;Microsoft Research Cambridge;Renmin University of China;Mila - Quebec Artificial Intelligence Institute;Johannes Kepler University Linz;Microsoft", "aff_domain": "microsoft.com;cam.ac.uk;microsoft.com;microsoft.com;ruc.edu.cn;mila.quebec;jku.at;microsoft.com", "position": "Senior Researcher;PhD student;Researcher;Researcher;PhD student;Intern;PhD student;Researcher", "bibtex": "@misc{\nmaziarz2024reevaluating,\ntitle={Re-evaluating Retrosynthesis Algorithms with Syntheseus},\nauthor={Krzysztof Maziarz and Austin Tripp and Guoqing Liu and Megan Stanley and Shufang Xie and Piotr Gai{\\'n}ski and Philipp Seidl and Marwin Segler},\nyear={2024},\nurl={https://openreview.net/forum?id=8jKuUHsndT}\n}", "github": "", "project": "", "reviewers": "cznK;eNX2;WK4y;JYEf", "site": "https://openreview.net/forum?id=8jKuUHsndT", "pdf_size": 578731, 
"rating": "3;5;6;8", "confidence": "4;4;3;4", "soundness": "2;3;3;4", "contribution": "2;2;3;3", "presentation": "3;2;3;3", "wc_summary": "111;99;60;61", "wc_strengths": "81;63;68;184", "wc_weaknesses": "114;52;94;153", "wc_questions": "21;31;53;52", "wc_review": "327;245;275;450", "wc_reply_reviewers": "98;0;0;133", "wc_reply_authors": "496;528;406;486", "reply_reviewers": "1;0;0;2", "reply_authors": "3;1;2;3", "rating_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 82.75, 22.65364209128413 ], "wc_strengths_avg": [ 99.0, 49.512624652708524 ], "wc_weaknesses_avg": [ 103.25, 36.409991760504425 ], "wc_questions_avg": [ 39.25, 13.718144918318949 ], "wc_review_avg": [ 324.25, 78.30509242699354 ], "wc_reply_reviewers_avg": [ 57.75, 59.060879607401716 ], "wc_reply_authors_avg": [ 479.0, 44.91102314577124 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.16012815380508713, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3781188252048429306&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;0;0;2;3;4;0", "aff_unique_norm": "Microsoft;University of Cambridge;Renmin University of China;Quebec Artificial Intelligence Institute;Johannes Kepler University", "aff_unique_dep": "Microsoft Research;;;Artificial Intelligence;", "aff_unique_url": "https://www.microsoft.com/en-us/research;https://www.cam.ac.uk;http://www.ruc.edu.cn;https://mila.quebec;https://www.jku.at", "aff_unique_abbr": "MSR;Cambridge;RUC;Mila;JKU", "aff_campus_unique_index": "1;1;2", "aff_campus_unique": ";Cambridge;Linz", "aff_country_unique_index": "0;1;0;1;2;3;4;0", "aff_country_unique": "United States;United Kingdom;China;Canada;Austria" }, { "id": "8lLaS1ekDA", "title": "Signatures Meet Dynamic Programming: Generalizing Bellman Equations for Trajectory Following", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Path signatures have been proposed as a powerful representation of paths that ef\ufb01ciently captures the path\u2019s analytic and geometric characteristics, having useful algebraic properties including fast concatenation of paths through tensor products. Signatures have recently been widely adopted in machine learning problems for time series analysis. In this work we establish connections between value functions typically used in optimal control and intriguing properties of path signatures. These connections motivate our novel control framework with signature transforms that ef\ufb01ciently generalizes the Bellman equation to the space of trajectories. We analyze the properties and advantages of the framework, termed signature control. In particular, we demonstrate that (i) it can naturally deal with varying/adaptive time steps; (ii) it propagates higher-level information more ef\ufb01ciently than value function updates; (iii) it is robust to dynamical system misspeci\ufb01cation over long rollouts. As a speci\ufb01c case of our framework, we devise a model predictive control method for path tracking. This method generalizes integral control, being suitable for problems with unknown disturbances. 
The proposed algorithms are tested in simulation, with differentiable physics models including typical control and robotics tasks such as point-mass, curve following for an ant model, and a robotic manipulator.", "keywords": "Decision making;Path signature;Bellman equation;Integral control;Model predictive control;Robotics", "primary_area": "applications to robotics, autonomy, planning", "supplementary_material": "/attachment/31e087a732bf849de7221b5f837a4c104c4b0288.zip", "author": "Motoya Ohnishi;Iretiayo Akinola;Jie Xu;Ajay Mandlekar;Fabio Ramos", "authorids": "~Motoya_Ohnishi1;~Iretiayo_Akinola1;~Jie_Xu7;~Ajay_Mandlekar1;~Fabio_Ramos1", "gender": "M;M;M;M;M", "homepage": "https://mohnishi.github.io/;;https://people.csail.mit.edu/jiex;https://ai.stanford.edu/~amandlek/;https://fabioramos.github.io/", "dblp": "207/9907;;37/5126-28;https://dblp.uni-trier.de/pers/hd/m/Mandlekar:Ajay;22/2488", "google_scholar": ";e1zesfMAAAAJ;3Tj5lWEAAAAJ;MEz23joAAAAJ;https://scholar.google.com.au/citations?user=T_mJiHoAAAAJ", "orcid": ";;;;", "linkedin": ";;;;fabio-ramos-3256b421/", "or_profile": "~Motoya_Ohnishi1;~Iretiayo_Akinola1;~Jie_Xu7;~Ajay_Mandlekar1;~Fabio_Ramos1", "aff": "University of Washington;NVIDIA;NVIDIA;NVIDIA;NVIDIA", "aff_domain": "washington.edu;nvidia.com;nvidia.com;nvidia.com;nvidia.com", "position": "PhD student;Researcher;Researcher;Researcher;Principal Research Scientist", "bibtex": "@misc{\nohnishi2024signatures,\ntitle={Signatures Meet Dynamic Programming: Generalizing Bellman Equations for Trajectory Following},\nauthor={Motoya Ohnishi and Iretiayo Akinola and Jie Xu and Ajay Mandlekar and Fabio Ramos},\nyear={2024},\nurl={https://openreview.net/forum?id=8lLaS1ekDA}\n}", "github": "", "project": "", "reviewers": "1sNc;ZzLg;AcwD", "site": "https://openreview.net/forum?id=8lLaS1ekDA", "pdf_size": 3336066, "rating": "3;5;5", "confidence": "3;3;3", "soundness": "2;2;2", "contribution": "3;2;2", "presentation": "1;3;1", "wc_summary": "80;120;81", "wc_strengths": "118;167;33", "wc_weaknesses": "962;128;152", "wc_questions": "178;44;1", "wc_review": "1338;459;267", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.0, 0.0 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 1.6666666666666667, 0.9428090415820634 ], "wc_summary_avg": [ 93.66666666666667, 18.624953392931992 ], "wc_strengths_avg": [ 106.0, 55.35943159631127 ], "wc_weaknesses_avg": [ 414.0, 387.618369017775 ], "wc_questions_avg": [ 74.33333333333333, 75.37609405876346 ], "wc_review_avg": [ 688.0, 466.2552948761011 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 3, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2357026002029994603&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "University of Washington;NVIDIA", "aff_unique_dep": ";NVIDIA Corporation", "aff_unique_url": "https://www.washington.edu;https://www.nvidia.com", "aff_unique_abbr": "UW;NVIDIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "DiffEnc: Variational Diffusion with a Learned Encoder", "status": 
"Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19315", "id": "8nxy1bQWTG", "author_site": "Beatrix M. G. Nielsen, Anders Christensen, Andrea Dittadi, Ole Winther", "tldr": "", "abstract": "Diffusion models may be viewed as hierarchical variational autoencoders (VAEs) with two improvements: parameter sharing for the conditionals in the generative process and efficient computation of the loss as independent terms over the hierarchy. We consider two changes to the diffusion model that retain these advantages while adding flexibility to the model. Firstly, we introduce a data and depth-dependent mean function in the diffusion process, which leads to a modified diffusion loss. Our proposed framework, DiffEnc, achieves a statistically significant improvement in likelihood on CIFAR-10. Secondly, we let the ratio of the noise variance of the reverse encoder process and the generative process be a free weight parameter rather than being fixed to one. This leads to theoretical insights: For a finite depth hierarchy, the evidence lower bound (ELBO) can be used as an objective for a weighted diffusion loss approach and for optimizing the noise schedule specifically for inference. For the infinite-depth hierarchy, on the other hand, the weight parameter has to be one to have a well-defined ELBO.", "keywords": "DDPM;diffusion;image generation;encoder", "primary_area": "generative models", "supplementary_material": "", "author": "Beatrix Miranda Ginn Nielsen;Anders Christensen;Andrea Dittadi;Ole Winther", "authorids": "~Beatrix_Miranda_Ginn_Nielsen1;~Anders_Christensen1;~Andrea_Dittadi1;~Ole_Winther1", "gender": "F;M;M;M", "homepage": ";;https://addtt.github.io;https://olewinther.github.io/", "dblp": ";44/5606;;36/1568", "google_scholar": ";https://scholar.google.com/citations?hl=da;PrvuuaAAAAAJ;7VAwhzUAAAAJ", "orcid": "0009-0005-0092-024X;0009-0009-0038-5485;;0000-0002-1966-3205", "linkedin": "beatrix-miranda-ginn-nielsen-84201694;anderschrist/;;owinther/", "or_profile": "~Beatrix_Miranda_Ginn_Nielsen1;~Anders_Christensen1;~Andrea_Dittadi1;~Ole_Winther1", "aff": "Technical University of Denmark;Technical University of Denmark;Mila - Quebec Artificial Intelligence Institute;Technical University of Denmark", "aff_domain": "dtu.dk;dtu.dk;mila.quebec;dtu.dk", "position": "PhD student;PhD student;Visiting Researcher;Full Professor", "bibtex": "@inproceedings{\nnielsen2024diffenc,\ntitle={DiffEnc: Variational Diffusion with a Learned Encoder},\nauthor={Beatrix Miranda Ginn Nielsen and Anders Christensen and Andrea Dittadi and Ole Winther},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=8nxy1bQWTG}\n}", "github": "", "project": "", "reviewers": "g9hi;jJ4J;Hg56;neQL", "pdf_size": 3398407, "rating": "5;6;6;6", "confidence": "2;4;4;1", "soundness": "3;3;3;2", "contribution": "2;2;3;3", "presentation": "3;4;3;2", "wc_summary": "58;41;105;86", "wc_strengths": "71;38;79;101", "wc_weaknesses": "133;227;306;56", "wc_questions": "70;73;146;24", "wc_review": "332;379;636;267", "wc_reply_reviewers": "0;0;35;0", "wc_reply_authors": "506;874;583;188", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 2.75, 1.299038105676658 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 72.5, 24.70323865407125 ], "wc_strengths_avg": [ 72.25, 
22.620510604316607 ], "wc_weaknesses_avg": [ 180.5, 94.43119188064927 ], "wc_questions_avg": [ 78.25, 43.671357890498435 ], "wc_review_avg": [ 403.5, 140.00089285429576 ], "wc_reply_reviewers_avg": [ 8.75, 15.155444566227676 ], "wc_reply_authors_avg": [ 537.75, 244.15402413230873 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5956881876886698360&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=8nxy1bQWTG", "pdf": "https://openreview.net/pdf?id=8nxy1bQWTG", "email": "dtu.dk;dtu.dk;mila.quebec;dtu.dk", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Technical University of Denmark;Quebec Artificial Intelligence Institute", "aff_unique_dep": ";Artificial Intelligence", "aff_unique_url": "https://www.tek.dk;https://mila.quebec", "aff_unique_abbr": "DTU;Mila", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Denmark;Canada" }, { "id": "8nz6xYntfJ", "title": "AlignDiff: Aligning Diffusion Models for General Few-Shot Segmentation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Text-to-image diffusion models have shown remarkable success in synthesizing photo-realistic images. Apart from creative applications, can we use such models to synthesize samples that aid the few-shot training of discriminative models? In this work, we propose AlignDiff, a general framework for synthesizing training images and associated mask annotations for few-shot segmentation. We identify three levels of misalignments that arise when utilizing pre-trained diffusion models in segmentation tasks. These misalignments need to be addressed to create realistic training samples and align the synthetic data distribution with the real training distribution: 1) instance-level misalignment, where generated samples fail to be consistent with the target task (e.g., specific texture or out-of-distribution generation of rare categories); 2) scene-level misalignment, where synthetic samples are object-centric and fail to represent realistic scene layouts with multiple objects; and 3) annotation-level misalignment, where diffusion models are limited to generating images without pixel-level annotations. AlignDiff overcomes these challenges by leveraging a few real samples to guide the generation, thus improving novel IoU over baseline methods in generalized few-shot semantic segmentation on Pascal-5i and COCO-20i by up to 80%. In addition, AlignDiff is capable of augmenting the learning of out-of-distribution categories on FSS-1000, while naive diffusion model generates samples that hurt the training process. 
The code will be released.", "keywords": "Few-shot learning;image segmentation;image synthesis;training synthesis", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Ri-Zhao Qiu;Yu-Xiong Wang;Kris Hauser", "authorids": "~Ri-Zhao_Qiu1;~Yu-Xiong_Wang1;~Kris_Hauser2", "gender": "Not Specified;;M", "homepage": "https://rogerqi.github.io/;https://yxw.cs.illinois.edu/;http://kkhauser.web.illinois.edu", "dblp": "336/5470;35/10700;", "google_scholar": "uH0re54AAAAJ;T_Q-xDkAAAAJ;-sGaL8sAAAAJ", "orcid": ";;", "linkedin": "rizhaoqiu/;;", "or_profile": "~Ri-Zhao_Qiu1;~Yu-Xiong_Wang1;~Kris_Hauser2", "aff": "University of California, San Diego;Department of Computer Science, University of Illinois Urbana-Champaign;University of Illinois, Urbana-Champaign", "aff_domain": "ucsd.edu;cs.illinois.edu;illinois.edu", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@misc{\nqiu2024aligndiff,\ntitle={AlignDiff: Aligning Diffusion Models for General Few-Shot Segmentation},\nauthor={Ri-Zhao Qiu and Yu-Xiong Wang and Kris Hauser},\nyear={2024},\nurl={https://openreview.net/forum?id=8nz6xYntfJ}\n}", "github": "", "project": "", "reviewers": "ZQEG;cYiM;bS35;ofKT", "site": "https://openreview.net/forum?id=8nz6xYntfJ", "pdf_size": 16923510, "rating": "3;5;5;6", "confidence": "4;4;4;4", "soundness": "3;3;3;3", "contribution": "2;2;3;2", "presentation": "3;1;2;3", "wc_summary": "147;122;69;100", "wc_strengths": "72;27;78;59", "wc_weaknesses": "234;247;156;81", "wc_questions": "22;57;4;46", "wc_review": "475;453;307;286", "wc_reply_reviewers": "0;77;0;0", "wc_reply_authors": "722;1024;234;726", "reply_reviewers": "0;1;0;0", "reply_authors": "1;2;1;1", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 109.5, 28.692333470807146 ], "wc_strengths_avg": [ 59.0, 19.710403344427025 ], "wc_weaknesses_avg": [ 179.5, 66.67270805959512 ], "wc_questions_avg": [ 32.25, 20.64430914319973 ], "wc_review_avg": [ 380.25, 84.43747686898277 ], "wc_reply_reviewers_avg": [ 19.25, 33.34197804570089 ], "wc_reply_authors_avg": [ 676.5, 283.32093110111015 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1403047807487662662&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of California, San Diego;University of Illinois Urbana-Champaign;University of Illinois", "aff_unique_dep": ";Department of Computer Science;", "aff_unique_url": "https://www.ucsd.edu;https://illinois.edu;https://illinois.edu", "aff_unique_abbr": "UCSD;UIUC;UIUC", "aff_campus_unique_index": "0;1;1", "aff_campus_unique": "San Diego;Urbana-Champaign", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "8oNzf7u5lT", "title": "Pylic: Leveraging Source Code for Planning in Structured Environments", "track": "main", "status": "Reject", "tldr": "", "abstract": "This paper investigates the application of program analysis techniques to planning problems in dynamic environments with discontinuities in long-horizon settings. 
Traditional approaches rely on specialized representations, which are often tailored to specific problems and domains. In contrast, we propose describing the combined planning and control problem directly as a desired property of the execution of simulator source code. This representation is expressive, naturally providing a means to describe desired properties of even very dynamic and discontinuous environments. We show that, despite this generality, it is still possible to leverage domain knowledge by relating it to the simulator source code. We study the effectiveness of this approach through several case studies in simulated robotic environments. Our results show that in these environments, our framework can improve the efficiency in solving the control and planning problem, relative to standard numerical and reinforcement learning methods.", "keywords": "program analysis;planning;robotics;optimization", "primary_area": "neurosymbolic & hybrid AI systems (physics-informed, logic & formal reasoning, etc.)", "supplementary_material": "/attachment/92ea473536bb13af7e2f11255bbc254dcb94c811.zip", "author": "Leonardo Hernandez Cano;Nathan Hunt;Sara Magliacane;Armando Solar-Lezama", "authorids": "~Leonardo_Hernandez_Cano1;~Nathan_Hunt1;~Sara_Magliacane1;~Armando_Solar-Lezama1", "gender": ";M;F;M", "homepage": ";;http://saramagliacane.github.io;https://people.csail.mit.edu/asolar/", "dblp": ";200/8601;120/5256;95/6919", "google_scholar": ";;https://scholar.google.nl/citations?user=H3j_zQ4AAAAJ;https://scholar.google.com.tw/citations?user=8BX3BokAAAAJ", "orcid": ";;;", "linkedin": ";;magliacane/;", "or_profile": "~Leonardo_Hernandez_Cano1;~Nathan_Hunt1;~Sara_Magliacane1;~Armando_Solar-Lezama1", "aff": ";Massachusetts Institute of Technology;University of Amsterdam;Massachusetts Institute of Technology", "aff_domain": ";mit.edu;uva.nl;mit.edu", "position": ";PhD student;Assistant Professor;Full Professor", "bibtex": "@misc{\ncano2024pylic,\ntitle={Pylic: Leveraging Source Code for Planning in Structured Environments},\nauthor={Leonardo Hernandez Cano and Nathan Hunt and Sara Magliacane and Armando Solar-Lezama},\nyear={2024},\nurl={https://openreview.net/forum?id=8oNzf7u5lT}\n}", "github": "", "project": "", "reviewers": "uEX5;ppCk;wE34", "site": "https://openreview.net/forum?id=8oNzf7u5lT", "pdf_size": 685675, "rating": "3;3;5", "confidence": "4;4;2", "soundness": "1;2;2", "contribution": "1;2;2", "presentation": "2;2;2", "wc_summary": "59;55;41", "wc_strengths": "34;25;41", "wc_weaknesses": "428;601;146", "wc_questions": "6;71;19", "wc_review": "527;752;247", "wc_reply_reviewers": "181;0;95", "wc_reply_authors": "1271;508;389", "reply_reviewers": "1;0;1", "reply_authors": "2;1;1", "rating_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 1.6666666666666667, 0.4714045207910317 ], "contribution_avg": [ 1.6666666666666667, 0.4714045207910317 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 51.666666666666664, 7.717224601860151 ], "wc_strengths_avg": [ 33.333333333333336, 6.548960901462833 ], "wc_weaknesses_avg": [ 391.6666666666667, 187.5212580541796 ], "wc_questions_avg": [ 32.0, 28.083209693100727 ], "wc_review_avg": [ 508.6666666666667, 206.57255921884257 ], "wc_reply_reviewers_avg": [ 92.0, 73.92338376093635 ], "wc_reply_authors_avg": [ 722.6666666666666, 390.761933776678 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], 
"replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:jz2EayYY-nsJ:scholar.google.com/&scioq=Pylic:+Leveraging+Source+Code+for+Planning+in+Structured+Environments&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "Massachusetts Institute of Technology;University of Amsterdam", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.uva.nl", "aff_unique_abbr": "MIT;UvA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;Netherlands" }, { "id": "8oUF3uGIVo", "title": "Exploring High-Order Message-Passing in Graph Transformers", "track": "main", "status": "Reject", "tldr": "", "abstract": "The Transformer architecture has demonstrated promising performance on graph learning tasks. However, the existing attention mechanism used in Graph Transformers (GT) cannot capture high-order correlations that exist in complex graphs, thereby limiting their expressiveness. In this paper, we present a High-Order message-passing strategy within the Transformer architecture (HOtrans) to learn long-range, high-order relationships for graph representation. Recognizing that some nodes share similar properties, we extract communities from the entire graph and introduce a virtual node to connect all nodes in the community. Operating on the community, we adopt a three-step message-passing approach: capture the high-order information of the community into a virtual node; propagate long-range dependent information between communities; aggregate community-level representations back to graph nodes. This facilitates effective global information passing. Virtual nodes capture the high-order community information and support the long-range information passing as the bridge. \nWe demonstrate that many existing GTs can be regarded as special cases of this framework. 
Our experimental results illustrate that our proposed HOtrans consistently achieves highly competitive results across several node classification tasks.", "keywords": "Graph representation learning;Transformer", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Xueqi Ma;Xingjun Ma;Chuang Liu;Sarah Monazam Erfani;James Bailey", "authorids": "~Xueqi_Ma1;~Xingjun_Ma1;~Chuang_Liu2;~Sarah_Monazam_Erfani1;~James_Bailey1", "gender": "F;M;M;;", "homepage": ";http://xingjunma.com/;https://liuchuang0059.github.io/;https://people.eng.unimelb.edu.au/smonazam/;", "dblp": "194/4773;195/8270;52/1800-8;136/0170;", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com.au/citations?user=XQViiyYAAAAJ;hQzjzekAAAAJ;https://scholar.google.com.au/citations?user=Jq9ocx4AAAAJ;", "orcid": ";;0000-0003-2377-2567;;", "linkedin": ";xingjun-ma-173532129/;;;", "or_profile": "~Xueqi_Ma1;~Xingjun_Ma1;~Chuang_Liu2;~Sarah_Monazam_Erfani1;~James_Bailey1", "aff": "University of Melbourne;Fudan University;Wuhan University;The University of Melbourne;", "aff_domain": "unimelb.edu;fudan.edu.cn;whu.edu;unimelb.edu.au;", "position": "PhD student;Associate Professor;PhD student;Associate Professor;", "bibtex": "@misc{\nma2024exploring,\ntitle={Exploring High-Order Message-Passing in Graph Transformers},\nauthor={Xueqi Ma and Xingjun Ma and Chuang Liu and Sarah Monazam Erfani and James Bailey},\nyear={2024},\nurl={https://openreview.net/forum?id=8oUF3uGIVo}\n}", "github": "", "project": "", "reviewers": "eGUc;e8WT;7DnL;akHZ", "site": "https://openreview.net/forum?id=8oUF3uGIVo", "pdf_size": 426931, "rating": "3;3;5;5", "confidence": "5;4;4;5", "soundness": "2;2;3;3", "contribution": "2;2;2;3", "presentation": "3;3;2;4", "wc_summary": "69;58;85;66", "wc_strengths": "36;44;30;89", "wc_weaknesses": "137;427;211;151", "wc_questions": "25;44;5;45", "wc_review": "267;573;331;351", "wc_reply_reviewers": "0;22;0;0", "wc_reply_authors": "510;640;360;422", "reply_reviewers": "0;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 69.5, 9.810708435174291 ], "wc_strengths_avg": [ 49.75, 23.19886850689059 ], "wc_weaknesses_avg": [ 231.5, 116.24435470163702 ], "wc_questions_avg": [ 29.75, 16.361158271956175 ], "wc_review_avg": [ 380.5, 115.38955758646446 ], "wc_reply_reviewers_avg": [ 5.5, 9.526279441628825 ], "wc_reply_authors_avg": [ 483.0, 105.152270541344 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8011409773533815531&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Melbourne;Fudan University;Wuhan University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.unimelb.edu.au;https://www.fudan.edu.cn;http://www.whu.edu.cn/", "aff_unique_abbr": "UniMelb;Fudan;WHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "Australia;China" }, { "id": "8oYjW8QxuC", "title": "Pi-DUAL: Using privileged information to distinguish clean from noisy labels", "track": "main", "status": "Reject", 
"tldr": "", "abstract": "Label noise is a pervasive problem in deep learning that often compromises the generalization performance of trained models. Recently, leveraging privileged information (PI) -- information available only during training but not at test time -- has emerged as an effective approach to mitigate this issue. Yet, existing PI-based methods have failed to consistently outperform their no-PI counterparts in terms of preventing overfitting to label noise. To address this deficiency, we introduce Pi-DUAL, an architecture designed to harness PI to distinguish clean from wrong labels. Pi-DUAL decomposes the output logits into a prediction term, based on conventional input features, and a noise-fitting term influenced solely by PI. A gating mechanism steered by PI adaptively shifts focus between these terms, allowing the model to implicitly separate the learning paths of clean and wrong labels. Empirically, Pi-DUAL achieves significant performance improvements on key PI benchmarks (e.g., +6.8% on ImageNet-PI), establishing a new state-of-the-art test set accuracy. Additionally, Pi-DUAL is a potent method for identifying noisy samples post-training, outperforming other strong methods at this task. Overall, Pi-DUAL is a simple, scalable and practical approach for mitigating the effects of label noise in a variety of real-world scenarios with PI.", "keywords": "noisy labels;privileged information;supervised learning", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/04979be18de8465f8a54fb736275a0f60fb768fc.pdf", "author": "Ke Wang;Guillermo Ortiz-Jimenez;Rodolphe Jenatton;Mark Collier;Efi Kokiopoulou;Pascal Frossard", "authorids": "~Ke_Wang19;~Guillermo_Ortiz-Jimenez1;~Rodolphe_Jenatton3;~Mark_Collier1;~Efi_Kokiopoulou2;~Pascal_Frossard1", "gender": "M;;M;M;;", "homepage": "https://wang-kee.github.io/;http://gortizji.github.io;http://rodolphejenatton.com/;;;", "dblp": ";222/2737;68/8398;;;", "google_scholar": "wKBORzsAAAAJ;xAsJnG0AAAAJ;QIR6rygAAAAJ;U4rBrcgAAAAJ;;", "orcid": ";;;;;", "linkedin": ";;;mark-collier-aa446032/;;", "or_profile": "~Ke_Wang19;~Guillermo_Ortiz-Jimenez1;~Rodolphe_Jenatton3;~Mark_Collier1;~Efi_Kokiopoulou2;~Pascal_Frossard1", "aff": "EPFL - EPF Lausanne;Google DeepMind;Google;Google;;", "aff_domain": "epfl.ch;google.com;google.com;google.com;;", "position": "PhD student;Research Scientist;Senior research scientist;Researcher;;", "bibtex": "@misc{\nwang2024pidual,\ntitle={Pi-{DUAL}: Using privileged information to distinguish clean from noisy labels},\nauthor={Ke Wang and Guillermo Ortiz-Jimenez and Rodolphe Jenatton and Mark Collier and Efi Kokiopoulou and Pascal Frossard},\nyear={2024},\nurl={https://openreview.net/forum?id=8oYjW8QxuC}\n}", "github": "", "project": "", "reviewers": "h1hP;VkrZ;sbuM", "site": "https://openreview.net/forum?id=8oYjW8QxuC", "pdf_size": 8107634, "rating": "5;6;6", "confidence": "4;4;3", "soundness": "3;3;3", "contribution": "2;2;3", "presentation": "3;3;3", "wc_summary": "49;108;66", "wc_strengths": "47;87;123", "wc_weaknesses": "126;186;98", "wc_questions": "6;3;4", "wc_review": "228;384;291", "wc_reply_reviewers": "0;89;0", "wc_reply_authors": "1740;1304;764", "reply_reviewers": "0;1;0", "reply_authors": "3;3;1", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], 
"presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 74.33333333333333, 24.796953217863052 ], "wc_strengths_avg": [ 85.66666666666667, 31.04119127152751 ], "wc_weaknesses_avg": [ 136.66666666666666, 36.709066394496546 ], "wc_questions_avg": [ 4.333333333333333, 1.247219128924647 ], "wc_review_avg": [ 301.0, 64.07807737440318 ], "wc_reply_reviewers_avg": [ 29.666666666666668, 41.95500235040182 ], "wc_reply_authors_avg": [ 1269.3333333333333, 399.203651731238 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.9428090415820634 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5880124482341331784&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "EPFL;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": "https://www.epfl.ch;https://deepmind.com", "aff_unique_abbr": "EPFL;DeepMind", "aff_campus_unique_index": "0;2;2", "aff_campus_unique": "Lausanne;;Mountain View", "aff_country_unique_index": "0;1;2;2", "aff_country_unique": "Switzerland;United Kingdom;United States" }, { "id": "8oZf2SlXEY", "title": "Distribution Calibration For Few-Shot Learning by Bayesian Relation Inference", "track": "main", "status": "Reject", "tldr": "", "abstract": "Learning from a limited number of samples is difficult as a small number of samples cannot cover all the information in their category. It is worth noting that categories with scarce samples may be distributed in a way that is related to categories that contain sufficient data. Therefore it is possible to calibrate the distribution of a sample-poor category by using categories with a large amount of data. Existing methods of distribution calibration usually use artificially set distances to calculate the association between two categories, which may ignore deeper relations between categories. In this paper, we propose a distribution calibration method based on Bayesian relation inference. For the input few-sample classes, it can automatically infer their relation with the categories with sufficient data and adaptively generate a large amount of fused feature data that can represent the few-sample classes. The results show that a simple logistic regression classifier trained by using the large amount of data generated by our method, exceeds state-of-the-art accuracy for skin disease classification issue. 
Through visual analysis, we demonstrate that the relationship graph generated by this Bayesian relationship inference method has a degree of interpretability.", "keywords": "Bayesian inference;few-shot learning", "primary_area": "probabilistic methods (Bayesian methods, variational inference, sampling, UQ, etc.)", "supplementary_material": "/attachment/4b2b9ff2b5456c9d167f8ad79d6f4fe6137fc161.zip", "author": "Yuchen Liu;Yang Gu;Weining Weng;Zhaohua Yang;Yiqiang Chen", "authorids": "~Yuchen_Liu12;~Yang_Gu5;~Weining_Weng1;~Zhaohua_Yang2;~Yiqiang_Chen1", "gender": "M;F;M;;M", "homepage": "https://github.com/YuchenLiu1225;https://scholar.google.com/citations?hl=en&user=KrsyFoQAAAAJ;;http://yangzhaohua1998.github.io;http://www.ict.cas.cn/sourcedb_2018_ict_cas/cn/jssrck/200909/t20090917_2496596.html", "dblp": ";;;;", "google_scholar": ";https://scholar.google.com/citations?hl=en;3VK38QgAAAAJ;;LC3SwhEAAAAJ", "orcid": ";;0009-0008-5006-1262;;", "linkedin": ";;;;", "or_profile": "~Yuchen_Liu12;~Yang_Gu5;~Weining_Weng1;~Zhaohua_Yang2;~Yiqiang_Chen1", "aff": "University of Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;Chinese Academy of Sciences", "aff_domain": "ucas.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn", "position": "MS student;Associate Professor;PhD student;MS student;Full Professor", "bibtex": "@misc{\nliu2024distribution,\ntitle={Distribution Calibration For Few-Shot Learning by Bayesian Relation Inference},\nauthor={Yuchen Liu and Yang Gu and Weining Weng and Zhaohua Yang and Yiqiang Chen},\nyear={2024},\nurl={https://openreview.net/forum?id=8oZf2SlXEY}\n}", "github": "", "project": "", "reviewers": "czxy;MVEs;PCKz", "site": "https://openreview.net/forum?id=8oZf2SlXEY", "pdf_size": 1058517, "rating": "3;5;5", "confidence": "2;4;3", "soundness": "2;3;3", "contribution": "1;2;2", "presentation": "1;3;1", "wc_summary": "45;95;173", "wc_strengths": "25;74;95", "wc_weaknesses": "109;102;177", "wc_questions": "8;91;39", "wc_review": "187;362;484", "wc_reply_reviewers": "0;50;247", "wc_reply_authors": "1473;2537;822", "reply_reviewers": "0;2;1", "reply_authors": "5;8;2", "rating_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 1.6666666666666667, 0.4714045207910317 ], "presentation_avg": [ 1.6666666666666667, 0.9428090415820634 ], "wc_summary_avg": [ 104.33333333333333, 52.67088590694315 ], "wc_strengths_avg": [ 64.66666666666667, 29.32954520994525 ], "wc_weaknesses_avg": [ 129.33333333333334, 33.82635395992631 ], "wc_questions_avg": [ 46.0, 34.2442209236342 ], "wc_review_avg": [ 344.3333333333333, 121.89157294725324 ], "wc_reply_reviewers_avg": [ 99.0, 106.62394977990013 ], "wc_reply_authors_avg": [ 1610.6666666666667, 706.8806279862408 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 5.0, 2.449489742783178 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:abrTrww8RUQJ:scholar.google.com/&scioq=Distribution+Calibration+For+Few-Shot+Learning+by+Bayesian+Relation+Inference&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "University of Chinese Academy of Sciences;Chinese Academy of Sciences", 
"aff_unique_dep": ";Institute of Computing Technology", "aff_unique_url": "http://www.ucas.ac.cn;http://www.ict.ac.cn", "aff_unique_abbr": "UCAS;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "8ohamFnX14", "title": "The (co)limit of metabeliefs", "track": "main", "status": "Reject", "tldr": "", "abstract": "Potentially infinite sequences of beliefs arise when reasoning about the future, one's own beliefs, or others' beliefs. Machine learning researchers are typically content with heuristic truncation, or proofs of asymptotic convergence, of sequences of beliefs; however, such approaches lack insight into the structure of the possible choices. We construct and analyze several (co)limits of meta beliefs to understand the topological and geometric structure of sequences of beliefs. We analyze the relationship between different levels, the relationship between different beliefs at different levels, the encoding of temporal and other indexing structures in belief space, and structures preserved in the colimit. Examples demonstrate the ability to formalize and reason about problems of learning, cooperative and competitive reasoning, and sequential decision making. We conclude by emphasizing insights gained, and future directions for more concrete machine learning models.", "keywords": "Belief;colimit;category theory", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "/attachment/6d822a6fc32a8915220e508e24ad2a37c088fb94.pdf", "author": "Benjamin Sheller;Patrick Shafto", "authorids": "~Benjamin_Sheller1;~Patrick_Shafto2", "gender": "M;", "homepage": ";http://www.shaftolab.com", "dblp": ";03/5979", "google_scholar": ";HUi6F7wAAAAJ", "orcid": "0000-0001-6699-0797;", "linkedin": ";", "or_profile": "~Benjamin_Sheller1;~Patrick_Shafto1", "aff": "Drake University;Rutgers University", "aff_domain": "drake.edu;rutgers.edu", "position": "Assistant Professor;Professor", "bibtex": "@misc{\nsheller2024the,\ntitle={The (co)limit of metabeliefs},\nauthor={Benjamin Sheller and Patrick Shafto},\nyear={2024},\nurl={https://openreview.net/forum?id=8ohamFnX14}\n}", "github": "", "project": "", "reviewers": "y6tA;BToD;xqdq;cWSP;sas8", "site": "https://openreview.net/forum?id=8ohamFnX14", "pdf_size": 468735, "rating": "3;5;5;5;8", "confidence": "4;3;2;1;4", "soundness": "2;3;3;3;3", "contribution": "2;2;2;2;3", "presentation": "2;2;4;3;4", "wc_summary": "226;66;158;19;140", "wc_strengths": "5;62;38;1;229", "wc_weaknesses": "157;69;49;22;140", "wc_questions": "3;52;47;1;555", "wc_review": "391;249;292;43;1064", "wc_reply_reviewers": "383;79;25;0;0", "wc_reply_authors": "573;163;124;0;435", "reply_reviewers": "2;1;1;0;0", "reply_authors": "3;2;1;0;1", "rating_avg": [ 5.2, 1.6 ], "confidence_avg": [ 2.8, 1.16619037896906 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "contribution_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.8944271909999159 ], "wc_summary_avg": [ 121.8, 72.36131563204196 ], "wc_strengths_avg": [ 67.0, 84.03570669661795 ], "wc_weaknesses_avg": [ 87.4, 52.347301745171166 ], "wc_questions_avg": [ 131.6, 212.7699226864549 ], "wc_review_avg": [ 407.8, 347.15610321583 ], "wc_reply_reviewers_avg": [ 97.4, 145.68541450673777 ], "wc_reply_authors_avg": [ 259.0, 211.70451105255174 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.4, 1.019803902718557 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], 
"corr_rating_confidence": 0.12862393885688164, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:J2yN2_b05uEJ:scholar.google.com/&scioq=The+(co)limit+of+metabeliefs&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Drake University;Rutgers University", "aff_unique_dep": ";", "aff_unique_url": "https://www.drake.edu;https://www.rutgers.edu", "aff_unique_abbr": "Drake;Rutgers", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "One Step of Gradient Descent is Provably the Optimal In-Context Learner with One Layer of Linear Self-Attention", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19314", "id": "8p3fu56lKc", "author_site": "Arvind Mahankali, Tatsunori Hashimoto, Tengyu Ma", "tldr": "", "abstract": "Recent works have empirically analyzed in-context learning and shown that transformers trained on synthetic linear regression tasks can learn to implement ridge regression, which is the Bayes-optimal predictor, given sufficient capacity (Akyurek et al., 2023), while one-layer transformers with linear self-attention and no MLP layer will learn to implement one step of gradient descent (GD) on a least-squares linear regression objective (von Oswald et al., 2022). However, the theory behind these observations remains poorly understood. We theoretically study transformers with a single layer of linear self-attention, trained on synthetic noisy linear regression data. First, we mathematically show that when the covariates are drawn from a standard Gaussian distribution, the one-layer transformer which minimizes the pre-training loss will implement a single step of GD on the least-squares linear regression objective. Then, we find that changing the distribution of the covariates and weight vector to a non-isotropic Gaussian distribution has a strong impact on the learned algorithm: the global minimizer of the pre-training loss now implements a single step of $\\textit{pre-conditioned}$ GD. However, if only the distribution of the responses is changed, then this does not have a large effect on the learned algorithm: even when the response comes from a more general family of $\\textit{nonlinear}$ functions, the global minimizer of the pre-training loss still implements a single step of GD on a least-squares linear regression objective.", "keywords": "Linear Self-Attention;In-context learning;Gradient Descent;Theoretical Understanding", "primary_area": "learning theory", "supplementary_material": "/attachment/2a9f6630ad97e11aca89759cde4ceb153d835363.zip", "author": "Arvind V. Mahankali;Tatsunori Hashimoto;Tengyu Ma", "authorids": "~Arvind_V._Mahankali1;~Tatsunori_Hashimoto1;~Tengyu_Ma1", "gender": ";M;M", "homepage": ";https://thashim.github.io;http://ai.stanford.edu/~tengyuma/", "dblp": "270/8242.html;;54/9061", "google_scholar": ";5ygiTwsAAAAJ;i38QlUwAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Arvind_V._Mahankali1;~Tatsunori_Hashimoto1;~Tengyu_Ma1", "aff": "Computer Science Department, Stanford University;Stanford University;Facebook AI Research", "aff_domain": "cs.stanford.edu;stanford.edu;fb.com", "position": "PhD student;Assistant Professor;Visiting Scientist", "bibtex": "@inproceedings{\nmahankali2024one,\ntitle={One Step of Gradient Descent is Provably the Optimal In-Context Learner with One Layer of Linear Self-Attention},\nauthor={Arvind V. 
Mahankali and Tatsunori Hashimoto and Tengyu Ma},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=8p3fu56lKc}\n}", "github": "", "project": "", "reviewers": "nLRu;VQYN;ksN8;1cqQ", "pdf_size": 401766, "rating": "5;5;6;8", "confidence": "4;3;4;4", "soundness": "2;2;3;4", "contribution": "2;2;3;3", "presentation": "3;2;3;4", "wc_summary": "208;103;88;269", "wc_strengths": "65;20;82;125", "wc_weaknesses": "22;146;177;89", "wc_questions": "97;60;364;2", "wc_review": "392;329;711;485", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "228;92;754;62", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 167.0, 74.8698871376203 ], "wc_strengths_avg": [ 73.0, 37.60983913818298 ], "wc_weaknesses_avg": [ 108.5, 59.078337823605025 ], "wc_questions_avg": [ 130.75, 138.85851612342688 ], "wc_review_avg": [ 479.25, 144.85229545989253 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 284.0, 278.47082432456006 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.4714045207910316, "gs_citation": 112, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13101108196694454872&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=8p3fu56lKc", "pdf": "https://openreview.net/pdf?id=8p3fu56lKc", "email": "cs.stanford.edu;stanford.edu;fb.com", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Stanford University;Meta", "aff_unique_dep": "Computer Science Department;Facebook AI Research", "aff_unique_url": "https://www.stanford.edu;https://research.facebook.com", "aff_unique_abbr": "Stanford;FAIR", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "8p3hMUwwbg", "title": "From Stability to Chaos: Analyzing Gradient Descent Dynamics in Quadratic Regression", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "We conduct a comprehensive investigation into the dynamics of gradient descent using large-order constant step-sizes in the context of quadratic regression models. Within this framework, we reveal that the dynamics can be encapsulated by a specific cubic map, naturally parameterized by the step-size. Through a fine-grained bifurcation analysis concerning the step-size parameter, we delineate five distinct training phases: (1) monotonic, (2) catapult, (3) periodic, (4) chaotic, and (5) divergent, precisely demarcating the boundaries of each phase. As illustrations, we provide examples involving phase retrieval and two-layer neural networks employing quadratic activation functions and constant outer-layers, utilizing orthogonal training data. Our simulations indicate that these five phases also manifest with generic non-orthogonal data. We also empirically investigate the generalization performance when training in the various non-monotonic (and non-divergent) phases. 
In particular, we observe that performing an ergodic trajectory averaging stabilizes the test error in non-monotonic (and non-divergent) phases.", "keywords": "Edges of stability;periodicity;chaos;trajectory averaging;cubic dynamics;quadratic regression", "primary_area": "optimization", "supplementary_material": "", "author": "Xuxing Chen;Krishna Balasubramanian;Promit Ghosal;Bhavya Kumar Agrawalla", "authorids": "~Xuxing_Chen1;~Krishna_Balasubramanian1;promit@brandeis.edu;~Bhavya_Kumar_Agrawalla1", "gender": "M;;;", "homepage": "https://xuxingc.github.io/;;;", "dblp": "221/0393;;;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;;https://scholar.google.com/scholar?hl=en", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Xuxing_Chen1;~Krishna_Balasubramanian1;promit@brandeis.edu;~Bhavya_Kumar_Agrawalla1", "aff": "University of California, Davis;;;Massachusetts Institute of Technology", "aff_domain": "ucdavis.edu;;;mit.edu", "position": "PhD student;;;Undergrad student", "bibtex": "@misc{\nchen2024from,\ntitle={From Stability to Chaos: Analyzing Gradient Descent Dynamics in Quadratic Regression},\nauthor={Xuxing Chen and Krishna Balasubramanian and Promit Ghosal and Bhavya Kumar Agrawalla},\nyear={2024},\nurl={https://openreview.net/forum?id=8p3hMUwwbg}\n}", "github": "", "project": "", "reviewers": "BLuU;fNyy;MkpK;qRAi;Ebrh", "site": "https://openreview.net/forum?id=8p3hMUwwbg", "pdf_size": 1807377, "rating": "3;3;5;5;6", "confidence": "5;3;3;3;3", "soundness": "3;3;3;3;3", "contribution": "1;1;2;2;2", "presentation": "2;2;3;2;3", "wc_summary": "70;99;122;88;81", "wc_strengths": "68;32;66;74;77", "wc_weaknesses": "125;227;406;1007;78", "wc_questions": "204;125;180;77;44", "wc_review": "467;483;774;1246;280", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "0;0;0;0;0", "rating_avg": [ 4.4, 1.2 ], "confidence_avg": [ 3.4, 0.8000000000000002 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 1.6, 0.4898979485566356 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 92.0, 17.72004514666935 ], "wc_strengths_avg": [ 63.4, 16.19382598399773 ], "wc_weaknesses_avg": [ 368.6, 338.46512375723444 ], "wc_questions_avg": [ 126.0, 60.20963378065009 ], "wc_review_avg": [ 650.0, 337.3040171714532 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5833333333333331, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12175652516156880684&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1", "aff_unique_norm": "University of California, Davis;Massachusetts Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucdavis.edu;https://web.mit.edu", "aff_unique_abbr": "UC Davis;MIT", "aff_campus_unique_index": "0", "aff_campus_unique": "Davis;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "8pYNdmwGAO", "title": "EvolMPNN: Predicting Mutational Effect on Homologous Proteins by Evolution Encoding", "track": "main", "status": "Reject", "tldr": "", "abstract": "Predicting protein properties is paramount for biological and medical advancements. Current protein engineering mutates on a typical protein, called the wild-type, to construct a family of homologous proteins and study their properties. 
Yet, existing methods easily neglect subtle mutations, failing to capture the effect on the protein properties. To this end, we propose EvolMPNN, Evolution-aware Message Passing Neural Network, to learn evolution-aware protein embeddings. EvolMPNN samples sets of anchor proteins, computes evolutionary information by means of residues and employs a differentiable evolution-aware aggregation scheme over these sampled anchors. This way EvolMPNN can capture the mutation effect on proteins with respect to the anchor proteins. Afterwards, the aggregated evolution-aware embeddings are integrated with sequence embeddings to generate final comprehensive protein embeddings. Our model shows up to 6.4% better than state-of-the-art methods and attains 36X inference speedup in comparison with large pre-trained models. The code and models are available at https://anonymous.4open.science/r/EvolMPNN.", "keywords": "Mutation Prediction;Protein Property Prediction;Homologous Protein Sequence Modelling;Message Passing Neural Network", "primary_area": "applications to physical sciences (physics, chemistry, biology, etc.)", "supplementary_material": "", "author": "Zhiqiang Zhong;Davide Mottin", "authorids": "~Zhiqiang_Zhong1;~Davide_Mottin1", "gender": "M;M", "homepage": "https://zhiqiangzhongddu.github.io/;https://mott.in", "dblp": "253/0447;135/7623", "google_scholar": "zHzChxAAAAAJ;https://scholar.google.it/citations?user=evZ9Q9EAAAAJ", "orcid": "0000-0002-1226-5597;0000-0001-8256-2258", "linkedin": "zhiqiang-zhong-097287111/;davide-mottin-67ab7323/", "or_profile": "~Zhiqiang_Zhong1;~Davide_Mottin1", "aff": "Aarhus University;Aarhus University", "aff_domain": "au.dk;au.dk", "position": "Postdoc;Associate Professor", "bibtex": "@misc{\nzhong2024evolmpnn,\ntitle={Evol{MPNN}: Predicting Mutational Effect on Homologous Proteins by Evolution Encoding},\nauthor={Zhiqiang Zhong and Davide Mottin},\nyear={2024},\nurl={https://openreview.net/forum?id=8pYNdmwGAO}\n}", "github": "", "project": "", "reviewers": "fHbu;LeqQ;ojZm", "site": "https://openreview.net/forum?id=8pYNdmwGAO", "pdf_size": 545553, "rating": "3;3;6", "confidence": "4;4;3", "soundness": "3;3;3", "contribution": "2;2;3", "presentation": "3;2;3", "wc_summary": "93;32;80", "wc_strengths": "63;35;127", "wc_weaknesses": "630;138;132", "wc_questions": "416;91;149", "wc_review": "1202;296;488", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 4.0, 1.4142135623730951 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 68.33333333333333, 26.233989826601334 ], "wc_strengths_avg": [ 75.0, 38.505410875183074 ], "wc_weaknesses_avg": [ 300.0, 233.35809392433768 ], "wc_questions_avg": [ 218.66666666666666, 141.53052281712553 ], "wc_review_avg": [ 662.0, 389.79994869163335 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:K-MxG5d2Je4J:scholar.google.com/&scioq=EvolMPNN:+Predicting+Mutational+Effect+on+Homologous+Proteins+by+Evolution+Encoding&hl=en&as_sdt=0,5", "gs_version_total": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Aarhus University", 
"aff_unique_dep": "", "aff_unique_url": "https://au.dk", "aff_unique_abbr": "AU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Denmark" }, { "id": "8phE9BVRWS", "title": "SuperFormer: Superpixel-based Transformers for Salient Object Detection", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Images often have local redundant information that can strain the training of deep neural networks. An effective way to reduce spatial redundancy and image complexity is to over-segment with superpixels. With a fast, linear computational complexity, Simple Linear Iterative Clustering (SLIC) generates superpixels by grouping pixels as a function of colour similarity and spatial proximity. However, it is challenging and non-trivial to train a model on over-segmented images with dynamic graph structure and low spatial inductive bias. In order to train on unstructured data, graph neural networks (GNNs) can be applied to classify each superpixel for salient object detection (SOD) by considering a set of superpixels as graphs. Although other works on graph classification or node classification were able to utilize pre-defined edge information or GNNs, naive applications on superpixel graphs do not translate trivially. Our proposed SuperFormer method introduces new feature attributes for superpixels and a dynamic positional encoding for heterogeneous spatial graphs to achieve state-of-the-art results in salient object detection for low model complexity.", "keywords": "Salient Object Detection;Superpixels;Transformers;Graph Neural Networks", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/8f993178f4ac8afbb0ef49f8626e32c95e514b9c.zip", "author": "Jinman Park;Zahra Gharaee;Paul W. Fieguth", "authorids": "~Jinman_Park1;~Zahra_Gharaee1;~Paul_W._Fieguth1", "gender": "M;F;", "homepage": ";https://zahrag.github.io/;", "dblp": "81/3875;174/4584;f/PWFieguth", "google_scholar": ";https://scholar.google.pl/citations?user=nWe8d1MAAAAJ;TObmBfYAAAAJ", "orcid": "0009-0003-0870-8185;0000-0003-0140-0025;0000-0001-7260-2260", "linkedin": "jinmanpark/;zahragh/;paul-fieguth-1071461", "or_profile": "~Jinman_Park1;~Zahra_Gharaee1;~Paul_W._Fieguth1", "aff": "University of Waterloo;University of Waterloo;University of Waterloo", "aff_domain": "uwaterloo.ca;uwaterloo.ca;uwaterloo.ca", "position": "PhD student;Postdoc;Full Professor", "bibtex": "@misc{\npark2024superformer,\ntitle={SuperFormer: Superpixel-based Transformers for Salient Object Detection},\nauthor={Jinman Park and Zahra Gharaee and Paul W. 
Fieguth},\nyear={2024},\nurl={https://openreview.net/forum?id=8phE9BVRWS}\n}", "github": "", "project": "", "reviewers": "tMkj;DeSV;P45W", "site": "https://openreview.net/forum?id=8phE9BVRWS", "pdf_size": 3817996, "rating": "3;3;3", "confidence": "5;4;4", "soundness": "1;2;2", "contribution": "1;2;2", "presentation": "2;3;2", "wc_summary": "40;54;45", "wc_strengths": "27;17;25", "wc_weaknesses": "79;92;84", "wc_questions": "27;80;4", "wc_review": "173;243;158", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 3.0, 0.0 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 1.6666666666666667, 0.4714045207910317 ], "contribution_avg": [ 1.6666666666666667, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 46.333333333333336, 5.792715732327589 ], "wc_strengths_avg": [ 23.0, 4.320493798938574 ], "wc_weaknesses_avg": [ 85.0, 5.354126134736337 ], "wc_questions_avg": [ 37.0, 31.822423959633664 ], "wc_review_avg": [ 191.33333333333334, 37.04351795148811 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 3, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:j-aW-vzfth8J:scholar.google.com/&scioq=SuperFormer:+Superpixel-based+Transformers+for+Salient+Object+Detection&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Waterloo", "aff_unique_dep": "", "aff_unique_url": "https://uwaterloo.ca", "aff_unique_abbr": "UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "id": "8r2f4D0I3S", "title": "Towards Pareto-Optimality for Test-Time Adaptation", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Test-Time Adaptation (TTA) has been effective for mitigating the distribution shifts of test datasets by adapting a pre-trained model. The existing TTA approaches update the model parameters online toward the gradient descent direction by averaging individual objectives in the current batch. The averaged gradient can be biased by only a few instances in the batch, leading to conflict among individual objectives when updating the model. To prevent a negative effect from the gradient conflict among test instances, a model could have been updated by the gradient that is agreeable by all objectives in the batch. Therefore, we propose a new approach to update the model parameters toward Pareto-Optimality across all individual objectives in TTA. Particularly, this paper suggests an extended version of the Pareto optimization to anticipate unexpected distribution shifts during testing time. This extension is enabled by merging the sharpness-aware minimization into the Pareto optimization. 
We demonstrate the effectiveness of the proposed approaches through experiments on three benchmark datasets: CIFAR10-to-CIFAR10C, CIFAR100-to-CIFAR100C, and ImageNet-to-ImageNetC.", "keywords": "Test-Time Adaptation;Pareto-Optimality;Sharpness-Aware Minimization", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "", "author": "JoonHo Jang;DongHyeok Shin;Byeonghu Na;HeeSun Bae;Il-chul Moon", "authorids": "~JoonHo_Jang1;~DongHyeok_Shin1;~Byeonghu_Na1;~HeeSun_Bae1;~Il-chul_Moon1", "gender": ";;M;F;", "homepage": "https://aailab.kaist.ac.kr/xe2/members_phdstudent/16877;;https://sites.google.com/view/byeonghu-na;;", "dblp": "241/9686;;276/5100;;", "google_scholar": "oYbKry4AAAAJ;;https://scholar.google.co.kr/citations?user=mJoqpmEAAAAJ;https://scholar.google.co.kr/citations?user=D9U_ohsAAAAJ;", "orcid": ";;0000-0003-3463-2674;0000-0002-9986-0945;", "linkedin": ";;byeonghu-na-17942120b/;heesun-bae-8a4b8523a/;", "or_profile": "~JoonHo_Jang1;~DongHyeok_Shin1;~Byeonghu_Na1;~HeeSun_Bae1;~Il-chul_Moon1", "aff": "Korea Advanced Institute of Science & Technology;;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;", "aff_domain": "kaist.ac.kr;;kaist.ac.kr;kaist.ac.kr;", "position": "PhD student;;PhD student;PhD student;", "bibtex": "@misc{\njang2024towards,\ntitle={Towards Pareto-Optimality for Test-Time Adaptation},\nauthor={JoonHo Jang and DongHyeok Shin and Byeonghu Na and HeeSun Bae and Il-chul Moon},\nyear={2024},\nurl={https://openreview.net/forum?id=8r2f4D0I3S}\n}", "github": "", "project": "", "reviewers": "HqZ3;MZwq;7JZo;gRux", "site": "https://openreview.net/forum?id=8r2f4D0I3S", "pdf_size": 6355806, "rating": "3;3;5;6", "confidence": "5;4;4;3", "soundness": "3;2;2;3", "contribution": "2;2;2;3", "presentation": "2;3;2;3", "wc_summary": "84;45;114;96", "wc_strengths": "33;27;55;127", "wc_weaknesses": "149;171;614;102", "wc_questions": "38;2;145;88", "wc_review": "304;245;928;413", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 84.75, 25.31180554602931 ], "wc_strengths_avg": [ 60.5, 39.78379066906521 ], "wc_weaknesses_avg": [ 259.0, 206.4691260213013 ], "wc_questions_avg": [ 68.25, 53.816238255753255 ], "wc_review_avg": [ 472.5, 269.80038917688756 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:g3nWTDi2Y7UJ:scholar.google.com/&scioq=Towards+Pareto-Optimality+for+Test-Time+Adaptation&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "id": "8rN439jpkT", "title": "Imitation Learning Using Generalized Sliced Wasserstein Distances", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Imitation learning methods 
allow to train reinforcement learning policies by way\nof minimizing a divergence measure between the state occupancies of the expert\nagent and the novice policy. Alternatively, a true metric in the space of probability\nmeasures can be used by invoking the optimal transport formalism. In this work,\nwe propose a novel imitation learning method based on the generalized form of\nthe sliced Wasserstein distance, which presents a number of computational and\nsample complexity benefits compared to existing imitation learning approaches.\nWe derive a per-state reward function based on the approximate differential of the\n$\\mathcal{SW}2$ distance which allows the use of standard forward RL methods for policy\noptimization. We demonstrate that the proposed method exhibits state-of-the-art\nperformance compared to established imitation learning frameworks on a number\nof benchmark tasks from the MuJoCo robotic locomotion suite.", "keywords": "Imitation Learning;Sliced Wasserstein;Optimal Transport", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Ivan Ovinnikov;Joachim M. Buhmann", "authorids": "~Ivan_Ovinnikov1;~Joachim_M._Buhmann1", "gender": "M;M", "homepage": ";https://ise.ethz.ch", "dblp": ";b/JMBuhmann", "google_scholar": "https://scholar.google.ch/citations?user=m8UKFekAAAAJ;https://scholar.google.ch/citations?user=zQWbCzYAAAAJ", "orcid": ";", "linkedin": "ivan-ovinnikov-0b227593/;", "or_profile": "~Ivan_Ovinnikov1;~Joachim_M._Buhmann1", "aff": "Swiss Federal Institute of Technology;Department of Computer Science, ETHZ - ETH Zurich", "aff_domain": "ethz.ch;inf.ethz.ch", "position": "PhD student;Professor", "bibtex": "@misc{\novinnikov2024imitation,\ntitle={Imitation Learning Using Generalized Sliced Wasserstein Distances},\nauthor={Ivan Ovinnikov and Joachim M. 
Buhmann},\nyear={2024},\nurl={https://openreview.net/forum?id=8rN439jpkT}\n}", "github": "", "project": "", "reviewers": "BgRr;DZ6u;jjKQ;evdi;DVGp", "site": "https://openreview.net/forum?id=8rN439jpkT", "pdf_size": 5309276, "rating": "3;3;5;5;6", "confidence": "3;3;4;3;4", "soundness": "3;3;2;3;4", "contribution": "2;2;2;3;3", "presentation": "3;2;2;2;4", "wc_summary": "32;65;66;71;81", "wc_strengths": "25;29;92;39;43", "wc_weaknesses": "145;71;311;168;139", "wc_questions": "53;76;40;265;5", "wc_review": "255;241;509;543;268", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "0;0;0;0;0", "rating_avg": [ 4.4, 1.2 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "contribution_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.8 ], "wc_summary_avg": [ 63.0, 16.504544828622205 ], "wc_strengths_avg": [ 45.6, 24.096472770926457 ], "wc_weaknesses_avg": [ 166.8, 79.01999746899514 ], "wc_questions_avg": [ 87.8, 91.53228938467561 ], "wc_review_avg": [ 363.2, 133.63293007339172 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.748455199183749, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Th_iEzzvVqgJ:scholar.google.com/&scioq=Imitation+Learning+Using+Generalized+Sliced+Wasserstein+Distances&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Swiss Federal Institute of Technology;ETH Zurich", "aff_unique_dep": ";Department of Computer Science", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "ETH Zurich;ETHZ", "aff_campus_unique_index": "1", "aff_campus_unique": ";Zurich", "aff_country_unique_index": "0;0", "aff_country_unique": "Switzerland" }, { "id": "8rhHI6C8iC", "title": "All for One and One for All: A Collaborative FL Framework for Generic Federated Learning with Personalized Plug-ins", "track": "main", "status": "Reject", "tldr": "", "abstract": "Personalized federated learning (PFL) mitigates the notorious data heterogeneity issue in generic federated learning (GFL) by assuming that client models only need to fit on local datasets individually. However, real-world FL clients may meet with test data from other distributions. To endow clients with the ability to handle other datasets, we theoretically formulate a new problem named as Selective FL (SFL), bridging the GFL and PFL together. To practically solve SFL, we design a general effective framework named as Hot-Pluggable Federated Learning (HPFL). In HPFL, clients firstly learn a global shared feature extractor. Next, with the frozen feature extractor, multiple personalized plug-in modules are individually learned based on the local data and saved in a modular store on the server. In inference stage, an accurate selection algorithm allows clients to choose and download suitable plug-in modules from the modular store to achieve the high generalization performance on target data distribution. We conduct comprehensive experiments and ablation studies following common FL settings including four datasets and three neural networks, showing that HPFL significantly outperforms advanced FL algorithms. 
Additionally, we empirically show the remarkable potential of HPFL to resolve other practical FL problems like continual federated learning and discuss its possible applications in one-shot FL, anarchic FL and an FL plug-in market.", "keywords": "Federated Learning;Deep Learning", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Lei Shen;Zhenheng Tang;Lijun Wu;Yonggang Zhang;Xiaowen Chu;Tao Qin;Bo Han", "authorids": "~Lei_Shen4;~Zhenheng_Tang2;~Lijun_Wu1;~Yonggang_Zhang1;~Xiaowen_Chu2;~Tao_Qin1;~Bo_Han1", "gender": ";M;M;M;M;M;M", "homepage": "https://shenlei515.github.io/;https://apeterswu.github.io/;https://yonggangzhangben.github.io/index.html;https://facultyprofiles.hkust-gz.edu.cn/faculty-personal-page/CHU-Xiaowen/xwchu;https://www.microsoft.com/en-us/research/people/taoqin/;https://bhanml.github.io/;", "dblp": ";68/1284-3;27/6859-3;24/2536;14/6841;241/0472-3;234/7546", "google_scholar": ";https://scholar.google.com/citations?hl=en;XSbEr98AAAAJ;https://scholar.google.com.hk/citations?user=v4rX24EAAAAJ;Bl4SRU0AAAAJ;nTNjqHwAAAAJ;FlYcrEcAAAAJ", "orcid": ";0000-0002-3530-590X;0000-0002-4080-7592;0000-0001-9745-4372;;;0000-0001-8769-9974", "linkedin": ";lijun-wu-59340478/;;;;;", "or_profile": "~Lei_Shen4;~Lijun_Wu1;~Yonggang_Zhang1;~Xiaowen_Chu2;~Tao_Qin1;~bo_han2;~Zhenheng_TANG1", "aff": "Huazhong University of Science and Technology;Microsoft Research;Hong Kong Baptist University;Hong Kong University of Science and Technology (Guangzhou);;MBZUAI;Hong Kong Baptist University", "aff_domain": "hust.edu.cn;microsoft.com;hkbu.edu.hk;ust.hk;;mbzuai.ac.ae;hkbu.edu.hk", "position": "Undergrad student;Researcher;Postdoc;Full Professor;;Researcher;PhD student", "bibtex": "@misc{\nshen2024all,\ntitle={All for One and One for All: A Collaborative {FL} Framework for Generic Federated Learning with Personalized Plug-ins},\nauthor={Lei Shen and Zhenheng Tang and Lijun Wu and Yonggang Zhang and Xiaowen Chu and Tao Qin and Bo Han},\nyear={2024},\nurl={https://openreview.net/forum?id=8rhHI6C8iC}\n}", "github": "", "project": "", "reviewers": "nQmW;3cLo;TW8h;MHGx;ENEM", "site": "https://openreview.net/forum?id=8rhHI6C8iC", "pdf_size": 2695820, "rating": "3;5;5;5;6", "confidence": "4;4;4;5;4", "soundness": "3;2;2;2;3", "contribution": "2;2;2;1;3", "presentation": "1;2;3;3;2", "wc_summary": "81;81;79;64;146", "wc_strengths": "83;27;78;27;69", "wc_weaknesses": "162;470;149;20;160", "wc_questions": "90;6;78;2;36", "wc_review": "416;584;384;113;411", "wc_reply_reviewers": "182;301;130;0;0", "wc_reply_authors": "3029;2526;2653;914;1793", "reply_reviewers": "1;2;1;0;0", "reply_authors": "6;5;5;5;5", "rating_avg": [ 4.8, 0.9797958971132712 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "contribution_avg": [ 2.0, 0.6324555320336759 ], "presentation_avg": [ 2.2, 0.7483314773547882 ], "wc_summary_avg": [ 90.2, 28.6174771774173 ], "wc_strengths_avg": [ 56.8, 24.741867350707384 ], "wc_weaknesses_avg": [ 192.2, 148.755369650981 ], "wc_questions_avg": [ 42.4, 36.14194239384486 ], "wc_review_avg": [ 381.6, 151.76639944335506 ], "wc_reply_reviewers_avg": [ 122.6, 114.43006597918223 ], "wc_reply_authors_avg": [ 2183.0, 750.5632551624146 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 5.2, 0.39999999999999997 ], "replies_avg": [ 38, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.10206207261596574, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:e7EgC2in8KoJ:scholar.google.com/&scioq=All+for+One+and+One+for+All:+A+Collaborative+FL+Framework+for+Generic+Federated+Learning+with+Personalized+Plug-ins&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;4;2", "aff_unique_norm": "Huazhong University of Science and Technology;Microsoft;Hong Kong Baptist University;Hong Kong University of Science and Technology;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";Microsoft Research;;;", "aff_unique_url": "http://www.hust.edu.cn;https://www.microsoft.com/en-us/research;https://www.hkbu.edu.hk;https://www.ust.hk;https://www.mbzuai.ac.ae", "aff_unique_abbr": "HUST;MSR;HKBU;HKUST;MBZUAI", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;0;0;2;0", "aff_country_unique": "China;United States;United Arab Emirates" }, { "title": "Fine-Tuning Enhances Existing Mechanisms: A Case Study on Entity Tracking", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19313", "id": "8sKcAWOf2D", "author_site": "Nikhil Prakash, Tamar Shaham, Tal Haklay, Yonatan Belinkov, David Bau", "tldr": "", "abstract": "Fine-tuning on generalized tasks such as instruction following, code generation, and mathematics has been shown to enhance language models' performance on a range of tasks. Nevertheless, explanations of how such fine-tuning influences the internal computations in these models remain elusive. We study how fine-tuning affects the internal mechanisms implemented in language models. As a case study, we explore the property of entity tracking, a crucial facet of language comprehension, where models fine-tuned on mathematics have substantial performance gains. We identify a mechanism that enables entity tracking and show that (i) both the original model and its fine-tuned version implement entity tracking with the same circuit. In fact, the entity tracking circuit of the fine-tuned version performs better than the full original model. (ii) The circuits of all the models implement roughly the same functionality, that is entity tracking is performed by tracking the position of the correct entity in both the original model and its fine-tuned version. (iii) Performance boost in the fine-tuned model is primarily attributed to its improved ability to handle positional information. To uncover these findings, we employ two methods: DCM, which automatically detects model components responsible for specific semantics, and CMAP, a new approach for patching activations across models to reveal improved mechanisms. 
Our findings suggest that fine-tuning enhances, rather than fundamentally alters, the mechanistic operation of the model.", "keywords": "Mechanistic Interpretability;Fine-Tuning;Entity Tracking;Mechanisms", "primary_area": "visualization or interpretation of learned representations", "supplementary_material": "", "author": "Nikhil Prakash;Tamar Rott Shaham;Tal Haklay;Yonatan Belinkov;David Bau", "authorids": "~Nikhil_Prakash1;~Tamar_Rott_Shaham1;~Tal_Haklay1;~Yonatan_Belinkov1;~David_Bau1", "gender": "M;F;F;M;M", "homepage": "https://nix07.github.io/;https://tamarott.github.io/;;https://www.belinkov.com;https://baulab.info/", "dblp": "227/0705;185/7904;;136/8705;47/3614", "google_scholar": "kUfq-fEAAAAJ;https://scholar.google.co.il/citations?user=YRJ-ePMAAAAJ;;https://scholar.google.com/citations?authorid=K-6ujU4AAAAJ;CYI6cKgAAAAJ", "orcid": ";;;;0000-0003-1744-6765", "linkedin": "nikhil07prakash/;;tal-haklay-501032192/;;david-bau-4b8130/", "or_profile": "~Nikhil_Prakash1;~Tamar_Rott_Shaham1;~Tal_Haklay1;~Yonatan_Belinkov1;~David_Bau1", "aff": "Northeastern University;Massachusetts Institute of Technology;Technion - Israel Institute of Technology, Technion;Technion, Technion;Northeastern University", "aff_domain": "northeastern.edu;mit.edu;technion.ac.il;technion.ac.il;northeastern.edu", "position": "PhD student;Postdoc;MS student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nprakash2024finetuning,\ntitle={Fine-Tuning Enhances Existing Mechanisms: A Case Study on Entity Tracking},\nauthor={Nikhil Prakash and Tamar Rott Shaham and Tal Haklay and Yonatan Belinkov and David Bau},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=8sKcAWOf2D}\n}", "github": "", "project": "", "reviewers": "AnWX;fhvV;M52Y", "pdf_size": 2936233, "rating": "5;6;6", "confidence": "4;3;4", "soundness": "3;3;3", "contribution": "2;3;3", "presentation": "3;3;3", "wc_summary": "102;188;166", "wc_strengths": "31;96;147", "wc_weaknesses": "58;240;134", "wc_questions": "29;53;2", "wc_review": "220;577;449", "wc_reply_reviewers": "14;0;21", "wc_reply_authors": "768;1574;763", "reply_reviewers": "1;0;1", "reply_authors": "1;3;1", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 152.0, 36.478304054145205 ], "wc_strengths_avg": [ 91.33333333333333, 47.471628954097994 ], "wc_weaknesses_avg": [ 144.0, 74.63689882803725 ], "wc_questions_avg": [ 28.0, 20.83266665599966 ], "wc_review_avg": [ 415.3333333333333, 147.67607193524015 ], "wc_reply_reviewers_avg": [ 11.666666666666666, 8.73053390247253 ], "wc_reply_authors_avg": [ 1035.0, 381.13602121377437 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 43, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13633037998843459062&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=8sKcAWOf2D", "pdf": "https://openreview.net/pdf?id=8sKcAWOf2D", "email": "northeastern.edu;mit.edu;technion.ac.il;technion.ac.il;northeastern.edu", "author_num": 5, "aff_unique_index": "0;1;2;2;0", "aff_unique_norm": "Northeastern 
University;Massachusetts Institute of Technology;Technion - Israel Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.northeastern.edu;https://web.mit.edu;https://www.technion.ac.il", "aff_unique_abbr": "NEU;MIT;Technion", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1;0", "aff_country_unique": "United States;Israel" }, { "id": "8tGu1pNUnN", "title": "CodeComplex: A Time-complexity Dataset for Multi-language Source Codes", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Deciding the computational complexity of algorithms is a really challenging problem, even for human algorithm experts. Theoretically, the problem of deciding the computational complexity of a given program is undecidable due to the famous Halting problem. So, we focus on cases where there are inputs and outputs, and of which we can know if the code is right or wrong. \nWe propose our own dataset CodeComplex, which consists of 4,900 Java codes and 4,900 Python codes submitted to programming competitions by human programmers and their complexity labels annotated by a group of algorithm experts. As far as we are aware, the CodeComplex dataset is by far the largest code dataset for the complexity prediction problem. Then, we present experimental results from several baseline models using the SOTA code understanding neural models such as CodeBERT, GraphCodeBERT, PLBART, CodeT5, CodeT5+ and UniXcoder. We also give an analysis on the difficulties of code complexity and why the models are good/bad on predicting the time complexity.\nThe CodeComplex dataset is available at https://anonymous.4open.science/r/CodeComplex-Data\nand material for reproduction is available at https://anonymous.4open.science/r/CodeComplex-Models.", "keywords": "Code complexity;Dataset;Neural network", "primary_area": "datasets and benchmarks", "supplementary_material": "", "author": "SeungYeop Baik;Mingi Jeon;Joonghyuk Hahn;Jungin Kim;Yo-Sub Han;Sang-Ki Ko", "authorids": "~SeungYeop_Baik1;~Mingi_Jeon1;~Joonghyuk_Hahn1;~Jungin_Kim1;~Yo-Sub_Han1;~Sang-Ki_Ko1", "gender": "M;M;M;;;M", "homepage": "https://sybaik1.github.io/;https://ckawoalt.github.io/;https://peer0.github.io;https://github.com/inistory;http://toc.yonsei.ac.kr/~emmous/;https://sites.google.com/site/sangkikotoc/home", "dblp": ";;304/4027;70/4018.html;h/YoSubHan;71/9491.html", "google_scholar": ";;08ccS2oAAAAJ;jLI2V78AAAAJ;yDOh26sAAAAJ;https://scholar.google.com/scholar?hl=en", "orcid": ";;0009-0000-5890-4916;;;", "linkedin": ";;joonghyuk-hahn;;;", "or_profile": "~SeungYeop_Baik1;~Mingi_Jeon1;~Joonghyuk_Hahn1;~Jungin_Kim1;~Yo-Sub_Han1;~Sang-Ki_Ko1", "aff": "Yonsei University;;Yonsei University;Yonsei University;Yonsei University;University of Seoul", "aff_domain": "yonsei.ac.kr;;yonsei.ac.kr;yonsei.ac.kr;yonsei.ac.kr;uos.ac.kr", "position": "PhD student;;PhD student;MS student;Full Professor;Assistant Professor", "bibtex": "@misc{\nbaik2024codecomplex,\ntitle={CodeComplex: A Time-complexity Dataset for Multi-language Source Codes},\nauthor={SeungYeop Baik and Mingi Jeon and Joonghyuk Hahn and Jungin Kim and Yo-Sub Han and Sang-Ki Ko},\nyear={2024},\nurl={https://openreview.net/forum?id=8tGu1pNUnN}\n}", "github": "", "project": "", "reviewers": "UsDM;Vi2o;oyNS;aZXg", "site": "https://openreview.net/forum?id=8tGu1pNUnN", "pdf_size": 583447, "rating": "3;3;5;5", "confidence": "4;4;3;4", "soundness": "2;2;2;3", "contribution": "2;2;2;3", "presentation": "3;2;2;2", "wc_summary": "58;47;67;89", "wc_strengths": 
"78;16;54;112", "wc_weaknesses": "305;93;190;290", "wc_questions": "98;63;127;254", "wc_review": "539;219;438;745", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 65.25, 15.433324334050653 ], "wc_strengths_avg": [ 65.0, 35.0 ], "wc_weaknesses_avg": [ 219.5, 85.37124808739767 ], "wc_questions_avg": [ 135.5, 72.07114540507872 ], "wc_review_avg": [ 485.25, 189.39426469669033 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:22_OuezmMioJ:scholar.google.com/&scioq=CodeComplex:+A+Time-complexity+Dataset+for+Multi-language+Source+Codes&hl=en&as_sdt=0,24", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Yonsei University;University of Seoul", "aff_unique_dep": ";", "aff_unique_url": "https://www.yonsei.ac.kr;http://www.useoul.edu", "aff_unique_abbr": "Yonsei;UOS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "8tWOUmBHRv", "title": "Offline Tracking with Object Permanence", "track": "main", "status": "Reject", "tldr": "", "abstract": "To reduce the expensive labor cost for manual labeling autonomous driving datasets, an alternative is to automatically label the datasets using an offline perception system. However, objects might be temporally occluded. Such occlusion scenarios in the datasets are common yet underexplored in offline autolabeling. In this work, we propose an offline tracking model that focuses on occluded object tracks. It leverages the concept of object permanence which means objects continue to exist even if they are not observed anymore. The model contains three parts: a standard online tracker, a re-identification (Re-ID) module that associates tracklets before and after occlusion, and a track completion module that completes the fragmented tracks. The Re-ID module and the track completion module use the vectorized high-definition map (HD map) as one of the inputs to refine the tracking results with occlusion. The model can effectively recover the occluded object trajectories. 
It achieves state-of-the-art performance in 3D multi-object tracking (MOT) by improving over the original online tracking result by 45% IDS and 2% AMOTA on the vehicle tracks.", "keywords": "autonomous driving;offline tracking;occlusion", "primary_area": "applications to robotics, autonomy, planning", "supplementary_material": "", "author": "Xianzhong Liu;Holger Caesar", "authorids": "~Xianzhong_Liu1;~Holger_Caesar2", "gender": "M;M", "homepage": ";http://it-caesar.com", "dblp": ";125/7460", "google_scholar": ";373LKEYAAAAJ", "orcid": ";", "linkedin": "xianzhong-liu-992375193/;holger-caesar-18600638/?originalSubdomain=sg", "or_profile": "~Xianzhong_Liu1;~Holger_Caesar2", "aff": ";Delft University of Technology", "aff_domain": ";tudelft.nl", "position": ";Assistant Professor", "bibtex": "@misc{\nliu2024offline,\ntitle={Offline Tracking with Object Permanence},\nauthor={Xianzhong Liu and Holger Caesar},\nyear={2024},\nurl={https://openreview.net/forum?id=8tWOUmBHRv}\n}", "github": "", "project": "", "reviewers": "SgKx;tT1W;GvzN;S5GM", "site": "https://openreview.net/forum?id=8tWOUmBHRv", "pdf_size": 2259320, "rating": "3;3;5;5", "confidence": "4;3;4;3", "soundness": "2;3;2;2", "contribution": "1;2;2;1", "presentation": "2;3;3;2", "wc_summary": "64;73;64;73", "wc_strengths": "11;37;56;30", "wc_weaknesses": "22;145;103;124", "wc_questions": "117;54;27;124", "wc_review": "214;309;250;351", "wc_reply_reviewers": "0;44;0;104", "wc_reply_authors": "379;0;0;0", "reply_reviewers": "0;1;0;1", "reply_authors": "1;0;0;0", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 1.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 68.5, 4.5 ], "wc_strengths_avg": [ 33.5, 16.101242188104617 ], "wc_weaknesses_avg": [ 98.5, 46.5966737010272 ], "wc_questions_avg": [ 80.5, 41.19769411022904 ], "wc_review_avg": [ 281.0, 52.758885507561665 ], "wc_reply_reviewers_avg": [ 37.0, 42.649736224272246 ], "wc_reply_authors_avg": [ 94.75, 164.11181401715112 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 0.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3862650890182915223&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff_unique_index": "0", "aff_unique_norm": "Delft University of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.tudelft.nl", "aff_unique_abbr": "TU Delft", "aff_country_unique_index": "0", "aff_country_unique": "Netherlands" }, { "id": "8uYJottqTy", "title": "Solving Continual Offline Reinforcement Learning with Decision Transformer", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Continuous offline reinforcement learning (CORL) combines continuous and offline reinforcement learning, enabling agents to learn multiple tasks from static datasets without forgetting prior tasks. However, CORL faces challenges in balancing stability and plasticity. Existing methods, employing Actor-Critic structures and experience replay (ER), suffer from distribution shifts, low efficiency, and weak knowledge-sharing. To address these issues, we first compare AC-based offline algorithms with Decision Transformer (DT) within the CORL framework. DT offers advantages in learning efficiency, distribution shift mitigation, and zero-shot generalization but exacerbates the forgetting problem during supervised parameter updates. 
We introduce multi-head DT (MH-DT) and low-rank adaptation DT (LoRA-DT) to mitigate DT's forgetting problem. MH-DT stores task-specific knowledge using multiple heads, facilitating knowledge sharing with common components. It employs distillation and selective rehearsal to enhance current task learning when a replay buffer is available. In buffer-unavailable scenarios, LoRA-DT merges less influential weights and fine-tunes DT's decisive MLP layer to adapt to the current task. Extensive experiments on MuJoCo and Meta-World benchmarks demonstrate that our methods outperform SOTA CORL baselines and showcase enhanced learning capabilities and superior memory efficiency.", "keywords": "offline RL;continual learning;decision transformer", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Kaixin Huang;Li Shen;Chen Zhao;Chun Yuan;Dacheng Tao", "authorids": "~Kaixin_Huang1;~Li_Shen1;~Chen_Zhao8;~Chun_Yuan1;~Dacheng_Tao1", "gender": "M;M;;M;", "homepage": ";https://sites.google.com/site/mathshenli/home;;https://www.sigs.tsinghua.edu.cn/fg3/105064.jhtml;", "dblp": ";91/3680-8;;;", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-CN;yVhgENIAAAAJ;;https://scholar.google.com.hk/citations?user=fYdxi2sAAAAJ;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Kaixin_Huang1;~Li_Shen1;~Chen_Zhao8;~Chun_Yuan1;~Dacheng_Tao1", "aff": "Electronic Engineering, Tsinghua University, Tsinghua University;JD Explore Academy;;Tsinghua University;", "aff_domain": "mails.tsinghua.edu.cn;jd.com;;tsinghua.edu.cn;", "position": "MS student;Researcher;;Full Professor;", "bibtex": "@misc{\nhuang2024solving,\ntitle={Solving Continual Offline Reinforcement Learning with Decision Transformer},\nauthor={Kaixin Huang and Li Shen and Chen Zhao and Chun Yuan and Dacheng Tao},\nyear={2024},\nurl={https://openreview.net/forum?id=8uYJottqTy}\n}", "github": "", "project": "", "reviewers": "Md6w;ZQbc;b7vq;1HUt", "site": "https://openreview.net/forum?id=8uYJottqTy", "pdf_size": 18435930, "rating": "3;3;5;5", "confidence": "4;4;2;3", "soundness": "2;2;3;3", "contribution": "2;2;2;2", "presentation": "1;2;3;2", "wc_summary": "50;38;41;34", "wc_strengths": "28;9;35;36", "wc_weaknesses": "278;176;90;38", "wc_questions": "98;42;2;22", "wc_review": "454;265;168;130", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 40.75, 5.889609494694874 ], "wc_strengths_avg": [ 27.0, 10.8397416943394 ], "wc_weaknesses_avg": [ 145.5, 90.99862636325891 ], "wc_questions_avg": [ 41.0, 35.81898937714463 ], "wc_review_avg": [ 254.25, 125.39213492081551 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10700540742214198622&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Tsinghua University;JD", "aff_unique_dep": "Electronic Engineering;JD Explore Academy", "aff_unique_url": "https://www.tsinghua.edu.cn;", "aff_unique_abbr": "THU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", 
"aff_country_unique": "China;" }, { "title": "Learning Nash Equilibria in Rank-1 Games", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19312", "id": "8utTlmhw8v", "author_site": "Nikolas Patris, Ioannis Panageas", "tldr": "", "abstract": "Learning Nash equilibria (NE) in games has garnered significant attention, particularly in the context of training Generative Adversarial Networks (GANs) and multi-agent Reinforcement Learning. The current state-of-the-art in efficiently learning games focuses on landscapes that meet the (weak) Minty property or games characterized by a unique function, often referred to as potential games. A significant challenge in this domain is that computing Nash equilibria is a computationally intractable task [Daskalakis et al. 2009]. \n\nIn this paper we focus on bimatrix games (A,B) called rank-1. These are games in which the sum of the payoff matrices A+B is a rank 1 matrix; note that standard zero-sum games are rank 0. We show that optimistic gradient descent/ascent converges to an \\epsilon-approximate NE after 1/\\epsilon^2 log(1/\\epsilon) iterates in rank-1 games. We achieve this by leveraging structural results about the NE landscape of rank-1 games Adsul et al. 2021. Notably, our approach bypasses the fact that these games do not satisfy the MVI property.", "keywords": "learning in games;rank games;Nash equilibria;Minty;optimistic gradient", "primary_area": "learning theory", "supplementary_material": "", "author": "Nikolas Patris;Ioannis Panageas", "authorids": "~Nikolas_Patris1;~Ioannis_Panageas1", "gender": "M;M", "homepage": "https://npatris.github.io/;https://panageas.github.io", "dblp": "297/4669;139/3829", "google_scholar": "https://scholar.google.com/citations?hl=en;5NiFWuwAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Nikolas_Patris1;~Ioannis_Panageas1", "aff": "University of California, Irvine;Donald Bren School of Information and Computer Sciences, University of California, Irvine", "aff_domain": "uci.edu;ics.uci.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\npatris2024learning,\ntitle={Learning Nash Equilibria in Rank-1 Games},\nauthor={Nikolas Patris and Ioannis Panageas},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=8utTlmhw8v}\n}", "github": "", "project": "", "reviewers": "LH17;cjaZ;r4eu;TKWX", "pdf_size": 7806530, "rating": "5;5;6;8", "confidence": "2;4;4;3", "soundness": "3;3;3;3", "contribution": "3;1;2;3", "presentation": "2;2;4;3", "wc_summary": "76;179;120;95", "wc_strengths": "28;16;30;100", "wc_weaknesses": "42;81;183;29", "wc_questions": "380;45;28;45", "wc_review": "526;321;361;269", "wc_reply_reviewers": "0;0;84;17", "wc_reply_authors": "1608;803;844;455", "reply_reviewers": "0;0;2;1", "reply_authors": "3;2;3;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 117.5, 38.78466191679386 ], "wc_strengths_avg": [ 43.5, 33.05676935213119 ], "wc_weaknesses_avg": [ 83.75, 60.41264354421184 ], "wc_questions_avg": [ 124.5, 147.6761659849009 ], "wc_review_avg": [ 369.25, 96.19868762098577 ], "wc_reply_reviewers_avg": [ 25.25, 34.62206666275137 ], "wc_reply_authors_avg": [ 927.5, 420.9539761066523 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.5, 0.5 ], 
"replies_avg": [ 21, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12925125726078962169&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "openreview": "https://openreview.net/forum?id=8utTlmhw8v", "pdf": "https://openreview.net/pdf?id=8utTlmhw8v", "email": "uci.edu;ics.uci.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Irvine", "aff_unique_dep": "", "aff_unique_url": "https://www.uci.edu", "aff_unique_abbr": "UCI", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Irvine", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "8vGXHjuCiq", "title": "Connectivity-based Token Condensation for Efficient Vision Transformer", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "The high computational cost of vision transformers blocks their implementation on resource-limited devices such as mobile phones. Reducing the number of tokens can significantly accelerate the inference process and save computational resources. Most of the existing token pruning methods focus on evaluating token's importance and discard the unimportant tokens directly, which incur significant information loss. A few methods suggest ways focusing on merging while directly partition tokens into two parts by random or odd/even partition, which do not consider carefully how to select tokens. In this paper, we propose a new token condensation method based on the connectivity between tokens. Different from the previous methods, we gradually condense the large number of tokens by selection and fusion. The most representative tokens are selected and the others are separately fused into them. Extensive experiments are conducted on benchmark datasets. Compared with the existing methods, our method achieves higher accuracy with lower computational cost. 
For example, our method can reduce 50\\% FLOPs of DeiT-S without accuracy degradation on ImageNet dataset.", "keywords": "Vision Transformer;Token Condensation;Connectivity", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Tianxing Na;Yehui Tang;Chao Zhang;Chao Xu;Yunhe Wang;Kai Han", "authorids": "~Tianxing_Na1;~Yehui_Tang1;~Chao_Zhang10;~Chao_Xu1;~Yunhe_Wang1;~Kai_Han2", "gender": "M;M;M;M;M;M", "homepage": ";;http://www.cis.pku.edu.cn/faculty/vision/zhangchao/zhangchao.htm;http://www.cis.pku.edu.cn/faculty/vision/xuchao/xuchao01.htm;https://www.wangyunhe.site/;https://iamhankai.github.io", "dblp": ";244/9659;94/3019-1;;63/8217-1;51/4757-2", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;TkSZQ6gAAAAJ;NeCCx-kAAAAJ;https://scholar.google.co.uk/citations?hl=zh-CN;https://scholar.google.com.sg/citations?user=isizOkYAAAAJ;vThoBVcAAAAJ", "orcid": ";;;;0000-0002-0142-509X;0000-0002-9761-2702", "linkedin": ";;;;;", "or_profile": "~Tianxing_Na1;~Yehui_Tang1;~Chao_Zhang10;~Chao_Xu1;~Yunhe_Wang1;~Kai_Han2", "aff": "Peking University;Huawei Technologies Ltd.;Peking University;Peking University;Huawei Noah's Ark Lab;Huawei Noah's Ark Lab", "aff_domain": "pku.edu.cn;huawei.com;pku.edu.cn;pku.edu;huawei.com;huawei.com", "position": "MS student;Researcher;Full Professor;Full Professor;Principal Researcher;Principal Researcher", "bibtex": "@misc{\nna2024connectivitybased,\ntitle={Connectivity-based Token Condensation for Efficient Vision Transformer},\nauthor={Tianxing Na and Yehui Tang and Chao Zhang and Chao Xu and Yunhe Wang and Kai Han},\nyear={2024},\nurl={https://openreview.net/forum?id=8vGXHjuCiq}\n}", "github": "", "project": "", "reviewers": "V3W2;DqFk;apVU;1bHr", "site": "https://openreview.net/forum?id=8vGXHjuCiq", "pdf_size": 7198473, "rating": "3;3;5;6", "confidence": "4;4;4;4", "soundness": "2;2;2;3", "contribution": "2;2;2;2", "presentation": "3;2;1;2", "wc_summary": "247;70;98;51", "wc_strengths": "61;16;29;50", "wc_weaknesses": "391;126;136;307", "wc_questions": "190;45;35;46", "wc_review": "889;257;298;454", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 116.5, 77.17674520216566 ], "wc_strengths_avg": [ 39.0, 17.564168070250297 ], "wc_weaknesses_avg": [ 240.0, 113.02875740270703 ], "wc_questions_avg": [ 79.0, 64.23005527009921 ], "wc_review_avg": [ 474.5, 250.34426296602047 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:gNRJtyhzI2QJ:scholar.google.com/&scioq=Connectivity-based+Token+Condensation+for+Efficient+Vision+Transformer&hl=en&as_sdt=0,48", "gs_version_total": 0, "aff_unique_index": "0;1;0;0;1;1", "aff_unique_norm": "Peking University;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "http://www.pku.edu.cn;https://www.huawei.com", "aff_unique_abbr": "Peking U;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" 
}, { "title": "What does automatic differentiation compute for neural networks?", "status": "Spotlight", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19311", "id": "8vKknbgXxf", "author_site": "Sejun Park, Sanghyuk Chun, Wonyeol Lee", "tldr": "", "abstract": "Forward- or reverse-mode automatic differentiation (AD) is a popular algorithm for computing the derivative of a function expressed by a program. AD always outputs the correct derivative if a program does not use any non-differentiable functions and control flows; however, it may return an arbitrary value otherwise. In this work, we investigate what AD computes for neural networks that may contain non-differentiable functions such as ReLU and maxpools. We first prove that AD always returns a generalized derivative called a Clarke subderivative for networks with pointwise activation functions, if the minibatch size is one and all non-differentiable neurons have distinct bias parameters. We show that the same conclusion does not hold otherwise, but does hold under some mild sufficient conditions. We also prove similar results for more general networks that can use maxpools and bias parameters shared across different neurons. We empirically check our sufficient conditions over popular network architectures and observe that AD almost always computes a Clarke subderivative in practical learning setups.", "keywords": "automatic differentiation;correctness;neural networks;clarke subdifferential", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Sejun Park;Sanghyuk Chun;Wonyeol Lee", "authorids": "~Sejun_Park1;~Sanghyuk_Chun1;~Wonyeol_Lee1", "gender": ";M;M", "homepage": ";https://sanghyukchun.github.io/home/;https://wonyeol.github.io", "dblp": "155/9882;213/1095.html;124/7158", "google_scholar": ";https://scholar.google.co.kr/citations?user=4_uj0xcAAAAJ;g3TYhjcAAAAJ", "orcid": ";0000-0002-4533-2610;", "linkedin": ";https://kr.linkedin.com/in/sanghyukchun/en;wonyeol/", "or_profile": "~Sejun_Park1;~Sanghyuk_Chun1;~Wonyeol_Lee1", "aff": "Korea University;NAVER AI Lab;Carnegie Mellon University", "aff_domain": "korea.ac.kr;navercorp.com;cmu.edu", "position": "Assistant Professor;Lead research scientist;Postdoc", "bibtex": "@inproceedings{\npark2024what,\ntitle={What does automatic differentiation compute for neural networks?},\nauthor={Sejun Park and Sanghyuk Chun and Wonyeol Lee},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=8vKknbgXxf}\n}", "github": "", "project": "", "reviewers": "84GA;FQtB;XRNZ;uFDP;DHhS", "pdf_size": 509858, "rating": "6;6;8;8;8", "confidence": "3;2;1;3;3", "soundness": "3;3;2;4;4", "contribution": "3;2;2;3;3", "presentation": "2;3;3;4;3", "wc_summary": "52;25;126;544;143", "wc_strengths": "20;19;32;55;115", "wc_weaknesses": "94;36;169;53;88", "wc_questions": "269;32;22;27;43", "wc_review": "435;112;349;679;389", "wc_reply_reviewers": "345;0;0;66;16", "wc_reply_authors": "2253;666;164;1155;393", "reply_reviewers": "2;0;0;1;1", "reply_authors": "7;1;1;3;1", "rating_avg": [ 7.2, 0.9797958971132712 ], "confidence_avg": [ 2.4, 0.8 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "contribution_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 178.0, 188.23920951810226 ], "wc_strengths_avg": [ 48.2, 35.82959670440068 ], "wc_weaknesses_avg": [ 88.0, 45.88245852174881 ], "wc_questions_avg": [ 78.6, 95.45386320102502 
], "wc_review_avg": [ 392.8, 181.36857500680762 ], "wc_reply_reviewers_avg": [ 85.4, 132.03878218159997 ], "wc_reply_authors_avg": [ 926.2, 741.0401878440872 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 2.6, 2.3323807579381204 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.10206207261596574, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:FflVEilSkoAJ:scholar.google.com/&scioq=What+does+automatic+differentiation+compute+for+neural+networks%3F&hl=en&as_sdt=0,23", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=8vKknbgXxf", "pdf": "https://openreview.net/pdf?id=8vKknbgXxf", "email": "korea.ac.kr;navercorp.com;cmu.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Korea University;NAVER Corporation;Carnegie Mellon University", "aff_unique_dep": ";NAVER AI Lab;", "aff_unique_url": "https://www.korea.ac.kr;https://www.naver.com;https://www.cmu.edu", "aff_unique_abbr": "KU;NAVER;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "South Korea;United States" }, { "id": "8vT0f6x1BY", "title": "Going Further: Flatness at the Rescue of Early Stopping for Adversarial Example Transferability", "track": "main", "status": "Reject", "tldr": "", "abstract": "Transferability is the property of adversarial examples to be misclassified by other models than the surrogate model for which they were crafted. Previous research has shown that early stopping the training of the surrogate model substantially increases transferability. A common hypothesis to explain this is that deep neural networks (DNNs) first learn robust features, which are more generic, thus a better surrogate. Then, at later epochs, DNNs learn non-robust features, which are more brittle, hence worst surrogate. We demonstrate that the reasons why early stopping improves transferability lie in the side effects it has on the learning dynamics of the model. We first show that early stopping benefits the transferability of non-robust features. Then, we establish links between transferability and the exploration of the loss landscape in the parameter space, on which early stopping has an inherent effect. More precisely, we observe that transferability peaks when the learning rate decays, which is also the time at which the sharpness of the loss significantly drops. \nThis leads us to evaluate the training of surrogate models with seven minimizers that minimize both loss value and loss sharpness. One of such optimizers, SAM always improves over early stopping (by up to 28.8 percentage points). We also uncover that the strong regularization induced by SAM with large flat neighborhoods is tightly linked to transferability. 
Finally, the best sharpness-aware minimizers are competitive with other training techniques, and complementary to other types of transferability techniques.", "keywords": "adversarial examples;transferability;sharpness;loss landscape;early stopping", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Martin Gubri;Maxime Cordy;YVES LE TRAON", "authorids": "~Martin_Gubri1;~Maxime_Cordy1;~YVES_LE_TRAON1", "gender": "M;M;M", "homepage": "https://gubri.eu;https://maxcordy.github.io/;https://wwwfr.uni.lu/snt/people/yves_le_traon", "dblp": "213/7879;73/10839.html;95/5206", "google_scholar": "Jt4OYwMAAAAJ;sRXHjkIAAAAJ;DmGlmNEAAAAJ", "orcid": "0000-0001-6744-6662;0000-0001-8312-1358;", "linkedin": ";;", "or_profile": "~Martin_Gubri1;~Maxime_Cordy1;~YVES_LE_TRAON1", "aff": "Parameter Lab;University of Luxemburg;", "aff_domain": "parameterlab.de;uni.lu;", "position": "Principal Researcher;Researcher;", "bibtex": "@misc{\ngubri2024going,\ntitle={Going Further: Flatness at the Rescue of Early Stopping for Adversarial Example Transferability},\nauthor={Martin Gubri and Maxime Cordy and YVES LE TRAON},\nyear={2024},\nurl={https://openreview.net/forum?id=8vT0f6x1BY}\n}", "github": "", "project": "", "reviewers": "2E6M;hxCY;4sAy;5DVg", "site": "https://openreview.net/forum?id=8vT0f6x1BY", "pdf_size": 769398, "rating": "3;3;5;6", "confidence": "4;5;4;4", "soundness": "2;1;3;3", "contribution": "2;2;3;3", "presentation": "1;3;4;3", "wc_summary": "201;108;134;95", "wc_strengths": "29;117;53;65", "wc_weaknesses": "351;467;619;318", "wc_questions": "168;15;75;5", "wc_review": "749;707;881;483", "wc_reply_reviewers": "0;0;30;0", "wc_reply_authors": "429;378;347;204", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 134.5, 40.88092464707715 ], "wc_strengths_avg": [ 66.0, 32.17141588429082 ], "wc_weaknesses_avg": [ 438.75, 117.86512418862503 ], "wc_questions_avg": [ 65.75, 64.82042502174758 ], "wc_review_avg": [ 705.0, 143.35271186831451 ], "wc_reply_reviewers_avg": [ 7.5, 12.99038105676658 ], "wc_reply_authors_avg": [ 339.5, 83.52993475395512 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5555555555555555, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13291102371347036259&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1", "aff_unique_norm": "Parameter Lab;University of Luxembourg", "aff_unique_dep": ";", "aff_unique_url": ";https://wwwen.uniluxembourg.lu", "aff_unique_abbr": ";Uni Lu", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1", "aff_country_unique": ";Luxembourg" }, { "id": "8w6FzR68DS", "title": "PriViT: Vision Transformers for Fast Private Inference", "track": "main", "status": "Reject", "tldr": "", "abstract": "The Vision Transformer (ViT) architecture has emerged as the backbone of choice for state-of-the-art deep models for computer vision applications. However, ViTs are ill-suited for private inference using secure multi-party computation (MPC) protocols, due to the large number of non-polynomial operations (self-attention, feed-forward rectifiers, layer normalization). 
We propose PriViT, a gradient-based algorithm to selectively Taylorize nonlinearities in ViTs while maintaining their prediction accuracy. Our algorithm is conceptually simple, easy to implement, and achieves improved performance over existing approaches for designing MPC-friendly transformer architectures in terms of achieving the Pareto frontier in latency-accuracy. We confirm these improvements via experiments on several standard image classification tasks.", "keywords": "private inference;transformers;secure multi-party communication", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/77397e0854eaf94604242cbe06f4a72ccb6b3b17.pdf", "author": "Naren Dhyani;Jianqiao Cambridge Mo;Minsu Cho;Ameya Joshi;Siddharth Garg;Brandon Reagen;Chinmay Hegde", "authorids": "~Naren_Dhyani1;~Jianqiao_Cambridge_Mo1;~Minsu_Cho2;~Ameya_Joshi2;~Siddharth_Garg1;~Brandon_Reagen1;~Chinmay_Hegde1", "gender": "M;M;M;M;M;M;M", "homepage": ";http://engineering.nyu.edu/people/siddharth-garg/;https://brandonreagen.com/;https://chinmayhegde.github.io/;https://ameya005.github.io;https://engineering.nyu.edu/student/jianqiao-cambridge-mo;", "dblp": ";94/3807;135/8203;39/2056;148/8731;260/4034;", "google_scholar": ";https://scholar.google.com.tw/citations?user=Yf8OqQQAAAAJ;cO2uYoAAAAAJ;eJAV17IAAAAJ;jZgsp_sAAAAJ;rydgKnMAAAAJ;1pcqgUYAAAAJ", "orcid": ";;;;;0000-0001-9533-8183;", "linkedin": "naren-dhyani/;;;;;jianqiao-cambridge-mo/;", "or_profile": "~Naren_Dhyani1;~Siddharth_Garg1;~Brandon_Reagen1;~Chinmay_Hegde1;~Ameya_A_Joshi1;~Jianqiao_Mo1;~Minsu_Cho3", "aff": ";New York University;New York University;New York University;InstaDeep;New York University;Samsung", "aff_domain": ";nyu.edu;nyu.edu;nyu.edu;instadeep.com;nyu.edu;samsung.com", "position": ";Associate Professor;Professor;Associate Professor;Researcher;PhD student;Researcher", "bibtex": "@misc{\ndhyani2024privit,\ntitle={PriViT: Vision Transformers for Fast Private Inference},\nauthor={Naren Dhyani and Jianqiao Cambridge Mo and Minsu Cho and Ameya Joshi and Siddharth Garg and Brandon Reagen and Chinmay Hegde},\nyear={2024},\nurl={https://openreview.net/forum?id=8w6FzR68DS}\n}", "github": "", "project": "", "reviewers": "pfyb;HTX8;y916;BazG", "site": "https://openreview.net/forum?id=8w6FzR68DS", "pdf_size": 686634, "rating": "5;5;5;6", "confidence": "3;2;3;3", "soundness": "3;2;2;3", "contribution": "2;2;2;3", "presentation": "2;3;2;2", "wc_summary": "68;55;25;81", "wc_strengths": "27;38;20;39", "wc_weaknesses": "23;15;80;405", "wc_questions": "413;51;11;16", "wc_review": "531;159;136;541", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "685;285;438;458", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 57.25, 20.765054779605084 ], "wc_strengths_avg": [ 31.0, 7.905694150420948 ], "wc_weaknesses_avg": [ 130.75, 160.3096612809097 ], "wc_questions_avg": [ 122.75, 168.28305767367075 ], "wc_review_avg": [ 341.75, 194.45227563595137 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 466.5, 142.8014355670138 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 4, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=5350038279028032370&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0;1;0;2", "aff_unique_norm": "New York University;InstaDeep;Samsung", "aff_unique_dep": ";;Samsung", "aff_unique_url": "https://www.nyu.edu;https://www.instadeep.com;https://www.samsung.com", "aff_unique_abbr": "NYU;InstaDeep;Samsung", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;2", "aff_country_unique": "United States;United Kingdom;South Korea" }, { "id": "8wFNfTxM6i", "title": "LegoNet: Piecing Together and Breaking Apart Sub-Networks for Scalable Multi-task Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Despite considerable progress in general-purpose vision models, most efforts focus on designing a new unified structure that can handle different types of input and supervision. In contrast, we believe each vision task requires its specific designed module to use different forms of perception. For example, a feature pyramid network is commonly used in segmentation but not in classification. We present LegoNet, a general Multi-Task Learning (MTL) framework that is assembled with many small sub-networks from different vision tasks, similar to how Lego pieces can be pieced together into larger structures. By leveraging this property, LegoNet can borrow design elements from single-task models and combine them to create a scalable multi-task model. We demonstrate its efficiency on mainstream vision datasets such as ImageNet, COCO, and ADE20K, and show it achieves comparable results to state-of-the-art single-task models. Moreover, like a Lego creation capable of dynamically piecing together or breaking apart pieces, our model exhibits scalability in both its model capacity and adaptability to a multitude of tasks. It can remove sub-networks and decompose into high-performing components for efficient adaptation, or add sub-networks for learning new tasks in a continuous learning scenario. On downstream tasks, it can be fine-tuned with fewer training parameters, fewer model parameters, and even transformed to a low computation shape. 
These functions can be controlled and combined to meet various demands of downstream applications.", "keywords": "multi-task learning; continuous learning; efficient adaptation", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "/attachment/874e946be23562ebbaa2ee3abab93bd90e5941ff.pdf", "author": "Zitian Chen;Mingyu Ding;Yikang Shen;Wei Zhan;Erik Learned-Miller;Chuang Gan", "authorids": "~Zitian_Chen1;~Mingyu_Ding1;~Yikang_Shen1;~Wei_Zhan2;~Erik_Learned-Miller2;~Chuang_Gan1", "gender": "M;M;M;;;M", "homepage": "http://chenzt.net/;https://dingmyu.github.io/;;;;http://people.csail.mit.edu/ganchuang/", "dblp": "218/6728;188/5243;152/8226;;;139/6993", "google_scholar": "n6rhKWQAAAAJ;w4yTWwoAAAAJ;qff5rRYAAAAJ;;;PTeSCbIAAAAJ", "orcid": ";0000-0001-6556-8359;;;;", "linkedin": ";dingmyu/;;;;", "or_profile": "~Zitian_Chen1;~Mingyu_Ding1;~Yikang_Shen1;~Wei_Zhan2;~Erik_Learned-Miller2;~Chuang_Gan1", "aff": "University of Massachusetts, Amherst;University of California, Berkeley;International Business Machines;;;University of Massachusetts at Amherst", "aff_domain": "umass.edu;berkeley.edu;ibm.com;;;umass.edu", "position": "PhD student;Postdoc;Researcher;;;Assistant Professor", "bibtex": "@misc{\nchen2024legonet,\ntitle={LegoNet: Piecing Together and Breaking Apart Sub-Networks for Scalable Multi-task Learning},\nauthor={Zitian Chen and Mingyu Ding and Yikang Shen and Wei Zhan and Erik Learned-Miller and Chuang Gan},\nyear={2024},\nurl={https://openreview.net/forum?id=8wFNfTxM6i}\n}", "github": "", "project": "", "reviewers": "MPaF;pEdg;thbj", "site": "https://openreview.net/forum?id=8wFNfTxM6i", "pdf_size": 3383459, "rating": "5;5;8", "confidence": "5;4;4", "soundness": "2;3;3", "contribution": "2;2;3", "presentation": "3;3;3", "wc_summary": "91;126;102", "wc_strengths": "88;55;44", "wc_weaknesses": "237;346;140", "wc_questions": "59;2;383", "wc_review": "475;529;669", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 106.33333333333333, 14.613540144521982 ], "wc_strengths_avg": [ 62.333333333333336, 18.696404883173543 ], "wc_weaknesses_avg": [ 241.0, 84.14669730100324 ], "wc_questions_avg": [ 148.0, 167.7915373312969 ], "wc_review_avg": [ 557.6666666666666, 81.75301557469031 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 3, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:MjM1bo8Q1VcJ:scholar.google.com/&scioq=LegoNet:+Piecing+Together+and+Breaking+Apart+Sub-Networks+for+Scalable+Multi-task+Learning&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Massachusetts Amherst;University of California, Berkeley;International Business Machines Corporation", "aff_unique_dep": ";;", "aff_unique_url": "https://www.umass.edu;https://www.berkeley.edu;https://www.ibm.com", "aff_unique_abbr": "UMass Amherst;UC Berkeley;IBM", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Amherst;Berkeley;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": 
"United States" }, { "title": "MUSTARD: Mastering Uniform Synthesis of Theorem and Proof Data", "status": "Spotlight", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19310", "id": "8xliOUg9EW", "author_site": "Yinya Huang, Xiaohan Lin, Zhengying Liu, Qingxing Cao, Huajian Xin, Haiming Wang, Zhenguo Li, Linqi Song, Xiaodan Liang", "tldr": "", "abstract": "Recent large language models (LLMs) have witnessed significant advancement in various tasks, including mathematical reasoning and theorem proving. As these two tasks require strict and formal multi-step inference, they are appealing domains for exploring the reasoning ability of LLMs but still face important challenges. Previous studies such as Chain-of-Thought (CoT) have revealed the effectiveness of intermediate steps guidance. However, such step-wise annotation requires heavy labor, leading to insufficient training steps for current benchmarks. To fill this gap, this work introduces MUSTARD, a data generation framework that masters uniform synthesis of theorem and proof data of high quality and diversity. MUSTARD synthesizes data in three stages: (1) It samples a few mathematical concept seeds as the problem category. (2) Then, it prompts a generative language model with the sampled concepts to obtain both the problems and their step-wise formal solutions. (3) Lastly, the framework utilizes a proof assistant (e.g., Lean Prover) to filter the valid proofs. With the proposed MUSTARD, we present a theorem-and-proof benchmark MUSTARDSAUCE with 5,866 valid data points. Each data point contains an informal statement, an informal proof, and a translated formal proof that passes the prover validation. We perform extensive analysis and demonstrate that MUSTARD generates validated high-quality step-by-step data. We further apply the MUSTARDSAUCE for fine-tuning smaller language models. The fine-tuned Llama 2-7B achieves a 15.41% average relative performance gain in automated theorem proving, and 8.18% in math word problems. 
Codes and data are available at https://github.com/Eleanor-H/MUSTARD.", "keywords": "theorem proving;math word problem;mathematical reasoning;benchmark", "primary_area": "datasets and benchmarks", "supplementary_material": "/attachment/d03852344e1e4ab8a88a1ab90ae3d42a901c6322.zip", "author": "Yinya Huang;Xiaohan Lin;Zhengying Liu;Qingxing Cao;Huajian Xin;Haiming Wang;Zhenguo Li;Linqi Song;Xiaodan Liang", "authorids": "~Yinya_Huang1;~Xiaohan_Lin2;~Zhengying_Liu2;~Qingxing_Cao1;~Huajian_Xin1;~Haiming_Wang1;~Zhenguo_Li1;~Linqi_Song1;~Xiaodan_Liang2", "gender": ";M;M;M;M;M;M;M;F", "homepage": "https://eleanor-h.github.io/;https://xiaohlim.github.io/;;;https://xinhuajian.wordpress.com/;;http://www.ee.columbia.edu/~zgli/;https://sites.google.com/site/aisquaredlab/;https://www.sysu-hcp.net/", "dblp": "282/1562;;241/1782;149/7615;356/3551;97/604;23/6479;137/7963.html;", "google_scholar": "dWStaRIAAAAJ;;http:// DFme0joAAAAJ;flOBrd8AAAAJ;E5M9x8wAAAAJ;zDPqP6AAAAAJ;XboZC1AAAAAJ;UcGN3MoAAAAJ;voxznZAAAAAJ", "orcid": "0000-0002-0686-0832;;;;;;;0000-0003-2756-4984;", "linkedin": ";;;;;;;;", "or_profile": "~Yinya_Huang1;~Xiaohan_Lin2;~Zhengying_Liu2;~Qingxing_Cao1;~Huajian_Xin1;~Haiming_Wang1;~Zhenguo_Li1;~Linqi_Song1;~Xiaodan_Liang2", "aff": "City University of Hong Kong;SUN YAT-SEN UNIVERSITY;Huawei Technologies Ltd.;SUN YAT-SEN UNIVERSITY, Tsinghua University;University of Edinburgh, University of Edinburgh;SUN YAT-SEN UNIVERSITY;Huawei Noah's Ark Lab;City University of Hong Kong;SUN YAT-SEN UNIVERSITY", "aff_domain": "cityu.edu.hk;sysu.edu.cn;huawei.com;sysu.edu.cn;ed.ac.uk;sysu.edu.cn;huawei.com;cityu.edu.hk;sysu.edu.cn", "position": "Postdoc;MS student;Researcher;Postdoc;PhD student;PhD student;Principal Researcher;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nhuang2024mustard,\ntitle={{MUSTARD}: Mastering Uniform Synthesis of Theorem and Proof Data},\nauthor={Yinya Huang and Xiaohan Lin and Zhengying Liu and Qingxing Cao and Huajian Xin and Haiming Wang and Zhenguo Li and Linqi Song and Xiaodan Liang},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=8xliOUg9EW}\n}", "github": "", "project": "", "reviewers": "9Wae;Mb77;LURb", "pdf_size": 2708190, "rating": "6;8;8", "confidence": "4;4;4", "soundness": "3;4;3", "contribution": "2;3;3", "presentation": "3;3;3", "wc_summary": "58;62;139", "wc_strengths": "137;90;48", "wc_weaknesses": "341;34;41", "wc_questions": "48;21;81", "wc_review": "584;207;309", "wc_reply_reviewers": "34;13;79", "wc_reply_authors": "3110;1393;1246", "reply_reviewers": "1;1;1", "reply_authors": "8;3;4", "rating_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 86.33333333333333, 37.27674282385138 ], "wc_strengths_avg": [ 91.66666666666667, 36.353205574688396 ], "wc_weaknesses_avg": [ 138.66666666666666, 143.09980976771269 ], "wc_questions_avg": [ 50.0, 24.535688292770594 ], "wc_review_avg": [ 366.6666666666667, 159.2196246977391 ], "wc_reply_reviewers_avg": [ 42.0, 27.53179979587241 ], "wc_reply_authors_avg": [ 1916.3333333333333, 846.1805691195914 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 5.0, 2.160246899469287 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 26, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=16858387062816034025&as_sdt=5,24&sciodt=0,24&hl=en", "gs_version_total": 6, "openreview": "https://openreview.net/forum?id=8xliOUg9EW", "pdf": "https://openreview.net/pdf?id=8xliOUg9EW", "email": "cityu.edu.hk;sysu.edu.cn;huawei.com;sysu.edu.cn;ed.ac.uk;sysu.edu.cn;huawei.com;cityu.edu.hk;sysu.edu.cn", "author_num": 9, "aff_unique_index": "0;1;2;1;3;1;2;0;1", "aff_unique_norm": "City University of Hong Kong;Sun Yat-sen University;Huawei;University of Edinburgh", "aff_unique_dep": ";;Huawei Technologies;", "aff_unique_url": "https://www.cityu.edu.hk;http://www.sysu.edu.cn;https://www.huawei.com;https://www.ed.ac.uk", "aff_unique_abbr": "CityU;SYSU;Huawei;Edinburgh", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;1;0;0;0;0", "aff_country_unique": "China;United Kingdom" }, { "id": "8y5vlBuRll", "title": "Efficient Action Robust Reinforcement Learning with Probabilistic Policy Execution Uncertainty", "track": "main", "status": "Reject", "tldr": "", "abstract": "Robust reinforcement learning (RL) aims to find a policy that optimizes the worst-case performance in the face of uncertainties. In this paper, we focus on action robust RL with the probabilistic policy execution uncertainty, in which, instead of always carrying out the action specified by the policy, the agent will take the action specified by the policy with probability $1-\\rho$ and an alternative adversarial action with probability $\\rho$. We establish the existence of an optimal policy on the action robust MDPs with probabilistic policy execution uncertainty and provide the action robust Bellman optimality equation for its solution. Furthermore, we develop Action Robust Reinforcement Learning with Certificates (ARRLC) algorithm that achieves minimax optimal regret and sample complexity. 
Furthermore, we conduct numerical experiments to validate our approach's robustness, demonstrating that ARRLC outperforms non-robust RL algorithms and converges faster than the robust TD algorithm in the presence of action perturbations.", "keywords": "Robust Reinforcement Learning;Sample Complexity", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/520f7198cec22e28b38e1179fd69d77d07cf004e.zip", "author": "Guanlin Liu;Zhihan Zhou;Han Liu;Lifeng Lai", "authorids": "~Guanlin_Liu1;~Zhihan_Zhou1;~Han_Liu4;~Lifeng_Lai1", "gender": "M;M;;", "homepage": ";http://zhihan1996.github.io/;;", "dblp": "224/9954;226/5688-1.html;;12/4889", "google_scholar": "a7eYJk4AAAAJ;bNerBT8AAAAJ;;gOhaCfUAAAAJ", "orcid": "0000-0002-0595-9398;;;", "linkedin": ";zhihan-zhou-6a057716b/;;", "or_profile": "~Guanlin_Liu1;~Zhihan_Zhou1;~Han_Liu4;~Lifeng_Lai1", "aff": "University of California, Davis;Northwestern University;Northwestern University;University of California, Davis", "aff_domain": "ucdavis.edu;u.northwestern.edu;u.northwestern.edu;ucdavis.edu", "position": "PhD student;PhD student;Associate Professor;Full Professor", "bibtex": "@misc{\nliu2024efficient,\ntitle={Efficient Action Robust Reinforcement Learning with Probabilistic Policy Execution Uncertainty},\nauthor={Guanlin Liu and Zhihan Zhou and Han Liu and Lifeng Lai},\nyear={2024},\nurl={https://openreview.net/forum?id=8y5vlBuRll}\n}", "github": "", "project": "", "reviewers": "wBWc;N6KC;if62", "site": "https://openreview.net/forum?id=8y5vlBuRll", "pdf_size": 3264942, "rating": "5;6;6", "confidence": "4;5;4", "soundness": "2;3;3", "contribution": "2;1;2", "presentation": "1;3;2", "wc_summary": "97;19;82", "wc_strengths": "35;34;36", "wc_weaknesses": "338;227;145", "wc_questions": "137;1;9", "wc_review": "607;281;272", "wc_reply_reviewers": "0;82;16", "wc_reply_authors": "1004;642;693", "reply_reviewers": "0;1;1", "reply_authors": "3;3;2", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 1.6666666666666667, 0.4714045207910317 ], "presentation_avg": [ 2.0, 0.816496580927726 ], "wc_summary_avg": [ 66.0, 33.793490497431605 ], "wc_strengths_avg": [ 35.0, 0.816496580927726 ], "wc_weaknesses_avg": [ 236.66666666666666, 79.08785550821875 ], "wc_questions_avg": [ 49.0, 62.31104770958892 ], "wc_review_avg": [ 386.6666666666667, 155.84251309860505 ], "wc_reply_reviewers_avg": [ 32.666666666666664, 35.490217744549774 ], "wc_reply_authors_avg": [ 779.6666666666666, 159.98819400887749 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6122834642764614269&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "University of California, Davis;Northwestern University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucdavis.edu;https://www.northwestern.edu", "aff_unique_abbr": "UC Davis;NU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Davis;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "8zJevzvk64", "title": "Schrodinger Bridge to Bridge Generative Diffusion Method to Off-Policy Evaluation", "track": "main", "status": "Reject", 
"tldr": "", "abstract": "The problem of off-policy evaluation (OPE) in reinforcement learning (RL), which evaluates a given policy using data collected from a different behavior policy, plays an important role in many real-world applications. The OPE under the model of episodic non-stationary finite-horizon Markov decision process (MDP) has been widely studied. However, the general model-free importance sampling (IS) methods suffer from the curse of horizon and dimensionality, while the improved marginal importance sampling (MIS) can only be restrained to the case where the state space $\\mathcal{S}$ is sufficiently small. The model-based methods often have limited scope of application. To find a widely-applicable OPE algorithm when $\\mathcal{S}$ is continuous and high-dimensional that avoids the curse of horizon and dimensionality, which means the error of the estimator grows exponentially with the number of horizon $H$ and the dimension $d$ of the state space $\\mathcal{S}$, we apply the diffusion Schr\"odinger bridge generative model to construct a model-based estimator (CDSB estimator). Moreover, we established the statistical rate of the estimation error of the value function with a polynomial rate of $O(H^2\\sqrt{d})$, which, to the best of our knowledge, is one of the first theoretical rate results on applying Schr\"odinger bridge to reinforcement learning. This breaks the restraint of the complexity of the state space for OPE under MDP with large horizon and can be applied to various real-life decision problems with continuous setting, which is shown in our simulation using our method in continuous, high-dimensional and long-horizon RL environments and its comparison with other existing algorithms.", "keywords": "off-policy evaluation;Schrodinger bridge problem;diffusion model;generative model", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/8e88ec1313fa2926b2f3fa36bf607b8ec296ccbb.pdf", "author": "Yucong Lin;Liyuan Xu;Haoqun Cao;Hongyi Yuan;Junwei Lu", "authorids": "~Yucong_Lin2;~Liyuan_Xu2;~Haoqun_Cao1;~Hongyi_Yuan1;~Junwei_Lu1", "gender": "M;F;M;M;M", "homepage": ";https://mails.tsinghua.edu.cn;https://kencao2007.github.io/;;https://junwei-lu.github.io/", "dblp": ";;;308/0909;", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-CN;;;FG3O4i8AAAAJ;", "orcid": "0000-0002-9039-0318;;;;", "linkedin": ";;;;", "or_profile": "~Yucong_Lin2;~Liyuan_Xu2;~Haoqun_Cao1;~Hongyi_Yuan1;~Junwei_Lu1", "aff": "Beijing Institute of Technology;Tsinghua University;Renmin University of China;Tsinghua University;Harvard University", "aff_domain": "bit.edu.cn;tsinghua.edu.cn;ruc.edu.cn;tsinghua.edu.cn;harvard.edu", "position": "Postdoc;Undergrad student;Undergrad student;PhD student;Assistant Professor", "bibtex": "@misc{\nlin2024schrodinger,\ntitle={Schrodinger Bridge to Bridge Generative Diffusion Method to Off-Policy Evaluation},\nauthor={Yucong Lin and Liyuan Xu and Haoqun Cao and Hongyi Yuan and Junwei Lu},\nyear={2024},\nurl={https://openreview.net/forum?id=8zJevzvk64}\n}", "github": "", "project": "", "reviewers": "p1r4;VmGz;CNAs;ijeG", "site": "https://openreview.net/forum?id=8zJevzvk64", "pdf_size": 371040, "rating": "3;3;3;5", "confidence": "4;3;3;2", "soundness": "2;3;2;3", "contribution": "1;1;1;2", "presentation": "2;2;2;2", "wc_summary": "72;87;47;59", "wc_strengths": "92;22;21;37", "wc_weaknesses": "512;35;318;110", "wc_questions": "16;350;275;163", "wc_review": "692;494;661;369", "wc_reply_reviewers": "0;127;0;0", "wc_reply_authors": 
"323;761;593;203", "reply_reviewers": "0;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 1.25, 0.4330127018922193 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 66.25, 14.889173919328098 ], "wc_strengths_avg": [ 43.0, 28.991378028648448 ], "wc_weaknesses_avg": [ 243.75, 186.37110156888593 ], "wc_questions_avg": [ 201.0, 125.84315634948132 ], "wc_review_avg": [ 554.0, 130.68856109086212 ], "wc_reply_reviewers_avg": [ 31.75, 54.99261314031185 ], "wc_reply_authors_avg": [ 470.0, 219.49259668608414 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:vna5OldSG5IJ:scholar.google.com/&scioq=Schrodinger+Bridge+to+Bridge+Generative+Diffusion+Method+to+Off-Policy+Evaluation&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;1;3", "aff_unique_norm": "Beijing Institute of Technology;Tsinghua University;Renmin University of China;Harvard University", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.bit.edu.cn/;https://www.tsinghua.edu.cn;http://www.ruc.edu.cn;https://www.harvard.edu", "aff_unique_abbr": "BIT;THU;RUC;Harvard", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "China;United States" }, { "id": "90QOM1xB88", "title": "Improved order analysis and design of exponential integrator for diffusion models sampling", "track": "main", "status": "Reject", "tldr": "", "abstract": "Efficient differential equation solvers have significantly reduced the sampling time of diffusion models (DMs) while retaining high sampling quality. Among these solvers, exponential integrators (EI) have gained prominence by demonstrating state-of-the-art performance. However, existing high-order EI-based sampling algorithms rely on degenerate EI solvers, resulting in inferior error bounds and reduced accuracy in contrast to the theoretically anticipated results under optimal settings. This situation makes the sampling quality extremely vulnerable to seemingly innocuous design choices such as timestep schedules. For example, an inefficient timestep scheduler might necessitate twice the number of steps to achieve a quality comparable to that obtained through carefully optimized timesteps. To address this issue, we reevaluate the design of high-order differential solvers for DMs. Through a thorough order analysis, we reveal that the degeneration of existing high-order EI solvers can be attributed to the absence of essential order conditions. By reformulating the differential equations in DMs and capitalizing on the theory of exponential integrators, we propose refined EI solvers that fulfill all the order conditions, which we designate as Refined Exponential Solver (RES). Utilizing these improved solvers, RES exhibits more favorable error bounds theoretically and achieves superior sampling efficiency and stability in practical applications. 
For instance, a simple switch from the single-step DPM-Solver++ to our order-satisfied numerical scheme when NFE$=9$, results in a reduction of numerical defects by 25.2 and FID improvement of 25.4 (16.77 vs 12.51) on a pre-trained ImageNet diffusion model.", "keywords": "diffusion model;order analysis;fast sampling", "primary_area": "generative models", "supplementary_material": "", "author": "Qinsheng Zhang;Jiaming Song;Yongxin Chen", "authorids": "~Qinsheng_Zhang1;~Jiaming_Song1;~Yongxin_Chen1", "gender": "M;M;M", "homepage": "https://qsh-zh.github.io/;http://tsong.me;https://yongxin.ae.gatech.edu/", "dblp": ";173/5104;", "google_scholar": ";;X8BYiV4AAAAJ", "orcid": ";;", "linkedin": ";jiamings/;", "or_profile": "~Qinsheng_Zhang1;~Jiaming_Song1;~Yongxin_Chen1", "aff": "Georgia Institute of Technology;Luma AI;Georgia Institute of Technology", "aff_domain": "gatech.edu;lumalabs.ai;gatech.edu", "position": "PhD student;Chief Scientist;Associate Professor", "bibtex": "@misc{\nzhang2024improved,\ntitle={Improved order analysis and design of exponential integrator for diffusion models sampling},\nauthor={Qinsheng Zhang and Jiaming Song and Yongxin Chen},\nyear={2024},\nurl={https://openreview.net/forum?id=90QOM1xB88}\n}", "github": "", "project": "", "reviewers": "XmiM;xZkC;kkMa", "site": "https://openreview.net/forum?id=90QOM1xB88", "pdf_size": 12230407, "rating": "3;6;6", "confidence": "4;4;2", "soundness": "3;3;3", "contribution": "1;3;3", "presentation": "2;2;3", "wc_summary": "83;34;51", "wc_strengths": "63;76;47", "wc_weaknesses": "416;81;63", "wc_questions": "37;177;15", "wc_review": "599;368;176", "wc_reply_reviewers": "246;42;0", "wc_reply_authors": "1180;586;399", "reply_reviewers": "1;1;0", "reply_authors": "3;2;2", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.3333333333333335, 0.9428090415820634 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 56.0, 20.314198646923455 ], "wc_strengths_avg": [ 62.0, 11.86029791643813 ], "wc_weaknesses_avg": [ 186.66666666666666, 162.3295687448497 ], "wc_questions_avg": [ 76.33333333333333, 71.74646719912803 ], "wc_review_avg": [ 381.0, 172.9335132355785 ], "wc_reply_reviewers_avg": [ 96.0, 107.44300814850634 ], "wc_reply_authors_avg": [ 721.6666666666666, 332.9607918192304 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5000000000000001, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8888918704360335549&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Georgia Institute of Technology;Luma AI", "aff_unique_dep": ";", "aff_unique_url": "https://www.gatech.edu;https://www.luma.ai", "aff_unique_abbr": "Georgia Tech;Luma AI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Learning Flexible Body Collision Dynamics with Hierarchical Contact Mesh Transformer", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19309", "id": "90yw2uM6J5", "author_site": "Youn-Yeol Yu, Jeongwhan Choi, Woojin Cho, Kookjin Lee, Nayong Kim, Kiseok Chang, ChangSeung Woo, ILHO KIM, SeokWoo Lee, Joon Young Yang, SOOYOUNG YOON, Noseong Park", "tldr": "", 
"abstract": "Recently, many mesh-based graph neural network (GNN) models have been proposed for modeling complex high-dimensional physical systems. Remarkable achievements have been made in significantly reducing the solving time compared to traditional numerical solvers. These methods are typically designed to i) reduce the computational cost in solving physical dynamics and/or ii) propose techniques to enhance the solution accuracy in fluid and rigid body dynamics. However, it remains under-explored whether they are effective in addressing the challenges of flexible body dynamics, where instantaneous collisions occur within a very short timeframe. In this paper, we present Hierarchical Contact Mesh Transformer (HCMT), which uses hierarchical mesh structures and can learn long-range dependencies (occurred by collisions) among spatially distant positions of a body --- two close positions in a higher-level mesh correspond to two distant positions in a lower-level mesh. HCMT enables long-range interactions, and the hierarchical mesh structure quickly propagates collision effects to faraway positions. To this end, it consists of a contact mesh Transformer and a hierarchical mesh Transformer (CMT and HMT, respectively). Lastly, we propose a flexible body dynamics dataset, consisting of trajectories that reflect experimental settings frequently used in the display industry for product designs. We also compare the performance of several baselines using well-known benchmark datasets. Our results show that HCMT provides significant performance improvements over existing methods. Our code is available at https://github.com/yuyudeep/hcmt.", "keywords": "graph transformer;physics-based simulation;mesh;collision;flexible dynamics", "primary_area": "applications to physical sciences (physics, chemistry, biology, etc.)", "supplementary_material": "", "author": "Youn-Yeol Yu;Jeongwhan Choi;Woojin Cho;Kookjin Lee;Nayong Kim;Kiseok Chang;ChangSeung Woo;ILHO KIM;SeokWoo Lee;Joon Young Yang;SOOYOUNG YOON;Noseong Park", "authorids": "~Youn-Yeol_Yu1;~Jeongwhan_Choi1;~Woojin_Cho1;~Kookjin_Lee1;~Nayong_Kim1;~Kiseok_Chang1;~ChangSeung_Woo1;~ILHO_KIM1;~SeokWoo_Lee1;~Joon_Young_Yang1;~SOOYOUNG_YOON1;~Noseong_Park1", "gender": "M;M;M;M;M;M;M;M;M;M;M;", "homepage": "https://sites.google.com/view/npark/home?authuser=0;https://www.jeongwhanchoi.com;https://woojin-cho.github.io/;https://scholar.google.com/citations?hl=en&user=KL89hVQAAAAJ&view_op=list_works;;;https://www.lgdisplay.com/;;https://www.lgdisplay.com;https://www.lgdisplay.com;https://lgdisplay.com;", "dblp": ";39/11215-2;;122/5103;;;;;;;;", "google_scholar": ";3MNElkYAAAAJ;cqIj5tQAAAAJ;https://scholar.google.com/citations?hl=en;;TmbYdlgAAAAJ;;;;;;", "orcid": ";0000-0002-6530-2662;;;;;;;;;;", "linkedin": ";jeongwhanchoi/;woojin-cho-02b905264/;;gh-k-30ba80218/;kiseok-chang-b0869349/?originalSubdomain=kr;;ilho-kim-918093129/?originalSubdomain=kr;;;;", "or_profile": "~Youn-Yeol_Yu1;~Jeongwhan_Choi1;~Woojin_Cho1;~Kookjin_Lee1;~Nayong_Kim1;~Kiseok_Chang1;~ChangSeung_Woo1;~ILHO_KIM1;~SeokWoo_Lee1;~Joon_Young_Yang1;~SOOYOUNG_YOON1;~Noseong_Park1", "aff": "LG Display;Yonsei University;Yonsei University;Arizona State University;LG Display;LG Display;;LG Display Co., Ltd.;LG Display;LG Display;LG Display;", "aff_domain": "lgdisplay.com;yonsei.ac.kr;yonsei.ac.kr;asu.edu;lgdisplay.com;lgdisplay.com;;lgdisplay.com;lgdisplay.com;lgdisplay.com;lgdisplay.com;", "position": "Researcher;PhD student;MS student;Assistant Professor;Researcher;Researcher;;Researcher;Principal 
Researcher;Principal Researcher;Principal Researcher;", "bibtex": "@inproceedings{\nyu2024learning,\ntitle={Learning Flexible Body Collision Dynamics with Hierarchical Contact Mesh Transformer},\nauthor={Youn-Yeol Yu and Jeongwhan Choi and Woojin Cho and Kookjin Lee and Nayong Kim and Kiseok Chang and ChangSeung Woo and ILHO KIM and SeokWoo Lee and Joon Young Yang and SOOYOUNG YOON and Noseong Park},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=90yw2uM6J5}\n}", "github": "", "project": "", "reviewers": "gSaT;zh4D;J9Rp;CgKu", "pdf_size": 14173895, "rating": "6;6;6;6", "confidence": "4;2;3;3", "soundness": "3;2;3;3", "contribution": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "114;39;76;69", "wc_strengths": "201;102;84;76", "wc_weaknesses": "639;90;77;2", "wc_questions": "579;417;41;16", "wc_review": "1533;648;278;163", "wc_reply_reviewers": "0;34;0;0", "wc_reply_authors": "3495;1250;602;290", "reply_reviewers": "0;1;0;0", "reply_authors": "6;3;3;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 74.5, 26.706740722147284 ], "wc_strengths_avg": [ 115.75, 50.11175011910879 ], "wc_weaknesses_avg": [ 202.0, 254.5279945310535 ], "wc_questions_avg": [ 263.25, 241.79782360476284 ], "wc_review_avg": [ 655.5, 537.3837083500019 ], "wc_reply_reviewers_avg": [ 8.5, 14.722431864335457 ], "wc_reply_authors_avg": [ 1409.25, 1253.0050628389336 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.5, 1.5 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13358364848196757569&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 8, "openreview": "https://openreview.net/forum?id=90yw2uM6J5", "pdf": "https://openreview.net/pdf?id=90yw2uM6J5", "email": "lgdisplay.com;yonsei.ac.kr;yonsei.ac.kr;asu.edu;lgdisplay.com;lgdisplay.com;;lgdisplay.com;lgdisplay.com;lgdisplay.com;lgdisplay.com;", "author_num": 12, "aff_unique_index": "0;1;1;2;0;0;0;0;0;0", "aff_unique_norm": "LG;Yonsei University;Arizona State University", "aff_unique_dep": "LG Display;;", "aff_unique_url": "https://www.lgdisplay.com;https://www.yonsei.ac.kr;https://www.asu.edu", "aff_unique_abbr": "LG Display;Yonsei;ASU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0;0;0;0;0", "aff_country_unique": "South Korea;United States" }, { "id": "91DFSjAva8", "title": "SERA: Sample Efficient Reward Augmentation in offline-to-online Reinforcement Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "A prospective application of offline reinforcement learning (RL) involves initializing a pre-trained policy using existing static datasets for subsequent online fine-tuning. However, direct fine-tuning of the offline pre-trained policy often results in sub-optimal performance. A primary reason is that offline conservative methods diminish the agent's capability of exploration, thereby impacting online fine-tuning performance. To enhance exploration during online fine-tuning and thus enhance the overall online fine-tuning performance, we introduce a generalized reward augmentation framework called Sample Efficient Reward Augmentation (SERA). 
SERA aims to improve the performance of online fine-tuning by designing intrinsic rewards that encourage the agent to explore. Specifically, it implicitly implements State Marginal Matching (SMM) and penalizes out-of-distribution (OOD) state actions, thus encouraging agents to cover the target state density, and achieving better online fine-tuning results. Additionally, SERA can be effortlessly plugged into various RL algorithms to improve online fine-tuning and ensure sustained asymptotic improvement, showing the versatility as well as the effectiveness of SERA. Moreover, extensive experimental results demonstrate that, on offline-to-online problems, SERA consistently and effectively enhances the performance of various offline algorithms.", "keywords": "Reinforcement Learning;Offline-to-Online RL", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/608d32bda01ddc656658dcf526ed2b5d07c812ed.pdf", "author": "Ziqi Zhang;Xiao Xiong;Zifeng Zhuang;Jinxin Liu;Donglin Wang", "authorids": "~Ziqi_Zhang7;~Xiao_Xiong2;~Zifeng_Zhuang1;~Jinxin_Liu1;~Donglin_Wang1", "gender": ";F;M;;M", "homepage": ";https://github.com/SherryHanyu;;;https://milab.westlake.edu.cn/", "dblp": ";;276/5034;;", "google_scholar": ";;;;https://scholar.google.ca/citations?user=-fo6wdwAAAAJ", "orcid": ";;;;0000-0002-8188-3735", "linkedin": ";;;;", "or_profile": "~Ziqi_Zhang7;~Xiao_Xiong2;~Zifeng_Zhuang1;~Jinxin_Liu1;~Donglin_Wang1", "aff": ";University of Cambridge;Zhejiang University;;Westlake University", "aff_domain": ";cam.ac.uk;zju.edu.cn;;westlake.edu.cn", "position": ";MS student;PhD student;;Associate Professor", "bibtex": "@misc{\nzhang2024sera,\ntitle={{SERA}: Sample Efficient Reward Augmentation in offline-to-online Reinforcement Learning},\nauthor={Ziqi Zhang and Xiao Xiong and Zifeng Zhuang and Jinxin Liu and Donglin Wang},\nyear={2024},\nurl={https://openreview.net/forum?id=91DFSjAva8}\n}", "github": "", "project": "", "reviewers": "sGsS;NgYG;zoZX;pDE4", "site": "https://openreview.net/forum?id=91DFSjAva8", "pdf_size": 2039765, "rating": "3;5;5;6", "confidence": "5;3;4;4", "soundness": "2;2;2;3", "contribution": "2;2;2;3", "presentation": "1;2;1;3", "wc_summary": "40;29;61;78", "wc_strengths": "13;12;22;80", "wc_weaknesses": "4;590;341;384", "wc_questions": "468;2;46;3", "wc_review": "525;633;470;545", "wc_reply_reviewers": "0;98;0;26", "wc_reply_authors": "1053;3110;627;710", "reply_reviewers": "0;2;0;1", "reply_authors": "4;9;3;2", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 1.75, 0.82915619758885 ], "wc_summary_avg": [ 52.0, 18.907670401189037 ], "wc_strengths_avg": [ 31.75, 28.12805538959279 ], "wc_weaknesses_avg": [ 329.75, 210.3049868643157 ], "wc_questions_avg": [ 129.75, 196.09484312444323 ], "wc_review_avg": [ 543.25, 58.64458628040614 ], "wc_reply_reviewers_avg": [ 31.0, 40.11234224026316 ], "wc_reply_authors_avg": [ 1375.0, 1014.3517634430375 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 4.5, 2.692582403567252 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.6488856845230502, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7660073471667830530&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Cambridge;Zhejiang University;Westlake
University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cam.ac.uk;https://www.zju.edu.cn;https://www.westlake.edu.cn", "aff_unique_abbr": "Cambridge;ZJU;WU", "aff_campus_unique_index": "0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United Kingdom;China" }, { "title": "On gauge freedom, conservativity and intrinsic dimensionality estimation in diffusion models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19308", "id": "92KV9xAMhF", "author_site": "Christian Horvat, Jean-Pascal Pfister", "tldr": "", "abstract": "Diffusion models are generative models that have recently demonstrated impressive performances in terms of sampling quality and density estimation in high dimensions. They rely on a forward continuous diffusion process and a backward continuous denoising process, which can be described by a time-dependent vector field and is used as a generative model. In the original formulation of the diffusion model, this vector field is assumed to be the score function (i.e. it is the gradient of the log-probability at a given time in the diffusion process). Curiously, on the practical side, most studies on diffusion models implement this vector field as a neural network function and do not constrain it be the gradient of some energy function (that is, most studies do not constrain the vector field to be conservative). Even though some studies investigated empirically whether such a constraint will lead to a performance gain, they lead to contradicting results and failed to provide analytical results. Here, we provide three analytical results regarding the extent of the modeling freedom of this vector field. {Firstly, we propose a novel decomposition of vector fields into a conservative component and an orthogonal component which satisfies a given (gauge) freedom. Secondly, from this orthogonal decomposition, we show that exact density estimation and exact sampling is achieved when the conservative component is exactly equals to the true score and therefore conservativity is neither necessary nor sufficient to obtain exact density estimation and exact sampling. 
Finally, we show that when it comes to inferring local information of the data manifold, constraining the vector field to be conservative is desirable.", "keywords": "gauge freedom;conservativity;intrinsic dimensionality estimation;diffusion models;explainable AI;theory", "primary_area": "generative models", "supplementary_material": "/attachment/c6a9ea8a3844959d87cafb5e0e69155b41c43a2d.pdf", "author": "Christian Horvat;Jean-Pascal Pfister", "authorids": "~Christian_Horvat1;~Jean-Pascal_Pfister1", "gender": "M;M", "homepage": "https://physio.unibe.ch/~pfister/group/;https://physio.unibe.ch/~pfister/group/", "dblp": "293/8018;33/921", "google_scholar": "LpRirZAAAAAJ;https://scholar.google.co.uk/citations?user=mzUYoLgAAAAJ", "orcid": ";0000-0002-1847-3389", "linkedin": ";jean-pascal-pfister-840a7a1/", "or_profile": "~Christian_Horvat1;~Jean-Pascal_Pfister1", "aff": "Theoretical Neuroscience;Department of Physiology, Universit\u00e4t Bern", "aff_domain": "unibe.ch;unibe.ch", "position": "Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nhorvat2024on,\ntitle={On gauge freedom, conservativity and intrinsic dimensionality estimation in diffusion models},\nauthor={Christian Horvat and Jean-Pascal Pfister},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=92KV9xAMhF}\n}", "github": "", "project": "", "reviewers": "ovjk;pZCv;h3ie;KRsj", "pdf_size": 5143768, "rating": "5;6;8;8", "confidence": "4;3;5;4", "soundness": "3;2;4;2", "contribution": "2;2;3;2", "presentation": "2;2;4;3", "wc_summary": "131;194;65;105", "wc_strengths": "45;56;61;58", "wc_weaknesses": "51;297;515;172", "wc_questions": "375;191;380;133", "wc_review": "602;738;1021;468", "wc_reply_reviewers": "0;124;14;167", "wc_reply_authors": "288;583;87;156", "reply_reviewers": "0;2;1;1", "reply_authors": "1;3;1;1", "rating_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 123.75, 46.87949978402073 ], "wc_strengths_avg": [ 55.0, 6.041522986797286 ], "wc_weaknesses_avg": [ 258.75, 171.61930981098834 ], "wc_questions_avg": [ 269.75, 109.69816543589049 ], "wc_review_avg": [ 707.25, 204.75763111542386 ], "wc_reply_reviewers_avg": [ 76.25, 71.07170674748144 ], "wc_reply_authors_avg": [ 278.5, 190.05854361222492 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5443310539518174, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8157807008404479797&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=92KV9xAMhF", "pdf": "https://openreview.net/pdf?id=92KV9xAMhF", "email": "unibe.ch;unibe.ch", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Theoretical Neuroscience;University of Bern", "aff_unique_dep": "Neuroscience Department;Department of Physiology", "aff_unique_url": ";https://www.unibe.ch", "aff_unique_abbr": ";UniBE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1", "aff_country_unique": ";Switzerland" }, { "title": "SPDER: Semiperiodic Damping-Enabled Object Representation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19307", "id": "92btneN9Wm",
"author_site": "Kathan Shah, Chawin Sitawarin", "tldr": "", "abstract": "We present a neural network architecture designed to naturally learn a positional embedding and overcome the spectral bias towards lower frequencies faced by conventional implicit neural representation networks. Our proposed architecture, SPDER, is a simple MLP that uses an activation function composed of a sinusoidal multiplied by a sublinear function, called the damping function. The sinusoidal enables the network to automatically learn the positional embedding of an input coordinate while the damping passes on the actual coordinate value by preventing it from being projected down to within a finite range of values. Our results indicate that SPDERs speed up training by 10 times and converge to losses 1,500 to 50,000 times lower than that of the state-of-the-art for image representation. SPDER is also state-of-the-art in audio representation. The superior representation capability allows SPDER to also excel on multiple downstream tasks such as image super-resolution and video frame interpolation. We provide intuition as to why SPDER significantly improves fitting compared to that of other INR methods while requiring no hyperparameter tuning or preprocessing. See code at https://github.com/katop1234/SPDER.", "keywords": "Implicit neural representations;spectral bias;computer vision;neural network architectures;activations;image representation;edge detection", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/8939af26ae9c398e386ecddf5fad22e5efa128f5.zip", "author": "Kathan Shah;Chawin Sitawarin", "authorids": "~Kathan_Shah1;~Chawin_Sitawarin1", "gender": "M;M", "homepage": ";https://chawins.github.io/", "dblp": ";211/7105", "google_scholar": "xRpZ_sgAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0002-4949-9661", "linkedin": "kathans/;chawins/", "or_profile": "~Kathan_Shah1;~Chawin_Sitawarin1", "aff": "University of California, Berkeley;University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu", "position": "Undergrad student;PhD student", "bibtex": "@inproceedings{\nshah2024spder,\ntitle={{SPDER}: Semiperiodic Damping-Enabled Object Representation},\nauthor={Kathan Shah and Chawin Sitawarin},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=92btneN9Wm}\n}", "github": "", "project": "", "reviewers": "TKxh;V9Ce;9hcy", "pdf_size": 4004295, "rating": "5;6;8", "confidence": "4;4;3", "soundness": "2;2;3", "contribution": "3;3;2", "presentation": "4;3;3", "wc_summary": "98;97;49", "wc_strengths": "134;44;76", "wc_weaknesses": "302;20;155", "wc_questions": "257;28;43", "wc_review": "791;189;323", "wc_reply_reviewers": "0;0;6", "wc_reply_authors": "582;183;558", "reply_reviewers": "0;0;1", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 81.33333333333333, 22.866763848189994 ], "wc_strengths_avg": [ 84.66666666666667, 37.249906785863985 ], "wc_weaknesses_avg": [ 159.0, 115.16075720487426 ], "wc_questions_avg": [ 109.33333333333333, 104.5955172185798 ], "wc_review_avg": [ 434.3333333333333, 258.0663136138117 ], 
"wc_reply_reviewers_avg": [ 2.0, 2.8284271247461903 ], "wc_reply_authors_avg": [ 441.0, 182.69646958822165 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.944911182523068, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12155969879033561286&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=92btneN9Wm", "pdf": "https://openreview.net/pdf?id=92btneN9Wm", "email": "berkeley.edu;berkeley.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "92yrETgM6G", "title": "Calibration Attack: A Framework For Adversarial Attacks Targeting Calibration", "track": "main", "status": "Reject", "tldr": "", "abstract": "We introduce a new framework of adversarial attacks, named calibration attacks, in which the attacks are generated and organized to trap victim models to be miscalibrated without altering their original accuracy, hence seriously endangering the trustworthiness of the models and any decision-making based on their confidence scores. Specifically, we identify four novel forms of calibration attacks: underconfidence attacks, overconfidence attacks, maximum miscalibration attacks, and random confidence attacks, in both the black-box and white-box setups. We then test these new attacks on typical victim models with comprehensive datasets, demonstrating that even with a relatively low number of queries, the attacks can create significant calibration mistakes. We further provide detailed analyses to understand different aspects of calibration attacks. 
Building on that, we investigate the effectiveness of widely used adversarial defences and calibration methods against these types of attacks, which then inspires us to devise two novel defences against such calibration attacks.", "keywords": "robustness;calibration;deep learning;image classification;adversarial", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "", "author": "Stephen Obadinma;Xiaodan Zhu;Hongyu Guo", "authorids": "~Stephen_Obadinma1;~Xiaodan_Zhu1;~Hongyu_Guo1", "gender": "M;M;M", "homepage": ";http://www.xiaodanzhu.com;https://hongyuharryguo.github.io/", "dblp": "271/8187;93/310.html;", "google_scholar": "https://scholar.google.ca/citations?user=bRbQBNsAAAAJ;https://scholar.google.ca/citations?user=a6MYnuUAAAAJ;https://scholar.google.ca/citations?user=bZUqlakAAAAJ", "orcid": ";0000-0003-3856-3696;", "linkedin": ";xiaodan-zhu-066833101/?originalSubdomain=ca;harry-h-y-guo-a582087/", "or_profile": "~Stephen_Obadinma1;~Xiaodan_Zhu1;~Hongyu_Guo1", "aff": "Queen's University;Queen's University;National Research Council Canada", "aff_domain": "queensu.ca;queensu.ca;nrc-cnrc.gc.ca", "position": "PhD student;Associate Professor;Senior Research Officer", "bibtex": "@misc{\nobadinma2024calibration,\ntitle={Calibration Attack: A Framework For Adversarial Attacks Targeting Calibration},\nauthor={Stephen Obadinma and Xiaodan Zhu and Hongyu Guo},\nyear={2024},\nurl={https://openreview.net/forum?id=92yrETgM6G}\n}", "github": "", "project": "", "reviewers": "YLZS;BDLx;PTEC;LAwv", "site": "https://openreview.net/forum?id=92yrETgM6G", "pdf_size": 2622314, "rating": "1;5;5;5", "confidence": "3;3;3;4", "soundness": "3;4;3;3", "contribution": "1;3;2;2", "presentation": "1;2;3;3", "wc_summary": "112;121;106;62", "wc_strengths": "97;30;96;64", "wc_weaknesses": "476;3;168;144", "wc_questions": "178;243;65;46", "wc_review": "863;397;435;316", "wc_reply_reviewers": "247;0;0;0", "wc_reply_authors": "603;151;490;295", "reply_reviewers": "1;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 4.0, 1.7320508075688772 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 100.25, 22.71976012197312 ], "wc_strengths_avg": [ 71.75, 27.517040175135115 ], "wc_weaknesses_avg": [ 197.75, 172.57226747076137 ], "wc_questions_avg": [ 133.0, 81.11411714368838 ], "wc_review_avg": [ 502.75, 212.3845274496238 ], "wc_reply_reviewers_avg": [ 61.75, 106.95413736737817 ], "wc_reply_authors_avg": [ 384.75, 174.2159220622501 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13536272327202876115&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;1", "aff_unique_norm": "Queen's University;National Research Council Canada", "aff_unique_dep": ";", "aff_unique_url": "https://www.queensu.ca;https://www.nrc-cnrc.gc.ca", "aff_unique_abbr": "Queen's;NRC-CNRC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "title": "Hybrid Internal Model: Learning Agile Legged Locomotion with Simulated Robot Response", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19306", "id": 
"93LoCyww8o", "author_site": "Junfeng Long, ZiRui Wang, Quanyi Li, Liu Cao, Jiawei Gao, Jiangmiao Pang", "tldr": "", "abstract": "Robust locomotion control depends on accurate state estimations. However, the sensors of most legged robots can only provide partial and noisy observations, making the estimation particularly challenging, especially for external states like terrain frictions and elevation maps. Inspired by the classical Internal Model Control principle, we consider these external states as disturbances and introduce Hybrid Internal Model (HIM) to estimate them according to the response of the robot. The response, which we refer to as the hybrid internal embedding, contains the robot\u2019s explicit velocity and implicit stability representation, corresponding to two primary goals for locomotion tasks: explicitly tracking velocity and implicitly maintaining stability. We use contrastive learning to optimize the embedding to be close to the robot\u2019s successor state, in which the response is naturally embedded. HIM has several appealing benefits: It only needs the robot\u2019s proprioceptions, i.e., those from joint encoders and IMU as observations. It innovatively maintains consistent observations between simulation reference and reality that avoids information loss in mimicking learning. It exploits batch-level information that is more robust to noises and keeps better sample efficiency. It only requires 1 hour of training on an RTX 4090 to enable a quadruped robot to traverse any terrain under any disturbances. A wealth of real-world experiments demonstrates its agility, even in high-difficulty tasks and cases never occurred during the training process, revealing remarkable open-world generalizability.", "keywords": "Reinforcement Learning;Quadrupedal Locomotion;Internal Model", "primary_area": "applications to robotics, autonomy, planning", "supplementary_material": "", "author": "Junfeng Long;ZiRui Wang;Quanyi Li;Liu Cao;Jiawei Gao;Jiangmiao Pang", "authorids": "~Junfeng_Long1;~ZiRui_Wang8;~Quanyi_Li1;~Liu_Cao1;~Jiawei_Gao1;~Jiangmiao_Pang1", "gender": "M;M;M;M;M;M", "homepage": "https://junfeng-long.github.io/;https://quanyili.github.io;https://github.com/xiaohu-art;https://gao-jiawei.com/;https://oceanpang.github.io/;https://github.com/Wongziseoi", "dblp": "343/2990;270/7691;;124/9335-4;231/7630;", "google_scholar": "olmfqBEAAAAJ;Ty49X3UAAAAJ;;NJxUNrcAAAAJ;https://scholar.google.com/citations?authuser=0;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0000-0001-7047-4963;;;;0000-0002-6711-9319;", "linkedin": ";https://www.linkedin.com/mwlite/in/quanyi-li-2b7985183;;;;", "or_profile": "~Junfeng_Long1;~Quanyi_Li1;~Liu_Cao1;~Jiawei_Gao1;~Jiangmiao_Pang1;~ZiRui_Wang4", "aff": "Shanghai AI Laboratory;University of Edinburgh;Tsinghua University;Tsinghua University;Shanghai AI Laboratory ;Shanghai Artificial Intelligence Laboratory", "aff_domain": "pjlab.org.cn;ed.ac.uk;tsinghua.edu.cn;tsinghua.edu.cn;pjlab.org.cn;pjlab.org.cn", "position": "Researcher;MS student;Undergrad student;Undergrad student;Research Scientist;Intern", "bibtex": "@inproceedings{\nlong2024hybrid,\ntitle={Hybrid Internal Model: Learning Agile Legged Locomotion with Simulated Robot Response},\nauthor={Junfeng Long and ZiRui Wang and Quanyi Li and Liu Cao and Jiawei Gao and Jiangmiao Pang},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=93LoCyww8o}\n}", "github": "", "project": "", "reviewers": "9L23;FQfg;SBNA;Yqja", 
"pdf_size": 8357957, "rating": "5;6;6;8", "confidence": "5;3;4;4", "soundness": "1;3;3;3", "contribution": "2;3;2;3", "presentation": "1;2;2;3", "wc_summary": "56;67;93;35", "wc_strengths": "48;103;34;15", "wc_weaknesses": "284;67;140;54", "wc_questions": "78;81;11;101", "wc_review": "466;318;278;205", "wc_reply_reviewers": "30;0;12;10", "wc_reply_authors": "2790;353;1921;393", "reply_reviewers": "1;0;1;1", "reply_authors": "6;3;6;2", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 62.75, 20.90902915010642 ], "wc_strengths_avg": [ 50.0, 32.76430985081175 ], "wc_weaknesses_avg": [ 136.25, 91.38483189238792 ], "wc_questions_avg": [ 67.75, 33.9365216249397 ], "wc_review_avg": [ 316.75, 95.21915511072339 ], "wc_reply_reviewers_avg": [ 13.0, 10.816653826391969 ], "wc_reply_authors_avg": [ 1364.25, 1037.8688199864182 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 4.25, 1.7853571071357126 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.3244428422615251, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16640877501129933135&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=93LoCyww8o", "pdf": "https://openreview.net/pdf?id=93LoCyww8o", "email": "pjlab.org.cn;ed.ac.uk;tsinghua.edu.cn;tsinghua.edu.cn;pjlab.org.cn;pjlab.org.cn", "author_num": 6, "aff_unique_index": "0;1;2;2;0;3", "aff_unique_norm": "Shanghai AI Laboratory;University of Edinburgh;Tsinghua University;Shanghai Artificial Intelligence Laboratory", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.shanghai-ai-lab.com;https://www.ed.ac.uk;https://www.tsinghua.edu.cn;http://www.shailab.org/", "aff_unique_abbr": "SAIL;Edinburgh;THU;Shanghai AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;0", "aff_country_unique": "China;United Kingdom" }, { "id": "94FKDbtTqO", "title": "Rethinking the bert-like pretraining for dna sequences", "track": "main", "status": "Reject", "tldr": "", "abstract": "With the success of large-scale pretraining in NLP, there is an increasing trend of applying it to the domain of life sciences. In particular, pretraining methods based on DNA sequences have garnered growing attention due to their potential to capture generic information about genes. However, existing pretraining methods for DNA sequences largely rely on direct adoptions of BERT pretraining from NLP, lacking a comprehensive understanding and a specifically tailored approach. To address this research gap, we first conducted a series of exploratory experiments and gained several insightful observations: 1) In the fine-tuning phase of downstream tasks, when using K-mer overlapping tokenization instead of K-mer non-overlapping tokenization, both overlapping and non-overlapping pretraining weights show consistent performance improvement.\n2) During the pre-training process, using K-mer overlapping tokenization quickly produces clear K-mer embeddings and reduces the loss to a very low level, while using K-mer non-overlapping tokenization results in less distinct embeddings and continuously decreases the loss. 
3) Using overlapping tokenization causes the self-attention in the intermediate layers of pre-trained models to tend to overly focus on certain tokens, reflecting that these layers are not adequately optimized. In summary, overlapping tokenization can benefit the fine-tuning of downstream tasks but leads to inadequate pretraining with fast convergence. To unleash the pretraining potential, we introduce a novel approach called RandomMask, which gradually increases the task difficulty of BERT-like pretraining by continuously expanding its mask boundary, forcing the model to learn more knowledge. RandomMask is simple but effective, achieving top-tier performance across 26 datasets spanning 7 downstream tasks. For example, RandomMask achieves a staggering 65.83\\% in Matthew's correlation coefficient for epigenetic mark prediction, which is a groundbreaking increase of 14.02\\% over the baseline and a remarkable 4.82\\% improvement over the SOTA results.", "keywords": "Pretrained;DNA;Large Language Model", "primary_area": "applications to physical sciences (physics, chemistry, biology, etc.)", "supplementary_material": "/attachment/4d7404012286f7bdac17ee876cdeec6b8ef75a04.pdf", "author": "chaoqi liang;Weiqiang Bai;Lifeng Qiao;Yuchen Ren;Jianle Sun;Peng Ye;Hongliang Yan;Xinzhu Ma;Wangmeng Zuo;Wanli Ouyang", "authorids": "~chaoqi_liang1;~Weiqiang_Bai1;~Lifeng_Qiao1;~Yuchen_Ren1;~Jianle_Sun1;~Peng_Ye4;~Hongliang_Yan1;~Xinzhu_Ma1;~Wangmeng_Zuo3;~Wanli_Ouyang1", "gender": "M;;M;;M;M;M;M;M;", "homepage": "https://github.com/ChaoqiLiang;;https://github.com/qiaoqiaoLF;;https://sjl-sjtu.github.io/;;;https://github.com/xinzhuma;;", "dblp": "320/0293;;55/10318;;307/2312;53/930-6;03/8409;191/3902;93/2671;", "google_scholar": "r1yke4EAAAAJ;;;;sRFyIxAAAAAJ;UEZZP5QAAAAJ;Obo7-bIAAAAJ;8PuKa_8AAAAJ;rUOpCEYAAAAJ;", "orcid": ";;;;0000-0002-0001-0992;0000-0002-8486-7562;;;0000-0002-3330-783X;", "linkedin": ";;;;;;;;;", "or_profile": "~chaoqi_liang1;~Weiqiang_Bai1;~Lifeng_Qiao1;~Yuchen_Ren1;~Jianle_Sun1;~Peng_Ye4;~Hongliang_Yan1;~Xinzhu_Ma1;~Wangmeng_Zuo3;~Wanli_Ouyang1", "aff": "Harbin Institute of Technology;;Shanghai Jiaotong University;;Shanghai Jiaotong University;Fudan University;Shanghai Artificial Intelligence Lab;The Chinese University of Hong Kong;Harbin Institute of Technology;", "aff_domain": "hit.edu.cn;;sjtu.edu.cn;;sjtu.edu.cn;fudan.edu.cn;pjlab.org.cn;cuhk.edu.hk;hit.edu.cn;", "position": "PhD student;;Undergrad student;;MS student;PhD student;Postdoc;Postdoc;Full Professor;", "bibtex": "@misc{\nliang2024rethinking,\ntitle={Rethinking the bert-like pretraining for dna sequences},\nauthor={chaoqi liang and Weiqiang Bai and Lifeng Qiao and Yuchen Ren and Jianle Sun and Peng Ye and Hongliang Yan and Xinzhu Ma and Wangmeng Zuo and Wanli Ouyang},\nyear={2024},\nurl={https://openreview.net/forum?id=94FKDbtTqO}\n}", "github": "", "project": "", "reviewers": "g7iT;kyqC;45WA;SHyo", "site": "https://openreview.net/forum?id=94FKDbtTqO", "pdf_size": 1883340, "rating": "3;6;6;6", "confidence": "3;3;3;4", "soundness": "3;2;4;2", "contribution": "3;2;3;3", "presentation": "2;2;3;3", "wc_summary": "300;59;97;178", "wc_strengths": "50;6;142;68", "wc_weaknesses": "382;137;166;30", "wc_questions": "3;3;57;56", "wc_review": "735;205;462;332", "wc_reply_reviewers": "128;0;18;0", "wc_reply_authors": "1697;920;1181;945", "reply_reviewers": "1;0;1;0", "reply_authors": "4;2;3;2", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], 
"contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 158.5, 92.31061694084815 ], "wc_strengths_avg": [ 66.5, 49.07901792008475 ], "wc_weaknesses_avg": [ 178.75, 127.81113996831418 ], "wc_questions_avg": [ 29.75, 26.75233634656981 ], "wc_review_avg": [ 433.5, 196.3600010185374 ], "wc_reply_reviewers_avg": [ 36.5, 53.33619783974107 ], "wc_reply_authors_avg": [ 1185.75, 312.2429943169262 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:awX9quWX0vYJ:scholar.google.com/&scioq=Rethinking+the+bert-like+pretraining+for+dna+sequences&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;1;1;2;3;4;0", "aff_unique_norm": "Harbin Institute of Technology;Shanghai Jiao Tong University;Fudan University;Shanghai Artificial Intelligence Lab;Chinese University of Hong Kong", "aff_unique_dep": ";;;;", "aff_unique_url": "http://www.hit.edu.cn/;https://www.sjtu.edu.cn;https://www.fudan.edu.cn;https://www.shailab.org;https://www.cuhk.edu.hk", "aff_unique_abbr": "HIT;SJTU;Fudan;Shanghai AI Lab;CUHK", "aff_campus_unique_index": "0;2;0", "aff_campus_unique": "Harbin;;Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "9528xxcT7h", "title": "Two Heads are Better than One: Towards Better Adversarial Robustness by Combining Transduction and Rejection", "track": "main", "status": "Reject", "tldr": "", "abstract": "Both transduction and rejection have emerged as important techniques for defending against adversarial perturbations. A recent work by Tram\u00e8r showed that, in the rejection-only case (no transduction), a strong rejection-solution can be turned into a strong (but computationally inefficient) non-rejection solution. This detector-to-classifier reduction has been mostly applied to give evidence that certain claims of strong selective-model solutions are susceptible, leaving the benefits of rejection unclear. On the other hand, a recent work by Goldwasser et al. showed that rejection combined with transduction can give provable guarantees (for certain problems) that cannot be achieved otherwise. Nevertheless, under recent strong adversarial attacks (GMSA, which has been shown to be much more effective than AutoAttack against transduction), Goldwasser et al.'s work was shown to have low performance in a practical deep-learning setting. In this paper, we take a step towards realizing the promise of transduction+rejection in more realistic scenarios. Theoretically, we show that a novel application of Tram\u00e8r's classifier-to-detector technique in the transductive setting can give significantly improved sample-complexity for robust generalization. While our theoretical construction is computationally inefficient, it guides us to identify an efficient transductive algorithm to learn a selective model. 
Extensive experiments using state of the art attacks (AutoAttack, GMSA) show that our solutions provide significantly better robust accuracy.", "keywords": "Adversarial robustness;Transductive machine learning;Rejection;Selective classification", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/756840c278f6b75cb543649e00bba7f20708cd4c.zip", "author": "Nils Palumbo;Yang Guo;Xi Wu;Jiefeng Chen;Yingyu Liang;Somesh Jha", "authorids": "~Nils_Palumbo1;~Yang_Guo4;~Xi_Wu1;~Jiefeng_Chen2;~Yingyu_Liang1;~Somesh_Jha1", "gender": ";M;M;M;;M", "homepage": ";;http://andrewxiwu.github.io/;https://jfc43.github.io/;;", "dblp": "258/3557;;37/4465-1;199/3381;;j/SomeshJha", "google_scholar": "vXBD3Q8AAAAJ;BbQQEPcAAAAJ;OmmxazMAAAAJ;5mOfQfAAAAAJ;;BaI7l8QAAAAJ", "orcid": ";;;;;", "linkedin": ";;;jiefeng-chen-aa1769122/;;", "or_profile": "~Nils_Palumbo1;~Yang_Guo4;~Xi_Wu1;~Jiefeng_Chen2;~Yingyu_Liang1;~Somesh_Jha1", "aff": "University of Wisconsin - Madison;;Google;Amazon;;Department of Computer Science, University of Wisconsin, Madison", "aff_domain": "wisc.edu;;google.com;amazon.com;;cs.wisc.edu", "position": "PhD student;;Software Engineer;Applied Scientist;;Full Professor", "bibtex": "@misc{\npalumbo2024two,\ntitle={Two Heads are Better than One: Towards Better Adversarial Robustness by Combining Transduction and Rejection},\nauthor={Nils Palumbo and Yang Guo and Xi Wu and Jiefeng Chen and Yingyu Liang and Somesh Jha},\nyear={2024},\nurl={https://openreview.net/forum?id=9528xxcT7h}\n}", "github": "", "project": "", "reviewers": "xAQ7;XrZg;qXLN;kR75", "site": "https://openreview.net/forum?id=9528xxcT7h", "pdf_size": 2247755, "rating": "6;6;6;8", "confidence": "3;4;3;4", "soundness": "3;3;3;3", "contribution": "3;3;2;3", "presentation": "3;3;3;3", "wc_summary": "94;224;95;75", "wc_strengths": "113;55;48;57", "wc_weaknesses": "255;201;119;148", "wc_questions": "71;5;24;5", "wc_review": "533;485;286;285", "wc_reply_reviewers": "0;0;0;12", "wc_reply_authors": "575;360;473;444", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;2", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 122.0, 59.42642509860407 ], "wc_strengths_avg": [ 68.25, 26.05163142684158 ], "wc_weaknesses_avg": [ 180.75, 51.982569193913456 ], "wc_questions_avg": [ 26.25, 26.975683494584526 ], "wc_review_avg": [ 397.25, 113.0317986232193 ], "wc_reply_reviewers_avg": [ 3.0, 5.196152422706632 ], "wc_reply_authors_avg": [ 463.0, 76.834237160266 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Tq19TOg1zpQJ:scholar.google.com/&scioq=Two+Heads+are+Better+than+One:+Towards+Better+Adversarial+Robustness+by+Combining+Transduction+and+Rejection&hl=en&as_sdt=0,21", "gs_version_total": 3, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Wisconsin-Madison;Google;Amazon", "aff_unique_dep": ";Google;Amazon.com, Inc.", "aff_unique_url": "https://www.wisc.edu;https://www.google.com;https://www.amazon.com", "aff_unique_abbr": "UW-Madison;Google;Amazon", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Madison;Mountain View;", "aff_country_unique_index": "0;0;0;0", 
"aff_country_unique": "United States" }, { "id": "95ObXevgHx", "title": "The Temporal Structure of Language Processing in the Human Brain Corresponds to The Layered Hierarchy of Deep Language Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "Deep Language Models (DLMs) provide a novel computational paradigm for understanding the mechanisms of natural language processing in the human brain. Unlike traditional psycholinguistic models, DLMs use layered sequences of continuous numerical vectors to represent words and context, allowing a plethora of emerging applications such as human-like text generation. \nIn this paper we show evidence that the layered hierarchy of DLMs may be used to model the temporal dynamics of language comprehension in the brain by demonstrating a strong correlation between DLM layer depth and \nthe time at which layers are most predictive of the human brain.\nOur ability to temporally resolve individual layers benefits from our use of electrocorticography (ECoG) data, which has a much higher temporal resolution than noninvasive methods like fMRI. Using ECoG, we record neural activity from participants listening to a 30-minute narrative while also feeding the same narrative to a high-performing DLM (GPT2-XL). We then extract contextual embeddings from the different layers of the DLM and use linear encoding models to predict neural activity. We first focus on the Inferior Frontal Gyrus (IFG, or Broca's area) and then extend our model to track the increasing temporal receptive window along the linguistic processing hierarchy from auditory to syntactic and semantic areas. \nOur results reveal a connection between human language processing and DLMs, with the DLM's layer-by-layer accumulation of contextual information mirroring the timing of neural activity in high-order language areas.", "keywords": "natural language processing;NLP;neuroscience;cognitive science;deep language models;GPT2", "primary_area": "applications to neuroscience & cognitive science", "supplementary_material": "/attachment/a0c98662662e4a5c55ff3d4366101e06c2b94617.zip", "author": "Ariel Goldstein;Eric Ham;Mariano Schain;Samuel Nastase;Zaid Zada;Avigail Dabush;Bobbi Aubrey;Harshvardhan Gazula;Amir Feder;Werner K Doyle;Sasha Devore;Patricia Dugan;Daniel Friedman;Roi Reichart;Michael Brenner;Avinatan Hassidim;Orrin Devinsky;Adeen Flinker;Omer Levy;Uri Hasson", "authorids": "~Ariel_Goldstein1;~Eric_Ham1;~Mariano_Schain1;~Samuel_Nastase1;~Zaid_Zada1;~Avigail_Dabush1;~Bobbi_Aubrey1;~Harshvardhan_Gazula1;~Amir_Feder1;~Werner_K_Doyle1;~Sasha_Devore1;~Patricia_Dugan1;~Daniel_Friedman2;~Roi_Reichart1;~Michael_Brenner1;~Avinatan_Hassidim3;~Orrin_Devinsky1;~Adeen_Flinker1;~Omer_Levy1;~Uri_Hasson1", "gender": "M;M;;M;M;;;;;M;;F;M;M;;;M;M;M;M", "homepage": "https://www.deepcognitionlab.com/;;;https://snastase.github.io/;https://zaidzada.com;https://www.linkedin.com/in/avigail-dabush-grinstein-546a15226/;;;https://www.amirfeder.com/;http://neuroviewtech.com;https://med.nyu.edu/faculty/sasha-devore;https://nyulangone.org/doctors/1467789107/patricia-c-dugan;;https://roireichart.com/;https://brennergroup.seas.harvard.edu;;;https://flinkerlab.org;;https://hassonlab.princeton.edu/", "dblp": ";;;165/6494;;;;;214/3604;;;;73/1093;96/5429;;;;;117/4866;", "google_scholar": "p8hQgVuVOTgC;Ts4PsekAAAAJ;;tjLH8mQAAAAJ;RC_hwc0AAAAJ;;pryO0XYAAAAJ;;ERwoPLIAAAAJ;;;;CMIm2eUAAAAJ;https://scholar.google.co.il/citations?user=xXJIsh4AAAAJ;;;25Q74uMAAAAJ;Us1mDooAAAAJ;PZVd2h8AAAAJ;VRw8v4kAAAAJ", "orcid": 
";0009-0004-1453-7180;;0000-0001-7013-5275;0000-0002-3096-0059;;;;0000-0001-5472-1135;;;0000-0001-6199-1870;0000-0003-1068-1797;;;;;0000-0003-1247-1283;0000-0001-7300-8191;", "linkedin": ";eric-ham-1b10ab12a/;;;zzada/;;;;amir-feder-b65b7035/;;;;;roi-reichart-ba2a8a7/;;;;;;", "or_profile": "~Ariel_Goldstein1;~Eric_Ham1;~Mariano_Schain1;~Samuel_Nastase1;~Zaid_Zada1;~Avigail_Dabush1;~Bobbi_Aubrey1;~Harshvardhan_Gazula1;~Amir_Feder1;~Werner_K_Doyle1;~Sasha_Devore1;~Patricia_Dugan1;~Daniel_Friedman2;~Roi_Reichart1;~Michael_Brenner1;~Avinatan_Hassidim3;~Orrin_Devinsky1;~Adeen_Flinker1;~Omer_Levy1;~Uri_Hasson1", "aff": "Hebrew University of Jerusalem;Gladstone Institutes;;Princeton University;Princeton University;;Princeton University;;Google;New York University;NYU Langone;NYU Grossman School of Medicine;NYU Langone;Technion, Israel Institute of Technology;Harvard University;;;New York University;Tel Aviv University;", "aff_domain": "huji.ac.il;gladstone.ucsf.edu;;princeton.edu;princeton.edu;;princeton.edu;;google.com;nyu.edu;nyumc.org;nyulangone.org;nyumc.org;technion.ac.il;fas.harvard.edu;;;nyu.edu;tau.ac.il;", "position": "Assistant Professor;Researcher;;Postdoc;PhD student;;Researcher;;Researcher;Associate Professor;Associate Professor;Associate Professor;Full Professor;Associate Professor;Professor;;;Assistant Professor;Senior Lecturer;", "bibtex": "@misc{\ngoldstein2024the,\ntitle={The Temporal Structure of Language Processing in the Human Brain Corresponds to The Layered Hierarchy of Deep Language Models},\nauthor={Ariel Goldstein and Eric Ham and Mariano Schain and Samuel Nastase and Zaid Zada and Avigail Dabush and Bobbi Aubrey and Harshvardhan Gazula and Amir Feder and Werner K Doyle and Sasha Devore and Patricia Dugan and Daniel Friedman and Roi Reichart and Michael Brenner and Avinatan Hassidim and Orrin Devinsky and Adeen Flinker and Omer Levy and Uri Hasson},\nyear={2024},\nurl={https://openreview.net/forum?id=95ObXevgHx}\n}", "github": "", "project": "", "reviewers": "MbTk;4Wkk;r5Ne;4TH4;rNmu", "site": "https://openreview.net/forum?id=95ObXevgHx", "pdf_size": 18254906, "rating": "3;6;6;8;8", "confidence": "4;3;5;4;4", "soundness": "2;3;3;4;3", "contribution": "2;3;3;3;3", "presentation": "2;3;3;4;4", "wc_summary": "157;79;165;259;94", "wc_strengths": "25;85;189;99;77", "wc_weaknesses": "137;125;165;129;30", "wc_questions": "119;65;103;417;9", "wc_review": "438;354;622;904;210", "wc_reply_reviewers": "260;19;0;0;0", "wc_reply_authors": "1951;1079;1119;1514;739", "reply_reviewers": "1;1;0;0;0", "reply_authors": "4;3;2;2;2", "rating_avg": [ 6.2, 1.8330302779823362 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "contribution_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 150.8, 63.76331233554292 ], "wc_strengths_avg": [ 95.0, 53.246596135339956 ], "wc_weaknesses_avg": [ 117.2, 45.783839943805496 ], "wc_questions_avg": [ 142.6, 142.33144417169382 ], "wc_review_avg": [ 505.6, 239.66777004845684 ], "wc_reply_reviewers_avg": [ 55.8, 102.36483771295687 ], "wc_reply_authors_avg": [ 1280.4, 415.69200136639625 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 2.6, 0.8 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 20, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12394723397824789272&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2;2;2;3;4;5;6;5;7;8;4;9", 
"aff_unique_norm": "Hebrew University of Jerusalem;Gladstone Institutes;Princeton University;Google;New York University;NYU Langone Health;New York University Grossman School of Medicine;Israel Institute of Technology;Harvard University;Tel Aviv University", "aff_unique_dep": ";;;Google;;;School of Medicine;;;", "aff_unique_url": "https://www.huji.ac.il;https://www.gladstone.org;https://www.princeton.edu;https://www.google.com;https://www.nyu.edu;https://nyulangone.org;https://med.nyu.edu;https://www.technion.ac.il/en/;https://www.harvard.edu;https://www.tau.ac.il", "aff_unique_abbr": "HUJI;;Princeton;Google;NYU;NYU Langone;NYU Grossman SOM;Technion;Harvard;TAU", "aff_campus_unique_index": "0;2;3", "aff_campus_unique": "Jerusalem;;Mountain View;New York", "aff_country_unique_index": "0;1;1;1;1;1;1;1;1;1;0;1;1;0", "aff_country_unique": "Israel;United States" }, { "id": "95joD3Yc5t", "title": "Generative Semantic Communication: Diffusion Models Beyond Bit Recovery", "track": "main", "status": "Reject", "tldr": "", "abstract": "Semantic communication is expected to be one of the cores of next-generation AI-based communications. One of the possibilities offered by semantic communication is the capability to regenerate, at the destination side, images or videos semantically equivalent to the transmitted ones, without necessarily recovering the transmitted sequence of bits. The current solutions still lack the ability to build complex scenes from the received partial information. Clearly, there is an unmet need to balance the effectiveness of generation methods and the complexity of the transmitted information, possibly taking into account the goal of communication. In this paper, we aim to bridge this gap by proposing a novel generative diffusion-guided framework for semantic communication that leverages the strong abilities of diffusion models in synthesizing multimedia content while preserving semantic features. Concurrently, we propose a novel strategy to make diffusion models resilient to corrupted conditioning data, avoiding that heavily noise-affected conditioning may mislead the generation process. We reduce bandwidth usage by sending highly-compressed semantic information only. Then, the diffusion model learns to synthesize semantic-consistent scenes from such semantic information.\nWe prove, through an in-depth assessment of multiple scenarios, that our method outperforms existing solutions in generating high-quality images with preserved semantic information even in cases where the received conditioning content is significantly degraded. 
More specifically, our results show that objects, locations, and depths are still recognizable even in the presence of extremely noisy conditions of the communication channel.", "keywords": "Semantic image synthesis;Diffusion models;Deep generative models;Semantic communication", "primary_area": "generative models", "supplementary_material": "/attachment/b8cdcaeebef29d66f185b12dc0c713cd06e7dbc1.zip", "author": "Eleonora Grassucci;Sergio Barbarossa;Danilo Comminiello", "authorids": "~Eleonora_Grassucci1;~Sergio_Barbarossa1;~Danilo_Comminiello1", "gender": "F;;M", "homepage": "https://sites.google.com/uniroma1.it/eleonoragrassucci/home-page;https://sites.google.com/a/uniroma1.it/sergiobarbarossa/;https://danilocomminiello.site.uniroma1.it/", "dblp": "275/6348;66/426;33/9433", "google_scholar": "https://scholar.google.it/citations?user=Jcv0TgQAAAAJ;https://scholar.google.it/citations?hl=it;https://scholar.google.it/citations?user=H3Y52cMAAAAJ", "orcid": "0000-0003-4626-4506;;0000-0003-4067-4504", "linkedin": ";;danilocomminiello/", "or_profile": "~Eleonora_Grassucci1;~Sergio_Barbarossa1;~Danilo_Comminiello1", "aff": "Sapienza University of Rome;University of Roma \"La Sapienza\";Sapienza University of Rome", "aff_domain": "uniroma1.it;uniroma1.it;uniroma1.it", "position": "Assistant Professor;Full Professor;Associate Professor", "bibtex": "@misc{\ngrassucci2024generative,\ntitle={Generative Semantic Communication: Diffusion Models Beyond Bit Recovery},\nauthor={Eleonora Grassucci and Sergio Barbarossa and Danilo Comminiello},\nyear={2024},\nurl={https://openreview.net/forum?id=95joD3Yc5t}\n}", "github": "", "project": "", "reviewers": "ckTn;4cUJ;q6SF;M1DV", "site": "https://openreview.net/forum?id=95joD3Yc5t", "pdf_size": 26810978, "rating": "3;5;5;6", "confidence": "2;3;4;3", "soundness": "2;3;2;3", "contribution": "2;2;3;3", "presentation": "3;2;3;3", "wc_summary": "82;65;48;101", "wc_strengths": "38;40;62;86", "wc_weaknesses": "146;179;164;136", "wc_questions": "34;2;23;63", "wc_review": "300;286;297;386", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "924;823;925;1348", "reply_reviewers": "0;0;0;0", "reply_authors": "4;3;4;4", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 74.0, 19.685019685029527 ], "wc_strengths_avg": [ 56.5, 19.461500456028563 ], "wc_weaknesses_avg": [ 156.25, 16.528384676065595 ], "wc_questions_avg": [ 30.5, 22.005681084665387 ], "wc_review_avg": [ 317.25, 40.03357965508455 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1005.0, 202.32029062849827 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 3.75, 0.4330127018922193 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.6488856845230502, "gs_citation": 66, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6442150188972160068&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Sapienza University of Rome;University of Rome La Sapienza", "aff_unique_dep": ";", "aff_unique_url": "https://www.uniroma1.it;https://www.uniroma1.it", "aff_unique_abbr": "Sapienza;La Sapienza", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Rome", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Italy" }, { "id": "96UB3vQpAA", "title": "Fast Learning in Balanced Deep Spiking Neural Networks with Strong and Weak 
Synapses", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "The intricate neural dynamics of the cerebral cortex are often characterized in terms of the delicate balance between excitation and inhibition (E-I balance). While numerous studies have delved into its functional implications, one fundamental issue has remained unresolved -- namely, _the unstructured, random connections posed by E-I balance dynamics versus the necessity for structured neural connections to fulfill specific computational tasks_. This raises the crucial question: How can neural circuits reconcile these seemingly contradictory demands? Drawing inspirations from recent data in neuroscience, we propose a biologically grounded spiking neural network. This network incorporates two distinct sets of synaptic connections, one featuring strong synapses dedicated to maintaining the balance condition, and the other comprising weak synapses utilized for neural computation. Crucially, only the weak synapses undergo training, while the strong synapses remain fixed. Interestingly, we have discovered that this architecture not only resolves the structural conflicts, but also offers several compelling computational advantages. Firstly, the E-I balance dynamics mediated by strong synapses can closely mimic the function of normalization operations, effectively alleviating the internal covariate shift problem. Secondly, we have observed that weak synapses remain weak during training without any imposed constraints, thus preserving the balance condition established by the strong synapses. Lastly, the coexistence of strong and weak synapses allows for a seamless transition from the \"lazy\" learning regime, characterized by the primary training of readout weights, to the \"rich\" learning regime, marked by alterations in neural representations. 
We believe this study can shed light on how structured computations can coexist with unstructured E-I balance dynamics and offer novel perspectives on the computational advantages of E-I balance.", "keywords": "excitation-inhibition balance;spiking neural networks;brain-inspired;neuroscience", "primary_area": "applications to neuroscience & cognitive science", "supplementary_material": "", "author": "Xiaohan Lin;Chaoming Wang;Boxin Shi;Si Wu", "authorids": "~Xiaohan_Lin1;~Chaoming_Wang1;~Boxin_Shi3;~Si_Wu1", "gender": ";M;M;M", "homepage": ";https://brainpy.tech/;http://camera.pku.edu.cn;https://mgv.pku.edu.cn/english/people/lbd/soeeace/267528.htm", "dblp": ";;69/783;25/437-1", "google_scholar": ";;K1LjZxcAAAAJ;", "orcid": ";;0000-0001-6749-0364;", "linkedin": ";;;", "or_profile": "~Xiaohan_Lin1;~Chaoming_Wang1;~Boxin_Shi3;~Si_Wu1", "aff": ";;Peking University;Peking University", "aff_domain": ";;pku.edu.cn;pku.edu.cn", "position": ";;Assistant Professor;Full Professor", "bibtex": "@misc{\nlin2024fast,\ntitle={Fast Learning in Balanced Deep Spiking Neural Networks with Strong and Weak Synapses},\nauthor={Xiaohan Lin and Chaoming Wang and Boxin Shi and Si Wu},\nyear={2024},\nurl={https://openreview.net/forum?id=96UB3vQpAA}\n}", "github": "", "project": "", "reviewers": "W5kj;VbNy;aXEa;yPXB", "site": "https://openreview.net/forum?id=96UB3vQpAA", "pdf_size": 703402, "rating": "1;3;6;8", "confidence": "5;4;4;4", "soundness": "2;1;3;4", "contribution": "1;2;4;3", "presentation": "2;1;3;4", "wc_summary": "110;116;37;67", "wc_strengths": "36;23;45;89", "wc_weaknesses": "140;513;198;55", "wc_questions": "1;2;151;218", "wc_review": "287;654;431;429", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.5, 2.692582403567252 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 1.118033988749895 ], "contribution_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 82.5, 32.361242250568814 ], "wc_strengths_avg": [ 48.25, 24.79289212657531 ], "wc_weaknesses_avg": [ 226.5, 173.05273762642415 ], "wc_questions_avg": [ 93.0, 94.5171942029597 ], "wc_review_avg": [ 450.25, 131.32664428820223 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7504787743864564, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:oRMmFyWEFDMJ:scholar.google.com/&scioq=Fast+Learning+in+Balanced+Deep+Spiking+Neural+Networks+with+Strong+and+Weak+Synapses&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "96nX9xIIx2", "title": "Visual Prompting Upgrades Neural Network Sparsification: A Data-Model Perspective", "track": "main", "status": "Reject", "tldr": "", "abstract": "The rapid development of large-scale deep learning models questions the affordability of hardware platforms, which necessitates the pruning to reduce their computational and memory footprints. 
Sparse neural networks, as the product, have demonstrated numerous favorable benefits like low complexity, undamaged generalization, $\\textit{etc}$. Most of the prominent pruning strategies are invented from a $\\textit{model-centric}$ perspective, focusing on searching and preserving crucial weights by analyzing network topologies. However, the role of data and its interplay with model-centric pruning has remained relatively unexplored. In this research, we introduce a novel $\\textit{data-model co-design}$ perspective: to promote superior weight sparsity by learning important model topology and adequate input data in a synergetic manner. Specifically, customized $\\textbf{V}$isual $\\textbf{P}$rompts are mounted to upgrade neural $\\textbf{N}$etwork $\\textbf{s}$parsification in our proposed $\\textbf{\\texttt{VPNs}}$ framework. As a pioneering effort, this paper conducts systematic investigations about the impact of different visual prompts on model pruning and suggests an effective joint optimization approach. Extensive experiments with $3$ network architectures and $8$ datasets evidence the substantial performance improvements from $\\textbf{\\texttt{VPNs}}$ over existing state-of-the-art pruning algorithms. Furthermore, we find that subnetworks discovered by $\\textbf{\\texttt{VPNs}}$ from pre-trained models enjoy better transferability across diverse downstream scenarios. These insights shed light on new promising possibilities of data-model co-designs for vision model sparsification. Codes are in the supplement.", "keywords": "neural network sparsification;visual prompt", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/7b2339ecfd6931cea6805aafbe426323e33c3767.zip", "author": "Can Jin;Tianjin Huang;Yihua Zhang;Mykola Pechenizkiy;Sijia Liu;Shiwei Liu;Tianlong Chen", "authorids": "~Can_Jin1;~Tianjin_Huang1;~Yihua_Zhang1;~Mykola_Pechenizkiy1;~Sijia_Liu1;~Shiwei_Liu2;~Tianlong_Chen1", "gender": "M;M;M;M;M;M;M", "homepage": "https://jincan333.github.io/;https://research.tue.nl/nl/persons/tianjin-huang;https://yihua-zhang.com;http://www.win.tue.nl/~mpechen/;https://lsjxjtu.github.io/;https://shiweiliuiiiiiii.github.io/;https://tianlong-chen.github.io", "dblp": ";189/3972;;37/4649;128/6972-1;234/8697-3.html;", "google_scholar": "RK-8dz0AAAAJ;https://scholar.google.co.uk/citations?user=yFLmPsoAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.tw/citations?user=F0uFT_kAAAAJ;C7dO_UgAAAAJ;73IbXtsAAAAJ;LE3ctn0AAAAJ", "orcid": "0009-0007-3407-1658;;;0000-0003-4955-0743;;;0000-0001-7774-8197", "linkedin": ";;zhangyihua/;mpechen/;;;tianlong-chen-783862167/", "or_profile": "~Can_Jin1;~Tianjin_Huang1;~Yihua_Zhang1;~Mykola_Pechenizkiy1;~Sijia_Liu1;~Shiwei_Liu2;~Tianlong_Chen1", "aff": "Rutgers University;University of Exeter;Michigan State University;Eindhoven University of Technology;Michigan State University;University of Oxford;Harvard University", "aff_domain": "rutgers.edu;exeter.ac.uk;msu.edu;tue.nl;msu.edu;ox.ac.uk;harvard.edu", "position": "PhD student;Lecturer;PhD student;Full Professor;Assistant Professor;Postdoc;Postdoc", "bibtex": "@misc{\njin2024visual,\ntitle={Visual Prompting Upgrades Neural Network Sparsification: A Data-Model Perspective},\nauthor={Can Jin and Tianjin Huang and Yihua Zhang and Mykola Pechenizkiy and Sijia Liu and Shiwei Liu and Tianlong Chen},\nyear={2024},\nurl={https://openreview.net/forum?id=96nX9xIIx2}\n}", "github": "", "project": "", "reviewers": "eRqj;hiB3;sGzb;8b5d",
"site": "https://openreview.net/forum?id=96nX9xIIx2", "pdf_size": 4569192, "rating": "3;5;5;5", "confidence": "5;4;4;4", "soundness": "3;3;3;3", "contribution": "2;2;3;3", "presentation": "4;3;3;4", "wc_summary": "39;43;92;104", "wc_strengths": "24;40;40;83", "wc_weaknesses": "115;273;56;137", "wc_questions": "2;55;8;153", "wc_review": "180;411;196;477", "wc_reply_reviewers": "361;196;0;11", "wc_reply_authors": "2971;1947;1204;995", "reply_reviewers": "5;1;0;1", "reply_authors": "7;5;4;4", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 69.5, 28.848743473503312 ], "wc_strengths_avg": [ 46.75, 21.924586655168667 ], "wc_weaknesses_avg": [ 145.25, 79.48073665989766 ], "wc_questions_avg": [ 54.5, 60.45866356445534 ], "wc_review_avg": [ 316.0, 130.23248442688944 ], "wc_reply_reviewers_avg": [ 142.0, 148.49410762720518 ], "wc_reply_authors_avg": [ 1779.25, 773.6873964980947 ], "reply_reviewers_avg": [ 1.75, 1.920286436967152 ], "reply_authors_avg": [ 5.0, 1.224744871391589 ], "replies_avg": [ 34, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=654969446584179655&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2;3;2;4;5", "aff_unique_norm": "Rutgers University;University of Exeter;Michigan State University;Eindhoven University of Technology;University of Oxford;Harvard University", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.rutgers.edu;https://www.exeter.ac.uk;https://www.msu.edu;https://www.tue.nl;https://www.ox.ac.uk;https://www.harvard.edu", "aff_unique_abbr": "Rutgers;Exeter;MSU;TU/e;Oxford;Harvard", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;2;0;1;0", "aff_country_unique": "United States;United Kingdom;Netherlands" }, { "title": "Alt-Text with Context: Improving Accessibility for Images on Twitter", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19305", "id": "97Dl82avFs", "author_site": "Nikita Srivatsan, Sofia Samaniego, Omar Florez, Taylor Berg-Kirkpatrick", "tldr": "", "abstract": "In this work we present an approach for generating alternative text (or alt-text) descriptions for images shared on social media, specifically Twitter. More than just a special case of image captioning, alt-text is both more literally descriptive and context-specific. Also critically, images posted to Twitter are often accompanied by user-written text that despite not necessarily describing the image may provide useful context that if properly leveraged can be informative. We address this task with a multimodal model that conditions on both textual information from the associated social media post as well as visual signal from the image, and demonstrate that the utility of these two information sources stacks. We put forward a new dataset of 371k images paired with alt-text and tweets scraped from Twitter and evaluate on it across a variety of automated metrics as well as human evaluation. 
We show that our approach of conditioning on both tweet text and visual information significantly outperforms prior work, by more than 2x on BLEU@4.", "keywords": "alt-text;social media;twitter;clip;computer vision;image captioning;accessibility", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Nikita Srivatsan;Sofia Samaniego;Omar Florez;Taylor Berg-Kirkpatrick", "authorids": "~Nikita_Srivatsan1;~Sofia_Samaniego1;~Omar_Florez1;~Taylor_Berg-Kirkpatrick1", "gender": "F;F;M;M", "homepage": "https://www.cs.cmu.edu/~asrivats;;https://www.linkedin.com/in/omar-u-florez-35338015/;https://cseweb.ucsd.edu/~tberg/", "dblp": "227/3475.html;;;22/8160", "google_scholar": "Zbihne0AAAAJ;;;mN6_BKAAAAAJ", "orcid": ";;;", "linkedin": ";sofia-samaniego;omar-u-florez-35338015/;", "or_profile": "~Nikita_Srivatsan1;~Sofia_Samaniego1;~Omar_Florez1;~Taylor_Berg-Kirkpatrick1", "aff": "Carnegie Mellon University;Twitter;Twitter;University of California, San Diego", "aff_domain": "cmu.edu;twitter.com;twitter.com;ucsd.edu", "position": "PhD student;Researcher;Researcher;Associate Professor", "bibtex": "@inproceedings{\nsrivatsan2024alttext,\ntitle={Alt-Text with Context: Improving Accessibility for Images on Twitter},\nauthor={Nikita Srivatsan and Sofia Samaniego and Omar Florez and Taylor Berg-Kirkpatrick},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=97Dl82avFs}\n}", "github": "", "project": "", "reviewers": "2NmJ;1eBv;xHBk", "pdf_size": 2532532, "rating": "5;6;8", "confidence": "4;3;3", "soundness": "2;2;3", "contribution": "2;2;3", "presentation": "2;3;3", "wc_summary": "68;114;89", "wc_strengths": "186;164;24", "wc_weaknesses": "650;119;54", "wc_questions": "80;68;32", "wc_review": "984;465;199", "wc_reply_reviewers": "124;31;0", "wc_reply_authors": "821;362;403", "reply_reviewers": "1;1;0", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 90.33333333333333, 18.80307303489394 ], "wc_strengths_avg": [ 124.66666666666667, 71.74646719912803 ], "wc_weaknesses_avg": [ 274.3333333333333, 266.958590213705 ], "wc_questions_avg": [ 60.0, 20.396078054371138 ], "wc_review_avg": [ 549.3333333333334, 325.97580005611184 ], "wc_reply_reviewers_avg": [ 51.666666666666664, 52.689868307125444 ], "wc_reply_authors_avg": [ 528.6666666666666, 207.38745274378476 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7559289460184545, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4912397428830109366&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=97Dl82avFs", "pdf": "https://openreview.net/pdf?id=97Dl82avFs", "email": "cmu.edu;twitter.com;twitter.com;ucsd.edu", "author_num": 4, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Carnegie Mellon University;Twitter, Inc.;University of California, San Diego", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cmu.edu;https://twitter.com;https://www.ucsd.edu", "aff_unique_abbr": 
"CMU;Twitter;UCSD", "aff_campus_unique_index": "1", "aff_campus_unique": ";San Diego", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "98g9NdJPxm", "title": "Theoretically Understanding Data Reconstruction Leakage in Federated Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Federated learning is an emerging collaborative learning paradigm that aims to protect data privacy. Unfortunately, recent works show that federated learning algorithms are vulnerable to data reconstruction attacks, and a series of follow-up works are proposed to enhance the attack effectiveness. However, existing works lack of a theoretical understanding on to what extent the devices' data can be reconstructed and the effectiveness of these attacks cannot be compared theoretically. To address it, we propose a theoretical framework to understand data reconstruction attacks to FL. Our framework involves bounding the data reconstruction error and an attack's error bound reflects its inherent attack effectiveness. Under the framework, we can theoretically compare the effectiveness of existing attacks. For instance, our experimental results on multiple datasets validate that the iDLG data reconstruction attack inherently outperforms the DLG attack.", "keywords": "Privacy leakage;model reconstruction attacks;federated learning", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "", "author": "Zifan Wang;Binghui Zhang;Meng Pang;Yuan Hong;Binghui Wang", "authorids": "~Zifan_Wang5;~Binghui_Zhang1;~Meng_Pang1;~Yuan_Hong1;~Binghui_Wang2", "gender": "M;M;M;M;M", "homepage": ";;;https://yhongcs.github.io/;https://wangbinghui.net", "dblp": ";;172/9447.html;79/5433-1;123/7149", "google_scholar": ";;;KJuZW2wAAAAJ;SoOztcEAAAAJ", "orcid": ";;;;0000-0001-5616-060X", "linkedin": "zifan-wang-2bb067173;binghui-zhang-68b20a108;;;", "or_profile": "~Zifan_Wang5;~Binghui_Zhang1;~Meng_Pang1;~Yuan_Hong1;~Binghui_Wang2", "aff": "University of Georgia;Illinois Institute of Technology;Nanchang University;University of Connecticut;Illinois Institute of Technology", "aff_domain": "uga.edu;iit.edu;ncu.edu.cn;uconn.edu;iit.edu", "position": "PhD student;PhD student;Associate Professor;Associate Professor;Assistant Professor", "bibtex": "@misc{\nwang2024theoretically,\ntitle={Theoretically Understanding Data Reconstruction Leakage in Federated Learning},\nauthor={Zifan Wang and Binghui Zhang and Meng Pang and Yuan Hong and Binghui Wang},\nyear={2024},\nurl={https://openreview.net/forum?id=98g9NdJPxm}\n}", "github": "", "project": "", "reviewers": "kviX;sB6V;3bse", "site": "https://openreview.net/forum?id=98g9NdJPxm", "pdf_size": 1840938, "rating": "3;5;5", "confidence": "4;5;4", "soundness": "2;2;2", "contribution": "1;3;3", "presentation": "3;3;3", "wc_summary": "48;134;72", "wc_strengths": "47;48;33", "wc_weaknesses": "161;1346;105", "wc_questions": "5;483;4", "wc_review": "261;2011;214", "wc_reply_reviewers": "33;1370;0", "wc_reply_authors": "362;2383;182", "reply_reviewers": "1;2;0", "reply_authors": "1;4;1", "rating_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.0, 0.0 ], "contribution_avg": [ 2.3333333333333335, 0.9428090415820634 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 84.66666666666667, 36.23380864453651 ], "wc_strengths_avg": [ 42.666666666666664, 6.847546194724712 ], "wc_weaknesses_avg": [ 537.3333333333334, 572.2705265480265 
], "wc_questions_avg": [ 164.0, 225.56743263748572 ], "wc_review_avg": [ 828.6666666666666, 836.2560745502674 ], "wc_reply_reviewers_avg": [ 467.6666666666667, 638.188234161538 ], "wc_reply_authors_avg": [ 975.6666666666666, 997.8444545897701 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.0, 1.4142135623730951 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:5WBXkpXVnnAJ:scholar.google.com/&scioq=Theoretically+Understanding+Data+Reconstruction+Leakage+in+Federated+Learning&hl=en&as_sdt=0,3", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;1", "aff_unique_norm": "University of Georgia;Illinois Institute of Technology;Nanchang University;University of Connecticut", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.uga.edu;https://www.iit.edu;https://www.ncu.edu.cn;https://www.uconn.edu", "aff_unique_abbr": "UGA;IIT;NCU;UConn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "United States;China" }, { "id": "992eLydH8G", "title": "Do Pre-trained Transformers Really Learn In-context by Gradient Descent?", "track": "main", "status": "Reject", "tldr": "", "abstract": "Is In-Context Learning (ICL) implicitly equivalent to Gradient Descent (GD)? Several recent works draw analogies between the dynamics of GD and the emergent behavior of ICL in large language models. However, these works make assumptions far from the realistic natural language setting in which language models are trained. Such discrepancies between theory and practice, therefore necessitate further investigation to validate their applicability in reality.\n\nWe start by highlighting the weaknesses in prior works that construct Transformer weights to simulate gradient descent. Their experiments with training Transformers on ICL objective, inconsistencies in the order-sensitivity of ICL and GD, sparsity of the constructed weights, and sensitivity to parameter changes are some examples of a mismatch from the real-world setting. \n\nFurthermore, we probe and compare the ICL vs. GD hypothesis in a natural setting. We conduct comprehensive empirical analyses on language models pre-trained on natural data (LLaMa-7B). Our comparisons on various performance metrics highlight the inconsistent behavior of ICL and GD as a function of various factors such as datasets, models, and number of demonstrations. \nWe observe that ICL and GD adapt the output distribution of language models differently. 
These results indicate that the equivalence between ICL and GD is an open hypothesis, requires nuanced considerations and calls for further studies.", "keywords": "In-context learning;gradient descent;large language models", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Lingfeng Shen;Aayush Mishra;Daniel Khashabi", "authorids": "~Lingfeng_Shen1;~Aayush_Mishra1;~Daniel_Khashabi2", "gender": "M;M;M", "homepage": ";https://aamixsh.github.io;http://danielkhashabi.com/", "dblp": "240/5490.html;263/3200;71/10515", "google_scholar": "PoSTdLAAAAAJ;https://scholar.google.com/citations?hl=en;pK2kQvgAAAAJ", "orcid": ";;", "linkedin": ";aamixsh/;", "or_profile": "~Lingfeng_Shen1;~Aayush_Mishra1;~Daniel_Khashabi2", "aff": "Johns Hopkins University;Adobe Systems;Johns Hopkins University", "aff_domain": "jh.edu;adobe.com;jhu.edu", "position": "MS student;Intern;Assistant Professor", "bibtex": "@misc{\nshen2024do,\ntitle={Do Pre-trained Transformers Really Learn In-context by Gradient Descent?},\nauthor={Lingfeng Shen and Aayush Mishra and Daniel Khashabi},\nyear={2024},\nurl={https://openreview.net/forum?id=992eLydH8G}\n}", "github": "", "project": "", "reviewers": "pVVn;GeTu;ivcd;uBmS", "site": "https://openreview.net/forum?id=992eLydH8G", "pdf_size": 3321649, "rating": "3;5;6;6", "confidence": "3;2;4;4", "soundness": "2;2;3;3", "contribution": "2;2;2;3", "presentation": "2;2;4;3", "wc_summary": "124;77;72;102", "wc_strengths": "53;42;97;85", "wc_weaknesses": "334;69;526;570", "wc_questions": "80;87;8;127", "wc_review": "591;275;703;884", "wc_reply_reviewers": "0;18;259;485", "wc_reply_authors": "592;552;1956;1504", "reply_reviewers": "0;1;2;2", "reply_authors": "3;3;5;4", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 93.75, 20.837166314064877 ], "wc_strengths_avg": [ 69.25, 22.498611068241523 ], "wc_weaknesses_avg": [ 374.75, 197.57451126094176 ], "wc_questions_avg": [ 75.5, 42.8981351576033 ], "wc_review_avg": [ 613.25, 221.51114531779209 ], "wc_reply_reviewers_avg": [ 190.5, 198.41181920440124 ], "wc_reply_authors_avg": [ 1151.0, 600.815279432872 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 3.75, 0.82915619758885 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.4923659639173309, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "aff_unique_index": "0;1;0", "aff_unique_norm": "Johns Hopkins University;Adobe", "aff_unique_dep": ";Adobe Systems Incorporated", "aff_unique_url": "https://www.jhu.edu;https://www.adobe.com", "aff_unique_abbr": "JHU;Adobe", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "99hq9VMkbg", "title": "Fisher-aware Quantization for DETR Detectors with Critical-category Objectives", "track": "main", "status": "Reject", "tldr": "", "abstract": "The impact of quantization on the overall performance of deep learning models is a well-studied problem. However, understanding and overcoming its effects on a more fine-grained level is still lacking, especially for harder tasks such as object detection with both classification and regression objectives. This work identifies the performance for a subset of task-critical categories, i.e. 
the critical-category performance, as a crucial yet largely overlooked fine-grained objective for detection tasks. We analyze the impact of quantization at the category-level granularity, and propose methods to improve performance for the critical categories. Specifically, we find that certain critical categories have a higher sensitivity to quantization, and have inferior generalization after quantization-aware training (QAT). To explain this, we provide theoretical and empirical links between their performance gaps and the corresponding loss landscapes with the Fisher information framework. Using this evidence, we propose a Fisher-aware mixed-precision quantization scheme, and a Fisher-trace regularization for the QAT on the critical-category loss landscape. The proposed methods improve critical-category performance metrics of the quantized transformer-based DETR detectors. When compared to the conventional quantization objective, our Fisher-aware quantization scheme shows up to 0.9% mAP increase on COCO dataset. A further 0.5% mAP improvement is achieved for selected critical categories with the proposed Fisher-trace regularization.", "keywords": "Quantization;Detection Transformers;Fisher information;Finegrained performance", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/ad7138b4d3092f260e9ab8730f1a0b1e1bb44b6c.zip", "author": "Huanrui Yang;Yafeng Huang;Zhen Dong;Yizhao Zhang;Denis A Gudovskiy;Tomoyuki Okuno;Yohei Nakata;Yuan Du;Kurt Keutzer;Shanghang Zhang", "authorids": "~Huanrui_Yang1;~Yafeng_Huang1;~Zhen_Dong3;~Yizhao_Zhang2;~Denis_A_Gudovskiy1;~Tomoyuki_Okuno1;~Yohei_Nakata1;~Yuan_Du2;~Kurt_Keutzer1;~Shanghang_Zhang4", "gender": "M;;M;M;M;M;M;M;M;F", "homepage": "https://sites.google.com/view/huanrui-yang;https://github.com/AiyaYF;https://dong-zhen.com/;https://www.linkedin.com/in/zachary-zhang-874390221/;https://gudovskiy.github.io/;;;https://ese.nju.edu.cn/dy_en/list.htm;https://people.eecs.berkeley.edu/~keutzer/;https://www.shanghangzhang.com/", "dblp": "221/2845;;;;136/4981;;27/8364.html;26/8831;k/KurtKeutzer.html;95/11531", "google_scholar": "bjNCUt8AAAAJ;;czxMUzcAAAAJ;;03qjEm0AAAAJ;https://scholar.google.co.jp/citations?user=E7BhgRsAAAAJ;MA5f-rYAAAAJ;zyu8Qy4AAAAJ;ID9QePIAAAAJ;voqw10cAAAAJ", "orcid": ";;;;0000-0002-6829-6667;;0009-0006-9838-1367;0000-0002-5316-619X;0000-0003-3868-8501;", "linkedin": ";;zhen-dong/;;gudovskiy;;;;kurtkeutzer/;", "or_profile": "~Huanrui_Yang1;~Yafeng_Huang1;~Zhen_Dong3;~Yizhao_Zhang2;~Denis_A_Gudovskiy1;~Tomoyuki_Okuno1;~Yohei_Nakata1;~Yuan_Du2;~Kurt_Keutzer1;~Shanghang_Zhang1", "aff": "University of California, Berkeley;Nanjing University;Nexusflow.ai Inc;University of California, Berkeley;Panasonic Corp;Panasonic Holdings Corporation;Panasonic;Nanjing University;University of California, Berkeley;Peking University", "aff_domain": "berkeley.edu;nju.edu.cn;nexusflow.ai;berkeley.edu;panasonic.com;panasonic.com;us.panasonic.com;nju.edu.cn;berkeley.edu;pku.edu.cn", "position": "Postdoc;MS student;Principal Researcher;Undergrad student;Senior Researcher;Researcher;Researcher;Associate Professor;Full Professor;Assistant Professor", "bibtex": "@misc{\nyang2024fisheraware,\ntitle={Fisher-aware Quantization for {DETR} Detectors with Critical-category Objectives},\nauthor={Huanrui Yang and Yafeng Huang and Zhen Dong and Yizhao Zhang and Denis A Gudovskiy and Tomoyuki Okuno and Yohei Nakata and Yuan Du and Kurt Keutzer and Shanghang 
Zhang},\nyear={2024},\nurl={https://openreview.net/forum?id=99hq9VMkbg}\n}", "github": "", "project": "", "reviewers": "DjvC;ka68;WkhP", "site": "https://openreview.net/forum?id=99hq9VMkbg", "pdf_size": 5925157, "rating": "6;6;6", "confidence": "3;3;2", "soundness": "2;3;3", "contribution": "2;3;3", "presentation": "3;3;3", "wc_summary": "44;90;30", "wc_strengths": "21;16;65", "wc_weaknesses": "5;16;13", "wc_questions": "313;78;2", "wc_review": "383;200;110", "wc_reply_reviewers": "256;57;0", "wc_reply_authors": "913;353;319", "reply_reviewers": "2;2;0", "reply_authors": "3;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 54.666666666666664, 25.629843715654783 ], "wc_strengths_avg": [ 34.0, 22.015146301277824 ], "wc_weaknesses_avg": [ 11.333333333333334, 4.642796092394707 ], "wc_questions_avg": [ 131.0, 132.38076396012627 ], "wc_review_avg": [ 231.0, 113.58697108383514 ], "wc_reply_reviewers_avg": [ 104.33333333333333, 109.74009699689941 ], "wc_reply_authors_avg": [ 528.3333333333334, 272.35434435471905 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.9428090415820634 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:5zA5k8TYbMkJ:scholar.google.com/&scioq=Fisher-aware+Quantization+for+DETR+Detectors+with+Critical-category+Objectives&hl=en&as_sdt=0,44", "gs_version_total": 4, "aff_unique_index": "0;1;2;0;3;4;3;1;0;5", "aff_unique_norm": "University of California, Berkeley;Nanjing University;Nexusflow.ai;Panasonic Corporation;Panasonic Holdings Corporation;Peking University", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.berkeley.edu;https://www.nju.edu.cn;https://www.nexusflow.ai;https://www.panasonic.com;https://www.panasonic.com/global;http://www.pku.edu.cn", "aff_unique_abbr": "UC Berkeley;Nanjing U;Nexusflow.ai;Panasonic;PHC;Peking U", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;1;0;0;2;2;2;1;0;1", "aff_country_unique": "United States;China;Japan" }, { "title": "Learning Decentralized Partially Observable Mean Field Control for Artificial Collective Behavior", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19304", "id": "99tKiMVJhY", "author_site": "Kai Cui, Sascha Hauck, Christian Fabian, Heinz Koeppl", "tldr": "", "abstract": "Recent reinforcement learning (RL) methods have achieved success in various domains. However, multi-agent RL (MARL) remains a challenge in terms of decentralization, partial observability and scalability to many agents. Meanwhile, collective behavior requires resolution of the aforementioned challenges, and remains of importance to many state-of-the-art applications such as active matter physics, self-organizing systems, opinion dynamics, and biological or robotic swarms. Here, MARL via mean field control (MFC) offers a potential solution to scalability, but fails to consider decentralized and partially observable systems. 
In this paper, we enable decentralized behavior of agents under partial information by proposing novel models for decentralized partially observable MFC (Dec-POMFC), a broad class of problems with permutation-invariant agents allowing for reduction to tractable single-agent Markov decision processes (MDP) with single-agent RL solution. We provide rigorous theoretical results, including a dynamic programming principle, together with optimality guarantees for Dec-POMFC solutions applied to finite swarms of interest. Algorithmically, we propose Dec-POMFC-based policy gradient methods for MARL via centralized training and decentralized execution, together with policy gradient approximation guarantees. In addition, we improve upon state-of-the-art histogram-based MFC by kernel methods, which is of separate interest also for fully observable MFC. We evaluate numerically on representative collective behavior tasks such as adapted Kuramoto and Vicsek swarming models, being on par with state-of-the-art MARL. Overall, our framework takes a step towards RL-based engineering of artificial collective behavior via MFC.", "keywords": "Mean Field Control;Multi-Agent Reinforcement Learning;Partial Observability;Collective Behavior", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Kai Cui;Sascha H. Hauck;Christian Fabian;Heinz Koeppl", "authorids": "~Kai_Cui3;~Sascha_H._Hauck1;~Christian_Fabian1;~Heinz_Koeppl1", "gender": ";;M;M", "homepage": ";;https://www.bcs.tu-darmstadt.de/team_sos/fabianchristian.en.jsp;", "dblp": ";;85/10135-1;41/6084", "google_scholar": ";;https://scholar.google.de/citations?user=hYtlGkMAAAAJ;https://scholar.google.de/citations?user=WaPW80kAAAAJ", "orcid": ";;0000-0003-4239-3861;", "linkedin": ";;https://de.linkedin.com/in/-christian-fabian;", "or_profile": "~Kai_Cui3;~Sascha_H._Hauck1;~Christian_Fabian1;~Heinz_Koeppl1", "aff": ";;Technische Universit\u00e4t Darmstadt;TU Darmstadt", "aff_domain": ";;tu-darmstadt.de;tu-darmstadt.de", "position": ";;PhD student;Full Professor", "bibtex": "@inproceedings{\ncui2024learning,\ntitle={Learning Decentralized Partially Observable Mean Field Control for Artificial Collective Behavior},\nauthor={Kai Cui and Sascha H. 
Hauck and Christian Fabian and Heinz Koeppl},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=99tKiMVJhY}\n}", "github": "", "project": "", "reviewers": "gUVQ;xiBU;PQjP", "pdf_size": 6688687, "rating": "5;6;8", "confidence": "2;2;3", "soundness": "2;3;3", "contribution": "2;3;3", "presentation": "2;3;3", "wc_summary": "39;49;45", "wc_strengths": "6;37;12", "wc_weaknesses": "133;79;21", "wc_questions": "59;73;43", "wc_review": "237;238;121", "wc_reply_reviewers": "119;0;0", "wc_reply_authors": "751;637;205", "reply_reviewers": "1;0;0", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 2.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 44.333333333333336, 4.109609335312651 ], "wc_strengths_avg": [ 18.333333333333332, 13.424687043734847 ], "wc_weaknesses_avg": [ 77.66666666666667, 45.73352769637999 ], "wc_questions_avg": [ 58.333333333333336, 12.256517540566822 ], "wc_review_avg": [ 198.66666666666666, 54.92014404771916 ], "wc_reply_reviewers_avg": [ 39.666666666666664, 56.09713797413277 ], "wc_reply_authors_avg": [ 531.0, 235.16802503741872 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.944911182523068, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14916352646919480751&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "openreview": "https://openreview.net/forum?id=99tKiMVJhY", "pdf": "https://openreview.net/pdf?id=99tKiMVJhY", "email": ";;tu-darmstadt.de;tu-darmstadt.de", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Technische Universit\u00e4t Darmstadt", "aff_unique_dep": "", "aff_unique_url": "https://www.tu-darmstadt.de", "aff_unique_abbr": "TUD", "aff_campus_unique_index": "1", "aff_campus_unique": ";Darmstadt", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "id": "9AnR2z7iNL", "title": "DivKnowQA: Verifying the Reasoning Ability of LLM Through Open-Domain Question Answering Over Knowledge Base and Text", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Open-domain complex question answering often breaks down a multi-hop question into single-hop questions, leveraging external knowledge for solutions. Current practices show a pronounced preference for unstructured texts, such as Wikipedia, often overlooking the potential of structured knowledge sources, such as WikiData. Additionally, while existing research has employed external tools to enhance the Large Language Model(LLM)\u2019s capabilities, many tests have been conducted in artificial or toy scenarios. We argue that open-domain complex question answering presents a realistic and intricate challenge for LLM, necessitating the integration of external tools, including retrieval systems and knowledge base engines. In this paper, we present a new benchmark DIVKNOWQA to assess the LLMs\u2019 reasoning skills and tool compatibility. Comprising 940 human-annotated intricate questions, DIVKNOWQA mandates both structured and unstructured knowledge for comprehensive answers. 
The subpar performance of prevailing SOTA methods, such as DSP and REACT, on our benchmark demonstrates its challenge. Moreover, we introduce our method DETLLM, which incorporates a symbolic language generation tool and a retrieval toolbox, pioneering a new approach to address this challenge. Our data and code will be released", "keywords": "Benchmark;Question Answering;LLM;Retrieval", "primary_area": "datasets and benchmarks", "supplementary_material": "", "author": "Wenting Zhao;Ye Liu;Tong Niu;Yao Wan;Philip S. Yu;Shafiq Joty;Yingbo Zhou;Semih Yavuz", "authorids": "~Wenting_Zhao4;~Ye_Liu4;~Tong_Niu1;~Yao_Wan2;~Philip_S._Yu1;~Shafiq_Joty1;~Yingbo_Zhou1;~Semih_Yavuz1", "gender": "F;M;M;M;M;;M;F", "homepage": ";;http://wanyao.me;https://cs.uic.edu/profiles/philip-yu/;https://raihanjoty.github.io/;;;", "dblp": "96/2615-6;;167/0275.html;y/PhilipSYu;62/2078;72/8614;;41/10049-6.html", "google_scholar": "QMKD6YMAAAAJ;rrMtKR4AAAAJ;c3MtqtMAAAAJ;D0lL1r0AAAAJ;hR249csAAAAJ;H_6RQ7oAAAAJ;krh3p8AAAAAJ;aySy_OMAAAAJ", "orcid": ";;0000-0001-6937-4180;0000-0002-3491-5968;;;;", "linkedin": ";;;;;yingbozhou/;semih-yavuz-4303518b;", "or_profile": "~Ye_Liu4;~Tong_Niu1;~Yao_Wan2;~Philip_S._Yu1;~Shafiq_Joty1;~Yingbo_Zhou1;~Semih_Yavuz2;~Wenting_Zhao5", "aff": "SalesForce.com;Salesforce AI Research;Huazhong University of Science and Technology;University of Illinois Chicago;SalesForce.com;Salesforce Research;SalesForce.com;University of Illinois at Chicago", "aff_domain": "salesforce.com;salesforce.com;hust.edu.cn;uic.edu;salesforce.com;salesforce.com;salesforce.com;uic.edu", "position": "Researcher;Research Scientist;Assistant Professor;Full Professor;Principal Researcher;Research Scientist;Research Scientist;PhD student", "bibtex": "@misc{\nzhao2024divknowqa,\ntitle={DivKnow{QA}: Verifying the Reasoning Ability of {LLM} Through Open-Domain Question Answering Over Knowledge Base and Text},\nauthor={Wenting Zhao and Ye Liu and Tong Niu and Yao Wan and Philip S. 
Yu and Shafiq Joty and Yingbo Zhou and Semih Yavuz},\nyear={2024},\nurl={https://openreview.net/forum?id=9AnR2z7iNL}\n}", "github": "", "project": "", "reviewers": "r9Z6;dqDr;7HFZ", "site": "https://openreview.net/forum?id=9AnR2z7iNL", "pdf_size": 909077, "rating": "3;5;5", "confidence": "4;3;4", "soundness": "2;3;2", "contribution": "2;2;2", "presentation": "2;3;2", "wc_summary": "105;78;300", "wc_strengths": "86;28;200", "wc_weaknesses": "206;88;308", "wc_questions": "42;46;19", "wc_review": "439;240;827", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "647;417;1098", "reply_reviewers": "0;0;0", "reply_authors": "1;1;2", "rating_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 161.0, 98.90399385262458 ], "wc_strengths_avg": [ 104.66666666666667, 71.44850515503379 ], "wc_weaknesses_avg": [ 200.66666666666666, 89.89376446055026 ], "wc_questions_avg": [ 35.666666666666664, 11.897712198383164 ], "wc_review_avg": [ 502.0, 243.74713673531974 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 720.6666666666666, 282.8548901625865 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.49999999999999983, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:QuMYRhh8zrcJ:scholar.google.com/&scioq=DivKnowQA:+Verifying+the+Reasoning+Ability+of+LLM+Through+Open-Domain+Question+Answering+Over+Knowledge+Base+and+Text&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;1;2;0;0;0;2", "aff_unique_norm": "Salesforce;Huazhong University of Science and Technology;University of Illinois at Chicago", "aff_unique_dep": ";;", "aff_unique_url": "https://www.salesforce.com;http://www.hust.edu.cn;https://www.uic.edu", "aff_unique_abbr": "Salesforce;HUST;UIC", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Chicago", "aff_country_unique_index": "0;0;1;0;0;0;0;0", "aff_country_unique": "United States;China" }, { "id": "9BERij4Gbv", "title": "Guided Evolution with Binary Discriminators for ML Program Search", "track": "main", "status": "Reject", "tldr": "", "abstract": "How to automatically design better machine learning programs is an open problem within AutoML. While evolution has been a popular tool to search for better ML programs, using learning itself to guide the search has been less successful and less understood on harder problems but has the promise to dramatically increase the speed and final performance of the optimization process. We propose guiding evolution with a binary discriminator, trained online to distinguish which program is better given a pair of programs. The discriminator selects better programs without having to perform a costly evaluation and thus speeds up the convergence of evolution. Our method can encode a wide variety of ML components including symbolic optimizers, neural architectures, RL loss functions, and symbolic regression equations with the same directed acyclic graph representation.
By combining this representation with modern GNNs and an adaptive mutation strategy, we demonstrate our method can speed up evolution across a set of diverse problems including a 3.7x speedup on the symbolic search for ML optimizers and a 4x speedup for RL loss functions.", "keywords": "automl;evolution;program search", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/cd95578fe2161440c052fbb679f124667b06e639.pdf", "author": "John D Co-Reyes;Yingjie Miao;George Tucker;Aleksandra Faust;Esteban Real", "authorids": "~John_D_Co-Reyes1;~Yingjie_Miao1;~George_Tucker1;~Aleksandra_Faust1;~Esteban_Real1", "gender": "M;;M;F;M", "homepage": ";;https://sites.google.com/view/gjt;http://www.afaust.info;https://www.estebanreal.com/", "dblp": "198/1129;22/10043;135/5748;135/8420;156/0082", "google_scholar": ";ScqM05wAAAAJ;-gJkPHIAAAAJ;RK72t68AAAAJ;ipTsozQAAAAJ", "orcid": ";;;0000-0002-3268-8685;", "linkedin": ";yingjiemiao/;;aleksandrafaust;", "or_profile": "~John_D_Co-Reyes1;~Yingjie_Miao1;~George_Tucker1;~Aleksandra_Faust1;~Esteban_Real1", "aff": ";Google DeepMind;Google Brain;Google Brain;Google", "aff_domain": ";google.com;google.com;google.com;google.com", "position": ";Software Engineer;Research Scientist;Principal Researcher;Engineer/Researcher", "bibtex": "@misc{\nco-reyes2024guided,\ntitle={Guided Evolution with Binary Discriminators for {ML} Program Search},\nauthor={John D Co-Reyes and Yingjie Miao and George Tucker and Aleksandra Faust and Esteban Real},\nyear={2024},\nurl={https://openreview.net/forum?id=9BERij4Gbv}\n}", "github": "", "project": "", "reviewers": "qo6q;3xvf;Mfci", "site": "https://openreview.net/forum?id=9BERij4Gbv", "pdf_size": 1084741, "rating": "3;5;8", "confidence": "4;4;4", "soundness": "1;3;4", "contribution": "1;2;3", "presentation": "3;3;3", "wc_summary": "43;82;176", "wc_strengths": "7;134;214", "wc_weaknesses": "31;298;211", "wc_questions": "2;2;300", "wc_review": "83;516;901", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;356;0", "reply_reviewers": "0;0;0", "reply_authors": "0;1;0", "rating_avg": [ 5.333333333333333, 2.0548046676563256 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 1.247219128924647 ], "contribution_avg": [ 2.0, 0.816496580927726 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 100.33333333333333, 55.82313339666829 ], "wc_strengths_avg": [ 118.33333333333333, 85.23040667638647 ], "wc_weaknesses_avg": [ 180.0, 111.18453129819814 ], "wc_questions_avg": [ 101.33333333333333, 140.47854719572743 ], "wc_review_avg": [ 500.0, 334.13869375854495 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 118.66666666666667, 167.82000940160728 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8820123988891993038&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google DeepMind", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Multi-granularity Correspondence Learning from Long-term Noisy Videos", "status": "Oral", "track": "main", "site": 
"https://iclr.cc/virtual/2024/poster/19303", "id": "9Cu8MRmhq2", "author_site": "Yijie Lin, Jie Zhang, Zhenyu Huang, Jia Liu, zujie wen, Xi Peng", "tldr": "", "abstract": "Existing video-language studies mainly focus on learning short video clips, leaving long-term temporal dependencies rarely explored due to over-high computational cost of modeling long videos. To address this issue, one feasible solution is learning the correspondence between video clips and captions, which however inevitably encounters the multi-granularity noisy correspondence (MNC) problem. To be specific, MNC refers to the clip-caption misalignment (coarse-grained) and frame-word misalignment (fine-grained), hindering temporal learning and video understanding. In this paper, we propose NOise Robust Temporal Optimal traNsport (Norton) that addresses MNC in a unified optimal transport (OT) framework. In brief, Norton employs video-paragraph and clip-caption contrastive losses to capture long-term dependencies based on OT. To address coarse-grained misalignment in video-paragraph contrast, Norton filters out the irrelevant clips and captions through an alignable prompt bucket and realigns asynchronous clip-caption pairs based on transport distance. To address the fine-grained misalignment, Norton incorporates a soft-maximum operator to identify crucial words and key frames. Additionally, Norton exploits the potential faulty negative samples in clip-caption contrast by rectifying the alignment target with OT assignment to ensure precise temporal modeling. Extensive experiments on video retrieval, videoQA, and action segmentation verify the effectiveness of our method. \nCode is available at https://lin-yijie.github.io/projects/Norton.", "keywords": "Video-language pre-training;Noisy correspondence", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Yijie Lin;Jie Zhang;Zhenyu Huang;Jia Liu;zujie wen;Xi Peng", "authorids": "~Yijie_Lin1;~Jie_Zhang42;~Zhenyu_Huang1;~Jia_Liu4;~zujie_wen1;~Xi_Peng3", "gender": ";F;M;M;M;M", "homepage": "https://lin-yijie.github.io;;https://hi-zhenyu.github.io/;;;http://www.pengxi.me", "dblp": "02/9654-1.html;84/6889-60;181/2445-5;;260/0351;18/931-1", "google_scholar": "https://scholar.google.com.hk/citations?user=KXKVYHsAAAAJ;;0mdxlb8AAAAJ;https://scholar.google.com/citations?hl=zh-CN;vsZ4dK8AAAAJ;bw9FOHAAAAAJ", "orcid": "0000-0003-1746-295X;0000-0001-6331-4005;0000-0003-4161-9427;;;", "linkedin": ";;;;;", "or_profile": "~Yijie_Lin1;~Jie_Zhang42;~Zhenyu_Huang1;~Jia_Liu4;~zujie_wen1;~Xi_Peng2", "aff": "Sichuan University;;Alibaba Group;Ant Group;Ant Group;Sichuan University", "aff_domain": "scu.edu.cn;;antgroup.com;antgroup.com;antgroup.com;scu.edu.cn", "position": "PhD student;;Researcher;Principal Researcher;Researcher;Full Professor", "bibtex": "@inproceedings{\nlin2024multigranularity,\ntitle={Multi-granularity Correspondence Learning from Long-term Noisy Videos},\nauthor={Yijie Lin and Jie Zhang and Zhenyu Huang and Jia Liu and zujie wen and Xi Peng},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=9Cu8MRmhq2}\n}", "github": "", "project": "", "reviewers": "83JB;pP85;TtfV;UrFs", "pdf_size": 1235445, "rating": "8;8;8;8", "confidence": "5;5;4;3", "soundness": "4;4;4;3", "contribution": "4;4;4;3", "presentation": "4;4;4;3", "wc_summary": "100;151;127;111", "wc_strengths": "169;208;174;168", "wc_weaknesses": 
"114;185;189;138", "wc_questions": "137;14;61;185", "wc_review": "520;558;551;602", "wc_reply_reviewers": "0;0;0;24", "wc_reply_authors": "957;928;1278;926", "reply_reviewers": "0;0;0;1", "reply_authors": "2;2;2;3", "rating_avg": [ 8.0, 0.0 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "contribution_avg": [ 3.75, 0.4330127018922193 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 122.25, 19.17517926904466 ], "wc_strengths_avg": [ 179.75, 16.467771555374455 ], "wc_weaknesses_avg": [ 156.5, 31.68990375498165 ], "wc_questions_avg": [ 99.25, 66.16031665583229 ], "wc_review_avg": [ 557.75, 29.277764600460877 ], "wc_reply_reviewers_avg": [ 6.0, 10.392304845413264 ], "wc_reply_authors_avg": [ 1022.25, 148.1660808012414 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7985543970551262066&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=9Cu8MRmhq2", "pdf": "https://openreview.net/pdf?id=9Cu8MRmhq2", "email": "scu.edu.cn;;antgroup.com;antgroup.com;antgroup.com;scu.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2;2;0", "aff_unique_norm": "Sichuan University;Alibaba Group;Ant Group", "aff_unique_dep": ";;", "aff_unique_url": "https://www.scu.edu.cn;https://www.alibaba.com;https://www.antgroup.com", "aff_unique_abbr": "SCU;Alibaba;Ant Group", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Elucidating the design space of classifier-guided diffusion generation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19302", "id": "9DXXMXnIGm", "author_site": "Jiajun Ma, Tianyang Hu, Wenjia Wang, Jiacheng Sun", "tldr": "", "abstract": "Guidance in conditional diffusion generation is of great importance for sample quality and controllability. \nHowever, existing guidance schemes are to be desired. \nOn one hand, mainstream methods such as classifier guidance and classifier-free guidance both require extra training with labeled data, which is time-consuming and unable to adapt to new conditions.\nOn the other hand, training-free methods such as universal guidance, though more flexible, have yet to demonstrate comparable performance. \nIn this work, through a comprehensive investigation into the design space, we show that it is possible to achieve significant performance improvements over existing guidance schemes by leveraging off-the-shelf classifiers in a training-free fashion, enjoying the best of both worlds. \nEmploying calibration as a general guideline, we propose several pre-conditioning techniques to better exploit pretrained off-the-shelf classifiers for guiding diffusion generation. 
\nExtensive experiments on ImageNet validate our proposed method, showing that state-of-the-art (SOTA) diffusion models (DDPM, EDM, DiT) can be further improved (up to 20\\%) using off-the-shelf classifiers with barely any extra computational cost.\nWith the proliferation of publicly available pretrained classifiers, our proposed approach has great potential and can be readily scaled up to text-to-image generation tasks.", "keywords": "conditional diffusion sampling;classifier guidance", "primary_area": "generative models", "supplementary_material": "/attachment/a77119b5f7e1cdcb60a31b0cda3fac0bd2d35a99.pdf", "author": "Jiajun Ma;Tianyang Hu;Wenjia Wang;Jiacheng Sun", "authorids": "~Jiajun_Ma1;~Tianyang_Hu1;~Wenjia_Wang2;~Jiacheng_Sun1", "gender": "M;M;M;M", "homepage": ";https://hu-tianyang.github.io/;https://www.wenjia-w.com/;", "dblp": ";170/2551;;165/5350", "google_scholar": ";mlA_3r0AAAAJ;EKS1sO0AAAAJ;", "orcid": ";;;", "linkedin": "https://www.linkedin.cn/incareer/in/ACoAABNx8OQBL99vmEOUUrE18c5XwhVpsxhEGu0;;;https://www.linkedin.cn/incareer/in/jiacheng-sun-ab622b131", "or_profile": "~Jiajun_Ma1;~Tianyang_Hu1;~Wenjia_Wang2;~Jiacheng_Sun1", "aff": "Hong Kong University of Science and Technology;Huawei Noah's Ark Lab;HKUST (GZ);Huawei Noah's Ark Lab", "aff_domain": "ust.hk;huawei.com;hkust-gz.edu.cn;huawei.com", "position": "PhD student;Researcher;Assistant Professor;Senior Researcher", "bibtex": "@inproceedings{\nma2024elucidating,\ntitle={Elucidating the design space of classifier-guided diffusion generation},\nauthor={Jiajun Ma and Tianyang Hu and Wenjia Wang and Jiacheng Sun},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=9DXXMXnIGm}\n}", "github": "", "project": "", "reviewers": "VZ9L;rssc;wqgP;r6Zr;6jri", "pdf_size": 4818476, "rating": "5;5;6;6;8", "confidence": "5;4;5;3;5", "soundness": "3;2;3;2;4", "contribution": "3;2;3;3;3", "presentation": "3;2;3;2;3", "wc_summary": "78;60;43;68;116", "wc_strengths": "93;33;46;48;114", "wc_weaknesses": "157;232;493;286;257", "wc_questions": "60;2;29;169;69", "wc_review": "388;327;611;571;556", "wc_reply_reviewers": "66;32;0;372;52", "wc_reply_authors": "680;691;1641;1695;1085", "reply_reviewers": "1;1;0;4;1", "reply_authors": "1;1;4;3;2", "rating_avg": [ 6.0, 1.0954451150103321 ], "confidence_avg": [ 4.4, 0.8 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "contribution_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 73.0, 24.363907732545698 ], "wc_strengths_avg": [ 66.8, 31.121696611849426 ], "wc_weaknesses_avg": [ 285.0, 112.4651056995013 ], "wc_questions_avg": [ 65.8, 56.77816481711962 ], "wc_review_avg": [ 490.6, 111.82951309918147 ], "wc_reply_reviewers_avg": [ 104.4, 135.6253663589522 ], "wc_reply_authors_avg": [ 1158.4, 441.26164573867055 ], "reply_reviewers_avg": [ 1.4, 1.3564659966250536 ], "reply_authors_avg": [ 2.2, 1.16619037896906 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.22821773229381925, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17380173202151892858&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=9DXXMXnIGm", "pdf": "https://openreview.net/pdf?id=9DXXMXnIGm", "email": "ust.hk;huawei.com;hkust-gz.edu.cn;huawei.com", "author_num": 4, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Hong Kong University of Science and Technology;Huawei", "aff_unique_dep": 
";Noah's Ark Lab", "aff_unique_url": "https://www.ust.hk;https://www.huawei.com", "aff_unique_abbr": "HKUST;Huawei", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Hong Kong SAR;;Guangzhou", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "ED-NeRF: Efficient Text-Guided Editing of 3D Scene With Latent Space NeRF", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19301", "id": "9DvDRTTdlu", "author_site": "Jangho Park, Gihyun Kwon, Jong Chul YE", "tldr": "", "abstract": "Recently, there has been a significant advancement in text-to-image diffusion models, leading to groundbreaking performance in 2D image generation. These advancements have been extended to 3D models, enabling the generation of novel 3D objects from textual descriptions. This has evolved into NeRF editing methods, which allow the manipulation of existing 3D objects through textual conditioning. However, existing NeRF editing techniques have faced limitations in their performance due to slow training speeds and the use of loss functions that do not adequately consider editing. To address this, here we present a novel 3D NeRF editing approach dubbed ED-NeRF by successfully embedding real-world scenes into the latent space of the latent diffusion model (LDM) through a unique refinement layer. This approach enables us to obtain a NeRF backbone that is not only faster but also more amenable to editing compared to traditional image space NeRF editing. Furthermore, we propose an improved loss function tailored for editing by migrating the delta denoising score (DDS) distillation loss, originally used in 2D image editing to the three-dimensional domain. This novel loss function surpasses the well-known score distillation sampling (SDS) loss in terms of suitability for editing purposes. 
Our experimental results demonstrate that ED-NeRF achieves faster editing speed while producing improved output quality compared to state-of-the-art 3D editing models.", "keywords": "NeRF;Diffusion model;3D scene editing", "primary_area": "generative models", "supplementary_material": "/attachment/2c94c50c0d663aed124b968c3da8d7c433667995.zip", "author": "JangHo Park;Gihyun Kwon;Jong Chul Ye", "authorids": "~JangHo_Park2;~Gihyun_Kwon1;~Jong_Chul_Ye1", "gender": "M;M;M", "homepage": "https://sites.google.com/view/janghopark/;https://sites.google.com/view/gihyunkwon;https://bispl.weebly.com/", "dblp": "154/0659;241/7060;15/5613", "google_scholar": ";yexbg8gAAAAJ;HNMjoNEAAAAJ", "orcid": ";;", "linkedin": ";gihyun-kwon-b4665a233/;", "or_profile": "~JangHo_Park2;~Gihyun_Kwon1;~Jong_Chul_Ye1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "position": "MS student;PhD student;Full Professor", "bibtex": "@inproceedings{\npark2024ednerf,\ntitle={{ED}-Ne{RF}: Efficient Text-Guided Editing of 3D Scene With Latent Space Ne{RF}},\nauthor={JangHo Park and Gihyun Kwon and Jong Chul Ye},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=9DvDRTTdlu}\n}", "github": "", "project": "", "reviewers": "rsnk;EEVN;kJsR;mMij", "pdf_size": 25650691, "rating": "5;5;6;6", "confidence": "5;4;3;3", "soundness": "2;3;3;4", "contribution": "2;2;2;3", "presentation": "3;3;3;2", "wc_summary": "137;67;70;107", "wc_strengths": "164;26;17;182", "wc_weaknesses": "141;229;118;133", "wc_questions": "146;32;12;137", "wc_review": "588;354;217;559", "wc_reply_reviewers": "0;0;0;12", "wc_reply_authors": "1113;921;386;678", "reply_reviewers": "0;0;0;1", "reply_authors": "4;3;3;3", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 95.25, 28.795615985771168 ], "wc_strengths_avg": [ 97.25, 76.08342460746624 ], "wc_weaknesses_avg": [ 155.25, 43.37265843823733 ], "wc_questions_avg": [ 81.75, 60.25103733546834 ], "wc_review_avg": [ 429.5, 152.27360243981883 ], "wc_reply_reviewers_avg": [ 3.0, 5.196152422706632 ], "wc_reply_authors_avg": [ 774.5, 272.1621759172277 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4230739473308506611&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=9DvDRTTdlu", "pdf": "https://openreview.net/pdf?id=9DvDRTTdlu", "email": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "id": "9Ebi1euQZQ", "title": "HallE-Switch: Rethinking and Controlling Object Existence Hallucinations in Large Vision-Language Models for Detailed Caption", "track": "main", "status": "Reject", "tldr": 
"", "abstract": "Current large vision-language models (LVLMs) achieve remarkable progress, yet there remains significant uncertainty regarding their ability to accurately apprehend visual details, that is, in performing detailed captioning. To address this, we introduce \\textit{CCEval}, a GPT-4 assisted evaluation method tailored for detailed captioning. Interestingly, while LVLMs demonstrate minimal object existence hallucination in existing VQA benchmarks, our proposed evaluation reveals continued susceptibility to such hallucinations. In this paper, we make the first attempt to investigate and attribute such hallucinations, including image resolution, the language decoder size, and instruction data amount, quality, granularity. Our findings underscore the unwarranted inference when the language description includes details at a finer object granularity than what the vision module can ground or verify, thus inducing hallucination. To control such hallucinations, we further attribute the reliability of captioning to contextual knowledge (involving only contextually grounded objects) and parametric knowledge (containing inferred objects by the model). Thus, we introduce $\\textit{HallE-Switch}$, a controllable LVLM in terms of $\\textbf{Hall}$ucination in object $\\textbf{E}$xistence. HallE-Switch can condition the captioning to shift between (i) exclusively depicting contextual knowledge for grounded objects and (ii) blending it with parametric knowledge to imagine inferred objects. Our method reduces hallucination by 44\\% compared to LLaVA$_{7B}$ and maintains the same object coverage.", "keywords": "vision-language;large vision-language models;object hallucination", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/19243247ff574c8808b01f9ba639e85fbbe1320c.pdf", "author": "Bohan Zhai;Shijia Yang;Xiangchen Zhao;Chenfeng Xu;Sheng Shen;Dongdi Zhao;Kurt Keutzer;Manling Li;Tan Yan;Xiangjun Fan", "authorids": "~Bohan_Zhai1;~Shijia_Yang1;~Xiangchen_Zhao1;~Chenfeng_Xu1;~Sheng_Shen2;~Dongdi_Zhao1;~Kurt_Keutzer1;~Manling_Li1;~Tan_Yan2;~Xiangjun_Fan1", "gender": "M;F;M;M;M;M;F;M;M;M", "homepage": ";;;;https://sincerass.github.io;https://people.eecs.berkeley.edu/~keutzer/;https://limanling.github.io/;http://www.google.com;;https://github.com/YoPatapon", "dblp": ";;;65/1881;138/5764-1.html;k/KurtKeutzer.html;178/3620;;;", "google_scholar": "TAbgR14AAAAJ;;https://scholar.google.com/citations?hl=en;RpqvaTUAAAAJ;https://scholar.google.com/citations?hl=en;ID9QePIAAAAJ;6U4SXnUAAAAJ;;;", "orcid": ";;;0000-0002-4941-6985;;0000-0003-3868-8501;;;;", "linkedin": ";bronya-shijia-yang-762927193/;;;sheng-s-ab198a174/;kurtkeutzer/;;;xiangjun-max-fan-7984b12b/;", "or_profile": "~Bohan_Zhai1;~Shijia_Yang1;~Xiangchen_Zhao1;~Chenfeng_Xu1;~Sheng_Shen2;~Kurt_Keutzer1;~Manling_Li1;~Tan_Yan2;~Xiangjun_Fan1;~Dongdi_Zhao2", "aff": "Bytedance;Stanford University;Bytedance;University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;Stanford University;;ByteDance Inc.;Bytedance", "aff_domain": "bytedance.com;stanford.edu;bytedance.com;berkeley.edu;berkeley.edu;berkeley.edu;stanford.edu;;bytedance.com;bytedance.com", "position": "Researcher;MS student;Researcher;PhD student;PhD student;Full Professor;Postdoc;;Machine Learning Engineer;Researcher", "bibtex": "@misc{\nzhai2024halleswitch,\ntitle={HallE-Switch: Rethinking and Controlling Object Existence Hallucinations in Large Vision-Language 
Models for Detailed Caption},\nauthor={Bohan Zhai and Shijia Yang and Xiangchen Zhao and Chenfeng Xu and Sheng Shen and Dongdi Zhao and Kurt Keutzer and Manling Li and Tan Yan and Xiangjun Fan},\nyear={2024},\nurl={https://openreview.net/forum?id=9Ebi1euQZQ}\n}", "github": "", "project": "", "reviewers": "PjV9;DU9c;6XKb;ZWbF", "site": "https://openreview.net/forum?id=9Ebi1euQZQ", "pdf_size": 841003, "rating": "3;5;6;8", "confidence": "4;5;4;4", "soundness": "3;2;3;3", "contribution": "2;2;4;3", "presentation": "3;2;3;3", "wc_summary": "170;46;75;234", "wc_strengths": "64;52;44;15", "wc_weaknesses": "86;290;88;19", "wc_questions": "14;20;168;1", "wc_review": "334;408;375;269", "wc_reply_reviewers": "0;0;27;0", "wc_reply_authors": "504;788;1186;73", "reply_reviewers": "0;0;1;0", "reply_authors": "1;2;2;1", "rating_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 131.25, 74.9845817485168 ], "wc_strengths_avg": [ 43.75, 18.06066167115701 ], "wc_weaknesses_avg": [ 120.75, 101.58586269752303 ], "wc_questions_avg": [ 50.75, 68.04180700128414 ], "wc_review_avg": [ 346.5, 51.8579791353269 ], "wc_reply_reviewers_avg": [ 6.75, 11.691342951089922 ], "wc_reply_authors_avg": [ 637.75, 406.19722734159575 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.16012815380508713, "gs_citation": 56, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9614788549598911123&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;0;2;2;2;1;0;0", "aff_unique_norm": "ByteDance;Stanford University;University of California, Berkeley", "aff_unique_dep": ";;", "aff_unique_url": "https://www.bytedance.com;https://www.stanford.edu;https://www.berkeley.edu", "aff_unique_abbr": "Bytedance;Stanford;UC Berkeley", "aff_campus_unique_index": "1;2;2;2;1", "aff_campus_unique": ";Stanford;Berkeley", "aff_country_unique_index": "0;1;0;1;1;1;1;0;0", "aff_country_unique": "China;United States" }, { "id": "9F0xInGNBF", "title": "VIDEOPROMPTER: AN ENSEMBLE OF FOUNDATIONAL MODELS FOR ZERO-SHOT VIDEO UNDERSTANDING", "track": "main", "status": "Reject", "tldr": "", "abstract": "Vision-language models (VLMs) classify the query video by calculating a similarity score between the visual features and text-based class label representations.\nRecently, large language models (LLMs) have been used to enrich the text-based\nclass labels by enhancing the descriptiveness of the class names. However, these\nimprovements are restricted to the text-based classifier only, and the query visual\nfeatures are not considered. In this paper, we propose a framework which combines pre-trained discriminative VLMs with pre-trained generative video-to-text\nand text-to-text models. We introduce two key modifications to the standard zero-shot setting. First, we propose language-guided visual feature enhancement and\nemploy a video-to-text model to convert the query video to its descriptive form.\nThe resulting descriptions contain vital visual cues of the query video, such as\nwhat objects are present and their spatio-temporal interactions. These descriptive cues provide additional semantic knowledge to VLMs to enhance their zero-shot performance. 
Second, we propose video-specific prompts to LLMs to generate more meaningful descriptions to enrich class label representations. Specifically, we introduce prompt techniques to create a Tree Hierarchy of Categories for\nclass names, offering a higher-level action context for additional visual cues, We\ndemonstrate the effectiveness of our approach in video understanding across three\ndifferent zero-shot settings: 1) video action recognition, 2) video-to-text and text-to-video retrieval, and 3) time-sensitive video tasks. Consistent improvements\nacross multiple benchmarks and with various VLMs demonstrate the effectiveness of our proposed framework. Our code will be made publicly available.", "keywords": "Video-Language models;LLM;Video Understanding;Zero-shot", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Adeel Yousaf;Muzammal Naseer;Salman Khan;Fahad Khan;Mubarak Shah", "authorids": "~Adeel_Yousaf1;~Muzammal_Naseer1;~Salman_Khan4;~Fahad_Khan1;~Mubarak_Shah3", "gender": "M;M;M;M;M", "homepage": "https://www.crcv.ucf.edu/people/students/phd-students/;https://muzammal-naseer.com/;https://salman-h-khan.github.io/;https://sites.google.com/view/fahadkhans/home;https://www.crcv.ucf.edu/person/mubarak-shah/", "dblp": "217/1167;;32/11535-1;05/8618;s/MubarakShah", "google_scholar": "VpLbThoAAAAJ;https://scholar.google.ch/citations?user=tM9xKA8AAAAJ;https://scholar.google.es/citations?user=M59O9lkAAAAJ;zvaeYnUAAAAJ;https://scholar.google.com.tw/citations?user=p8gsO3gAAAAJ", "orcid": "0000-0003-0275-903X;0000-0001-7663-7161;0000-0002-9502-1749;;0000-0002-8216-1128", "linkedin": "adeel-yousaf-aa58b71a8/;muzammalnaseer/;;;mubarak-shah-b6aa68213/", "or_profile": "~Adeel_Yousaf1;~Muzammal_Naseer1;~Salman_Khan4;~Fahad_Khan1;~Mubarak_Shah3", "aff": "University of Central Florida;Mohamed bin Zayed University of Artificial Intelligence;Australian National University;Link\u00f6ping University;University of Central Florida", "aff_domain": "ucf.edu;mbzuai.ac.ae;anu.edu.au;liu.se;ucf.edu", "position": "PhD student;Researcher;Lecturer;Associate Professor;Full Professor", "bibtex": "@misc{\nyousaf2024videoprompter,\ntitle={{VIDEOPROMPTER}: {AN} {ENSEMBLE} {OF} {FOUNDATIONAL} {MODELS} {FOR} {ZERO}-{SHOT} {VIDEO} {UNDERSTANDING}},\nauthor={Adeel Yousaf and Muzammal Naseer and Salman Khan and Fahad Khan and Mubarak Shah},\nyear={2024},\nurl={https://openreview.net/forum?id=9F0xInGNBF}\n}", "github": "", "project": "", "reviewers": "FnrC;1vg9;8fLg;ypLm", "site": "https://openreview.net/forum?id=9F0xInGNBF", "pdf_size": 1069920, "rating": "5;5;5;6", "confidence": "4;4;3;5", "soundness": "3;2;3;3", "contribution": "2;2;3;3", "presentation": "3;3;2;4", "wc_summary": "57;174;57;46", "wc_strengths": "90;38;38;59", "wc_weaknesses": "113;263;155;99", "wc_questions": "11;8;14;2", "wc_review": "271;483;264;206", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "563;730;633;611", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 83.5, 52.44282601080914 ], "wc_strengths_avg": [ 56.25, 21.288200957337846 ], "wc_weaknesses_avg": [ 157.5, 64.30202174115523 ], "wc_questions_avg": [ 8.75, 4.437059837324712 ], "wc_review_avg": [ 306.0, 105.25920387310556 ], "wc_reply_reviewers_avg": [ 0, 
0 ], "wc_reply_authors_avg": [ 634.25, 60.80039062374517 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9194318577035266866&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "University of Central Florida;Mohamed bin Zayed University of Artificial Intelligence;Australian National University;Link\u00f6ping University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ucf.edu;https://mbzuai.ac.ae;https://www.anu.edu.au;https://www.liu.se", "aff_unique_abbr": "UCF;MBZUAI;ANU;LiU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;3;0", "aff_country_unique": "United States;United Arab Emirates;Australia;Sweden" }, { "id": "9FXGX00iMF", "title": "BWS: Best Window Selection Based on Sample Scores for Data Pruning across Broad Ranges", "track": "main", "status": "Reject", "tldr": "", "abstract": "Data subset selection aims to find a smaller yet informative subset of a large dataset that can approximate the full-dataset training, addressing challenges associated with training neural networks on large-scale datasets. However, existing methods tend to specialize in either high or low selection ratio regimes, lacking a universal approach that consistently achieves competitive performance across a broad range of selection ratios. We introduce a universal and efficient data subset selection method, Best Window Selection (BWS), by proposing a method to choose the best window subset from samples ordered based on their difficulty scores. This approach offers flexibility by allowing the choice of window intervals that span from easy to difficult samples. Furthermore, we provide an efficient mechanism for selecting the best window subset by evaluating its quality using kernel ridge regression. 
Our experimental results demonstrate the superior performance of BWS compared to other baselines across a broad range of selection ratios over datasets, including CIFAR-10/100 and ImageNet, and the scenarios involving training from random initialization or fine-tuning of pre-trained models.", "keywords": "Data subset selection;data pruning;data-efficient learning", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/e0f31acbdace2895754365d4e7cb78ab4eb4bd25.zip", "author": "Hoyong Choi;Nohyun Ki;Hye Won Chung", "authorids": "~Hoyong_Choi1;~Nohyun_Ki1;~Hye_Won_Chung2", "gender": "M;F;M", "homepage": ";https://iids.kaist.ac.kr/;https://iids.kaist.ac.kr/people", "dblp": ";https://dblp.uni-trier.de/pers/hd/c/Chung:Hye_Won;", "google_scholar": ";;", "orcid": ";;", "linkedin": "%ED%98%B8%EC%9A%A9-%EC%B5%9C-67b9a919b/;;", "or_profile": "~Hoyong_Choi1;~Hye_Won_Chung2;~Ki_Nohyun1", "aff": "KAIST;Korea Advanced Institute of Science & Technology;KAIST, Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;ee.kaist.ac.kr", "position": "PhD student;Associate Professor;PhD student", "bibtex": "@misc{\nchoi2024bws,\ntitle={{BWS}: Best Window Selection Based on Sample Scores for Data Pruning across Broad Ranges},\nauthor={Hoyong Choi and Nohyun Ki and Hye Won Chung},\nyear={2024},\nurl={https://openreview.net/forum?id=9FXGX00iMF}\n}", "github": "", "project": "", "reviewers": "BL3J;j18Y;LWro", "site": "https://openreview.net/forum?id=9FXGX00iMF", "pdf_size": 1927725, "rating": "5;5;6", "confidence": "4;3;4", "soundness": "2;3;3", "contribution": "2;2;3", "presentation": "3;3;3", "wc_summary": "74;72;104", "wc_strengths": "60;52;101", "wc_weaknesses": "89;86;70", "wc_questions": "90;5;4", "wc_review": "313;215;279", "wc_reply_reviewers": "186;0;9", "wc_reply_authors": "1861;1214;599", "reply_reviewers": "1;0;1", "reply_authors": "4;2;1", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 83.33333333333333, 14.636332266733433 ], "wc_strengths_avg": [ 71.0, 21.463146709340332 ], "wc_weaknesses_avg": [ 81.66666666666667, 8.339997335464536 ], "wc_questions_avg": [ 33.0, 40.307154038292836 ], "wc_review_avg": [ 269.0, 40.62839729384691 ], "wc_reply_reviewers_avg": [ 65.0, 85.63877626402656 ], "wc_reply_authors_avg": [ 1224.6666666666667, 515.264549096438 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 1.247219128924647 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18175229063289832276&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "id": "9G2IVZIh4H", "title": "FedMef: Towards Memory-efficient Federated Dynamic Pruning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Federated learning (FL) promotes 
decentralized training while prioritizing data confidentiality. However, its application on resource-constrained devices is challenging due to the high demand for computation and memory resources for training deep learning models. Neural network pruning techniques, such as dynamic pruning, could enhance model efficiency, but directly adopting them in FL still poses substantial challenges, including post-pruning performance degradation, high activation memory, etc. To address these challenges, we propose FedMef, a novel and memory-efficient federated dynamic pruning framework. FedMef comprises two key components. First, we introduce the budget-aware extrusion that maintains pruning efficiency while preserving post-pruning performance by salvaging crucial information from parameters marked for pruning within a given budget. Second, we propose scaled activation pruning to effectively reduce activation memory, which is particularly beneficial for deploying FL to memory-limited devices. Extensive experiments\ndemonstrate the effectiveness of our proposed FedMef. In particular, it achieves a significant reduction of 28.5\\% in memory footprint compared to state-of-the-art methods while obtaining superior accuracy.", "keywords": "federated learning;memory efficient training;nerual network pruning", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Hong Huang;Weiming Zhuang;Chen Chen;Lingjuan Lyu", "authorids": "~Hong_Huang4;~Weiming_Zhuang1;~Chen_Chen20;~Lingjuan_Lyu1", "gender": "M;;M;F", "homepage": "https://little0o0.github.io/;https://weiming.me/;https://cc233.github.io/;https://sites.google.com/view/lingjuan-lyu", "dblp": ";274/0724;65/4423-43;178/9876", "google_scholar": "_E4FBygAAAAJ;lLuLAzEAAAAJ;;", "orcid": ";;0000-0001-7359-8515;", "linkedin": ";;;", "or_profile": "~Hong_Huang4;~Weiming_Zhuang1;~Chen_Chen20;~Lingjuan_Lyu1", "aff": "City University of Hong Kong;Sony Research;Sony AI;Sony", "aff_domain": "my.cityu.edu.hk;sony.com;sony.com;sony.com", "position": "PhD student;Researcher;Researcher;scientist", "bibtex": "@misc{\nhuang2024fedmef,\ntitle={FedMef: Towards Memory-efficient Federated Dynamic Pruning},\nauthor={Hong Huang and Weiming Zhuang and Chen Chen and Lingjuan Lyu},\nyear={2024},\nurl={https://openreview.net/forum?id=9G2IVZIh4H}\n}", "github": "", "project": "", "reviewers": "vuu4;Lm19;fyPU;pgar", "site": "https://openreview.net/forum?id=9G2IVZIh4H", "pdf_size": 1323869, "rating": "3;3;3;6", "confidence": "5;4;1;4", "soundness": "2;2;2;4", "contribution": "2;2;2;3", "presentation": "1;2;2;4", "wc_summary": "41;43;156;61", "wc_strengths": "39;27;127;66", "wc_weaknesses": "228;87;219;47", "wc_questions": "95;53;240;4", "wc_review": "403;210;742;178", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 3.5, 1.5 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 1.0897247358851685 ], "wc_summary_avg": [ 75.25, 47.26719263929264 ], "wc_strengths_avg": [ 64.75, 38.615896985568 ], "wc_weaknesses_avg": [ 145.25, 79.58132632722327 ], "wc_questions_avg": [ 98.0, 88.08234783428516 ], "wc_review_avg": [ 383.25, 224.29598190783534 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 4, 0 ], 
"corr_rating_confidence": 0.19245008972987526, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9476448772804489695&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "City University of Hong Kong;Sony;Sony Corporation", "aff_unique_dep": ";Research;", "aff_unique_url": "https://www.cityu.edu.hk;https://www.sony.com;https://www.sony.com", "aff_unique_abbr": "CityU;Sony;Sony", "aff_campus_unique_index": "0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "China;Japan" }, { "id": "9GE0N1htnu", "title": "RINGER: Conformer Ensemble Generation of Macrocyclic Peptides with Sequence-Conditioned Internal Coordinate Diffusion", "track": "main", "status": "Reject", "tldr": "", "abstract": "Macrocyclic peptides are an emerging therapeutic modality, yet computational approaches for accurately sampling their diverse 3D ensembles remain challenging due to their conformational diversity and geometric constraints. Here, we introduce RINGER, a diffusion-based transformer model for conditional generation of macrocycle peptides based on redundant internal coordinates. RINGER provides fast backbone- and side-chain sampling while respecting key structural invariances of cyclic peptides. Through extensive benchmarking and analysis against gold-standard conformer ensembles of cyclic peptides generated with metadynamics, we demonstrate how RINGER generates both high-quality and diverse geometries at a fraction of the computational cost. Our work lays the foundation for improved sampling of cyclic geometries and the development of geometric learning methods for peptides.", "keywords": "molecular conformer generation;generative models;diffusion models;internal coordinates;peptides;macrocycles", "primary_area": "applications to physical sciences (physics, chemistry, biology, etc.)", "supplementary_material": "/attachment/8f122a22d995b8aceee8bbaba9c273b1be18626d.pdf", "author": "Colin A Grambow;Hayley Weir;Nathaniel Lee Diamant;Tommaso Biancalani;Gabriele Scalia;Kangway V. Chuang", "authorids": "~Colin_A_Grambow1;~Hayley_Weir1;~Nathaniel_Lee_Diamant1;~Tommaso_Biancalani1;~Gabriele_Scalia1;~Kangway_V._Chuang1", "gender": "M;F;;M;;M", "homepage": ";;;;;https://www.kangway.com", "dblp": "250/2439;;290/2075;;201/9258;279/6285", "google_scholar": "WfRi2K4AAAAJ;cMK68xcAAAAJ;;https://scholar.google.it/citations?user=s_qd9x0AAAAJ;MxeFvewAAAAJ;HThiDv8AAAAJ", "orcid": "0000-0002-2204-9046;0000-0002-1039-327X;0000-0002-1738-304X;;0000-0003-3305-9220;0000-0002-0652-8071", "linkedin": "cgrambow/;hayley-v-weir/;nathaniel-diamant-6b35b0106;;gabriele-scalia;", "or_profile": "~Colin_A_Grambow1;~Hayley_Weir1;~Nathaniel_Lee_Diamant1;~Tommaso_Biancalani1;~Gabriele_Scalia1;~Kangway_V_Chuang1", "aff": "Genentech;Genentech;genentech;Genentech;Genentech;Genentech Research and Early Development", "aff_domain": "gene.com;gene.com;gene.com;gene.com;gene.com;gene.com", "position": "Researcher;Researcher;Researcher;Director;Researcher;Researcher", "bibtex": "@misc{\ngrambow2024ringer,\ntitle={{RINGER}: Conformer Ensemble Generation of Macrocyclic Peptides with Sequence-Conditioned Internal Coordinate Diffusion},\nauthor={Colin A Grambow and Hayley Weir and Nathaniel Lee Diamant and Tommaso Biancalani and Gabriele Scalia and Kangway V. 
Chuang},\nyear={2024},\nurl={https://openreview.net/forum?id=9GE0N1htnu}\n}", "github": "", "project": "", "reviewers": "2uae;spjZ;PLmK;yeES", "site": "https://openreview.net/forum?id=9GE0N1htnu", "pdf_size": 4600321, "rating": "5;5;5;8", "confidence": "4;3;4;4", "soundness": "3;3;3;3", "contribution": "2;3;2;3", "presentation": "4;2;2;3", "wc_summary": "66;122;48;95", "wc_strengths": "81;36;71;132", "wc_weaknesses": "184;156;202;76", "wc_questions": "89;21;2;123", "wc_review": "420;335;323;426", "wc_reply_reviewers": "0;0;148;0", "wc_reply_authors": "1114;238;774;809", "reply_reviewers": "0;0;1;0", "reply_authors": "3;1;2;2", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 82.75, 28.19020219863632 ], "wc_strengths_avg": [ 80.0, 34.35840508521896 ], "wc_weaknesses_avg": [ 154.5, 48.194916744403656 ], "wc_questions_avg": [ 58.75, 49.21572411333597 ], "wc_review_avg": [ 376.0, 47.23875527572673 ], "wc_reply_reviewers_avg": [ 37.0, 64.08587988004847 ], "wc_reply_authors_avg": [ 733.75, 315.29381138867916 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:YQTShisoLqUJ:scholar.google.com/&scioq=RINGER:+Conformer+Ensemble+Generation+of+Macrocyclic+Peptides+with+Sequence-Conditioned+Internal+Coordinate+Diffusion&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Genentech", "aff_unique_dep": "", "aff_unique_url": "https://www.genentech.com", "aff_unique_abbr": "Genentech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "9GviaQcGnx", "title": "Constrained Parameter Regularization", "track": "main", "status": "Reject", "tldr": "", "abstract": "In this work, we present constrained parameter regularization (CPR), an alternative to traditional weight decay. Instead of applying a constant penalty uniformly to all parameters, we enforce an upper bound on a statistical measure (e.g., the L2-norm) of parameter groups. Consequently, learning becomes a constraint optimization problem, which we address by an adaptation of the augmented Lagrangian method. This formulation permits varying regularization strengths for each parameter group, eliminating the need for explicit penalty coefficients for regularization terms. CPR only requires two hyperparameters and incurs no measurable runtime overhead. Additionally, we propose a simple but efficient mechanism to adapt the upper bounds during the optimization. We provide empirical evidence of CPR's efficacy in experiments on the ``grokking'' phenomenon, computer vision, and language modeling tasks. Our results demonstrate that CPR counteracts the effects of grokking and consistently matches or outperforms traditional weight decay.", "keywords": "Weight Decay;Parameter Regularization;Augmented Lagrangian;Deep Learning", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/a0588e30e64db00c13de4705f0b6e52201bfa9f1.zip", "author": "J\u00f6rg K.H. 
Franke;Michael Hefenbrock;Gregor Koehler;Frank Hutter", "authorids": "~J\u00f6rg_K.H._Franke1;~Michael_Hefenbrock1;~Gregor_Koehler1;~Frank_Hutter1", "gender": ";;M;M", "homepage": ";;;http://ml.informatik.uni-freiburg.de/~hutter/", "dblp": ";;251/8923;89/5383", "google_scholar": ";;b8U4UTAAAAAJ;https://scholar.google.de/citations?user=YUrxwrkAAAAJ", "orcid": ";;;0000-0002-2037-3694", "linkedin": ";;;frank-hutter-9190b24b/", "or_profile": "~J\u00f6rg_K.H._Franke1;~Michael_Hefenbrock1;~Gregor_Koehler1;~Frank_Hutter1", "aff": ";;German Cancer Research Center (DKFZ);Albert-Ludwigs-Universit\u00e4t Freiburg", "aff_domain": ";;dkfz.de;uni-freiburg.de", "position": ";;PhD student;Full Professor", "bibtex": "@misc{\nfranke2024constrained,\ntitle={Constrained Parameter Regularization},\nauthor={J{\\\"o}rg K.H. Franke and Michael Hefenbrock and Gregor Koehler and Frank Hutter},\nyear={2024},\nurl={https://openreview.net/forum?id=9GviaQcGnx}\n}", "github": "", "project": "", "reviewers": "HpN9;EQkA;VW8f", "site": "https://openreview.net/forum?id=9GviaQcGnx", "pdf_size": 1016545, "rating": "3;5;8", "confidence": "5;2;3", "soundness": "2;3;4", "contribution": "1;2;4", "presentation": "2;2;4", "wc_summary": "66;62;32", "wc_strengths": "39;82;48", "wc_weaknesses": "96;326;47", "wc_questions": "1;2;97", "wc_review": "202;472;224", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "647;1074;401", "reply_reviewers": "0;0;0", "reply_authors": "3;4;1", "rating_avg": [ 5.333333333333333, 2.0548046676563256 ], "confidence_avg": [ 3.3333333333333335, 1.247219128924647 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "contribution_avg": [ 2.3333333333333335, 1.247219128924647 ], "presentation_avg": [ 2.6666666666666665, 0.9428090415820634 ], "wc_summary_avg": [ 53.333333333333336, 15.173075568988056 ], "wc_strengths_avg": [ 56.333333333333336, 18.517259216441534 ], "wc_weaknesses_avg": [ 156.33333333333334, 121.62876121853562 ], "wc_questions_avg": [ 33.333333333333336, 45.02098276236192 ], "wc_review_avg": [ 299.3333333333333, 122.42367236590951 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 707.3333333333334, 278.0435617348876 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.6666666666666665, 1.247219128924647 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5636214801906779, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3553407105975230256&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1", "aff_unique_norm": "German Cancer Research Center;Albert-Ludwigs-Universit\u00e4t Freiburg", "aff_unique_dep": ";", "aff_unique_url": "https://www.dkfz.de;https://www.uni-freiburg.de", "aff_unique_abbr": "DKFZ;Albert-Ludwigs-Universit\u00e4t", "aff_campus_unique_index": "1", "aff_campus_unique": ";Freiburg", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "id": "9Gvs64deOj", "title": "Rendering Wireless Environments Useful for Gradient Estimators: A Zero-Order Stochastic Federated Learning Method", "track": "main", "status": "Reject", "tldr": "", "abstract": "Federated learning (FL) is a novel approach to machine learning that allows multiple edge devices to collaboratively train a model without disclosing their raw data. However, several challenges hinder the practical implementation of this approach, especially when devices and the server communicate over wireless channels, as it suffers from communication and computation bottlenecks in this case. 
By utilizing a communication-efficient framework, we propose a novel zero-order (ZO) method with two types of gradient estimators, one-point and two-point, that harnesses the nature of the wireless communication channel without requiring the knowledge of the channel state coefficient. It is the first method that includes the wireless channel in the learning algorithm itself instead of wasting resources to analyze it and remove its impact. The two main difficulties of this work are that in FL, the objective function is usually not convex, which makes the extension of FL to ZO methods challenging, and that including the impact of wireless channels requires extra attention. However, we overcome these difficulties and comprehensively analyze the proposed zero-order federated learning (ZOFL) framework. We establish its convergence theoretically, and we prove a convergence rate of $O(\\frac{1}{\\sqrt[3]{K}})$ with the one-point estimate and $O(\\frac{1}{\\sqrt{K}})$ with the two-point one in the nonconvex setting. We further demonstrate the potential of our algorithms with experimental results, taking into account independent and identically distributed (IID) and non-IID device data distributions.", "keywords": "Federated learning;zero-order optimization over wireless channels;gradient estimates;convergence analysis", "primary_area": "optimization", "supplementary_material": "", "author": "Elissa Mhanna;Mohamad Assaad", "authorids": "~Elissa_Mhanna1;~Mohamad_Assaad1", "gender": "F;M", "homepage": "https://l2s.centralesupelec.fr/u/mhanna-elissa/;https://l2s.centralesupelec.fr/u/assaad-mohamad/", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": "elissa-mhanna;", "or_profile": "~Elissa_Mhanna1;~Mohamad_Assaad1", "aff": "CentraleSupelec;CentraleSupelec", "aff_domain": "centralesupelec.fr;centralesupelec.fr", "position": "PhD student;Full Professor", "bibtex": "@misc{\nmhanna2024rendering,\ntitle={Rendering Wireless Environments Useful for Gradient Estimators: A Zero-Order Stochastic Federated Learning Method},\nauthor={Elissa Mhanna and Mohamad Assaad},\nyear={2024},\nurl={https://openreview.net/forum?id=9Gvs64deOj}\n}", "github": "", "project": "", "reviewers": "b8PD;ufXa;3zxv;hpcJ;DD4A;eq2v", "site": "https://openreview.net/forum?id=9Gvs64deOj", "pdf_size": 761244, "rating": "3;3;3;3;5;5", "confidence": "4;4;4;3;4;3", "soundness": "3;2;2;2;2;3", "contribution": "2;2;2;2;3;3", "presentation": "2;2;2;2;3;2", "wc_summary": "40;41;84;57;59;72", "wc_strengths": "20;12;51;66;74;54", "wc_weaknesses": "325;339;281;295;43;218", "wc_questions": "4;10;267;15;212;160", "wc_review": "389;402;683;433;388;504", "wc_reply_reviewers": "0;0;0;0;0;0", "wc_reply_authors": "1711;966;656;620;453;848", "reply_reviewers": "0;0;0;0;0;0", "reply_authors": "3;2;1;1;1;2", "rating_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.1666666666666665, 0.3726779962499649 ], "wc_summary_avg": [ 58.833333333333336, 15.720651668709186 ], "wc_strengths_avg": [ 46.166666666666664, 22.74801578648613 ], "wc_weaknesses_avg": [ 250.16666666666666, 100.3367939602528 ], "wc_questions_avg": [ 111.33333333333333, 106.30407120875266 ], "wc_review_avg": [ 466.5, 104.66573778781033 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 875.6666666666666, 408.0386691914818 ], "reply_reviewers_avg": [ 0, 0 ], 
"reply_authors_avg": [ 1.6666666666666667, 0.74535599249993 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.25, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12485515710879715834&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0", "aff_unique_norm": "CentraleSup\u00e9lec", "aff_unique_dep": "", "aff_unique_url": "https://www.centralesupelec.fr", "aff_unique_abbr": "CS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "id": "9IUZya8bCN", "title": "PoisoningGuard: Provable Defense against Data Poisoning Attacks to Multi-label Classification", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Different from multi-class classification where each testing input only has a single ground truth label, multi-label classification aims to make predictions for testing inputs with multiple ground-truth labels. Multi-label classification has many real-world applications such as disease detection, object recognition, document classification, just to name a few. Recent studies, however, showed that a multi-label classifier is vulnerable to data-poisoning attacks, where an attacker can poison the training dataset of the multi-label classifier such that the classifier makes predictions as the attacker desires. Existing provable defenses are all designed for multi-class classification and they achieve sub-optimal results when applying their robustness guarantees to multi-label classification (as we will demonstrate in this paper). In this work, we propose PoisoningGuard, the first provable defense against data-poisoning attacks to multi-label classification. In particular, we generalize two state-of-the-art multi-class certification methods, namely bagging and Deep Partition Aggregation (DPA), to multi-label classification. Our major technical contribution is to jointly consider multiple labels when deriving the provable robustness guarantees. We perform comprehensive evaluations on three datasets. Our experimental results show that our generalized methods significantly outperform bagging and DPA when applying them to multi-label classification. 
The code will be released.", "keywords": "Certified defense;multi-label classification;data poisoning attacks", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/e6b53ad94cd405516d1a52bda6de2faf72333f70.pdf", "author": "yanting wang;Guohao Lan;Jinyuan Jia", "authorids": "~yanting_wang1;~Guohao_Lan1;~Jinyuan_Jia2", "gender": "M;M;", "homepage": "https://wang-yanting.github.io/;https://guohao.netlify.app/;https://jinyuan-jia.github.io/", "dblp": ";178/9755.html;24/5124-1.html", "google_scholar": "ClAr4UYAAAAJ;1ebZN5gAAAAJ;iyg4ytkAAAAJ", "orcid": "0009-0004-1653-1444;;0000-0002-9785-7769", "linkedin": ";;", "or_profile": "~yanting_wang1;~Guohao_Lan1;~Jinyuan_Jia2", "aff": "Pennsylvania State University;Delft University of Technology;Pennsylvania State University", "aff_domain": "ist.psu.edu;tudelft.nl;psu.edu", "position": "PhD student;Assistant Professor;Assistant Professor", "bibtex": "@misc{\nwang2024poisoningguard,\ntitle={PoisoningGuard: Provable Defense against Data Poisoning Attacks to Multi-label Classification},\nauthor={yanting wang and Guohao Lan and Jinyuan Jia},\nyear={2024},\nurl={https://openreview.net/forum?id=9IUZya8bCN}\n}", "github": "", "project": "", "reviewers": "usg5;PaSr;hayp;oRDf", "site": "https://openreview.net/forum?id=9IUZya8bCN", "pdf_size": 631448, "rating": "3;5;5;5", "confidence": "4;3;5;3", "soundness": "2;2;2;2", "contribution": "2;2;2;3", "presentation": "2;3;3;2", "wc_summary": "62;42;64;75", "wc_strengths": "21;54;52;78", "wc_weaknesses": "373;79;312;254", "wc_questions": "10;37;124;223", "wc_review": "466;212;552;630", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.0, 0.0 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 60.75, 11.903255857117413 ], "wc_strengths_avg": [ 51.25, 20.24073862288627 ], "wc_weaknesses_avg": [ 254.5, 109.7144019716646 ], "wc_questions_avg": [ 98.5, 83.3141644619929 ], "wc_review_avg": [ 465.0, 157.16551784663199 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:p7R9VwucWyMJ:scholar.google.com/&scioq=PoisoningGuard:+Provable+Defense+against+Data+Poisoning+Attacks+to+Multi-label+Classification&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "Pennsylvania State University;Delft University of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.psu.edu;https://www.tudelft.nl", "aff_unique_abbr": "PSU;TU Delft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;Netherlands" }, { "title": "A Real-World WebAgent with Planning, Long Context Understanding, and Program Synthesis", "status": "Oral", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19300", "id": "9JQtrumvg8", "author_site": "Izzeddin Gur, Hiroki Furuta, Austin Huang, Mustafa Safdari, Yutaka Matsuo, Douglas Eck, Aleksandra Faust", "tldr": "", "abstract": "Pre-trained large language models (LLMs) have recently achieved better generalization and sample 
efficiency in autonomous web automation.\nHowever, the performance on real-world websites has still suffered from (1) open domainness, (2) limited context length, and (3) lack of inductive bias on HTML.\nWe introduce WebAgent, an LLM-driven agent that learns from self-experience to complete tasks on real websites following natural language instructions.\nWebAgent plans ahead by decomposing instructions into canonical sub-instructions, summarizes long HTML documents into task-relevant snippets, and acts on websites via Python programs generated from those.\nWe design WebAgent with Flan-U-PaLM, for grounded code generation, and HTML-T5, new pre-trained LLMs for long HTML documents using local and global attention mechanisms and a mixture of long-span denoising objectives, for planning and summarization.\nWe empirically demonstrate that our modular recipe improves the success on real websites by over 50%, and that HTML-T5 is the best model to solve various HTML understanding tasks; achieving 18.7% higher success rate than the prior method on MiniWoB web automation benchmark, and SoTA performance on Mind2Web, an offline task planning evaluation.", "keywords": "Web Navigation;Web Automation;Large Language Models;Language Model Agents;Tool Use;Program Synthesis", "primary_area": "applications to robotics, autonomy, planning", "supplementary_material": "", "author": "Izzeddin Gur;Hiroki Furuta;Austin V Huang;Mustafa Safdari;Yutaka Matsuo;Douglas Eck;Aleksandra Faust", "authorids": "~Izzeddin_Gur1;~Hiroki_Furuta1;~Austin_V_Huang1;~Mustafa_Safdari1;~Yutaka_Matsuo1;~Douglas_Eck1;~Aleksandra_Faust1", "gender": ";M;;M;M;M;F", "homepage": ";https://github.com/frt03;https://github.com/austinvhuang;;http://ymatsuo.com;;http://www.afaust.info", "dblp": "188/9027;267/2065;;05/7184;m/YMatsuo.html;79/4646;135/8420", "google_scholar": "qS_ugJAAAAAJ;M0OhM1UAAAAJ;;;Dy8iau4AAAAJ;;RK72t68AAAAJ", "orcid": ";;;0009-0002-1604-8685;;;0000-0002-3268-8685", "linkedin": ";;austin-huang-74a75422/;mustafasafdari/;;;aleksandrafaust", "or_profile": "~Izzeddin_Gur1;~Hiroki_Furuta1;~Austin_V_Huang1;~Mustafa_Safdari1;~Yutaka_Matsuo1;~Douglas_Eck1;~Aleksandra_Faust1", "aff": "Google;Google DeepMind;;Research, Google;The University of Tokyo;Google;Google Brain", "aff_domain": "google.com;google.com;;research.google.com;u-tokyo.ac.jp;google.com;google.com", "position": "Research Scientist;Intern;;Researcher;Associate Professor;Research Scientist;Principal Researcher", "bibtex": "@inproceedings{\ngur2024a,\ntitle={A Real-World WebAgent with Planning, Long Context Understanding, and Program Synthesis},\nauthor={Izzeddin Gur and Hiroki Furuta and Austin V Huang and Mustafa Safdari and Yutaka Matsuo and Douglas Eck and Aleksandra Faust},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=9JQtrumvg8}\n}", "github": "", "project": "", "reviewers": "Jsxu;eCh6;ASiH;Gbpj", "pdf_size": 1816977, "rating": "5;8;8;8", "confidence": "4;3;3;5", "soundness": "4;3;4;3", "contribution": "3;4;4;4", "presentation": "2;2;3;3", "wc_summary": "65;183;61;188", "wc_strengths": "74;144;30;140", "wc_weaknesses": "15;383;59;91", "wc_questions": "218;81;75;52", "wc_review": "372;791;225;471", "wc_reply_reviewers": "0;20;23;0", "wc_reply_authors": "922;678;494;431", "reply_reviewers": "0;1;1;0", "reply_authors": "3;1;1;2", "rating_avg": [ 7.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "contribution_avg": [ 3.75, 
0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 124.25, 61.29182245618089 ], "wc_strengths_avg": [ 97.0, 47.634021455258214 ], "wc_weaknesses_avg": [ 137.0, 144.5683229480096 ], "wc_questions_avg": [ 106.5, 65.27825058930425 ], "wc_review_avg": [ 464.75, 207.70216055688974 ], "wc_reply_reviewers_avg": [ 10.75, 10.80219885023415 ], "wc_reply_authors_avg": [ 631.25, 190.82632811014312 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 233, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11247435352141794384&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=9JQtrumvg8", "pdf": "https://openreview.net/pdf?id=9JQtrumvg8", "email": "google.com;google.com;;research.google.com;u-tokyo.ac.jp;google.com;google.com", "author_num": 7, "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "Google;University of Tokyo", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.u-tokyo.ac.jp", "aff_unique_abbr": "Google;UTokyo", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;1;0;2;0;0", "aff_country_unique": "United States;United Kingdom;Japan" }, { "id": "9JRsAj3ymy", "title": "Time-Sensitive Replay for Continual Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Continual learning closely emulates the process of human learning, which allows a model to learn for a large number of tasks sequentially without forgetting knowledge obtained from the preceding tasks. Replay-based continual learning methods reintroduce examples from previous tasks to mitigate catastrophic forgetting. However, current replay-based methods often unnecessarily reintroduce training examples, leading to inefficiency, and require task information prior to training, which requires preceding knowledge of the training data stream. We propose a novel replay method, Time-Sensitive Replay (TSR), that reduces the number of replayed examples while maintaining accuracy. TSR detects drift in the model's prediction when learning a task and preemptively prevents forgetting events by reintroducing previously encountered examples to the training set. We extend this method to a task-free setting with Task-Free TSR (TF-TSR). 
In our experiments on benchmark datasets, our approach trains 23\\% to 25\\% faster than current task-based continual learning methods and 48\\% to 58\\% faster than task-free methods while maintaining accuracy.", "keywords": "Continual Learning;Replay Learning;Task-Free Learning", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "", "author": "Jack Colin Julian;Yun Sing Koh;Albert Bifet", "authorids": "~Jack_Colin_Julian1;~Yun_Sing_Koh2;~Albert_Bifet1", "gender": "M;;M", "homepage": ";https://profiles.auckland.ac.nz/y-koh;https://albertbifet.com/", "dblp": ";23/1879.html;48/1070", "google_scholar": ";0L38IrAAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0001-7256-4049;0000-0002-8339-7773", "linkedin": "jack-julian-b88878223;yun-sing-koh-a7ba358/;abifet/", "or_profile": "~Jack_Colin_Julian1;~Yun_Sing_Koh2;~Albert_Bifet1", "aff": "University of Auckland;University of Auckland;T\u00e9l\u00e9com Paris", "aff_domain": "auckland.ac.nz;auckland.ac.nz;telecom-paris.fr", "position": "PhD student;Full Professor;Full Professor", "bibtex": "@misc{\njulian2024timesensitive,\ntitle={Time-Sensitive Replay for Continual Learning},\nauthor={Jack Colin Julian and Yun Sing Koh and Albert Bifet},\nyear={2024},\nurl={https://openreview.net/forum?id=9JRsAj3ymy}\n}", "github": "", "project": "", "reviewers": "AYp8;D8wn;Xzud;qppC", "site": "https://openreview.net/forum?id=9JRsAj3ymy", "pdf_size": 877719, "rating": "3;3;3;5", "confidence": "4;3;4;4", "soundness": "2;3;2;3", "contribution": "2;2;2;2", "presentation": "1;2;2;3", "wc_summary": "38;101;55;94", "wc_strengths": "37;53;20;15", "wc_weaknesses": "140;376;154;50", "wc_questions": "188;37;37;100", "wc_review": "403;567;266;259", "wc_reply_reviewers": "21;0;14;0", "wc_reply_authors": "402;729;508;294", "reply_reviewers": "1;0;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 72.0, 26.315394733881533 ], "wc_strengths_avg": [ 31.25, 14.972892172189045 ], "wc_weaknesses_avg": [ 180.0, 119.99166637729472 ], "wc_questions_avg": [ 90.5, 61.88901356460612 ], "wc_review_avg": [ 373.75, 125.47783668839689 ], "wc_reply_reviewers_avg": [ 8.75, 9.093266739736606 ], "wc_reply_authors_avg": [ 483.25, 160.79703821899207 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:lheoGJz0Go8J:scholar.google.com/&scioq=Time-Sensitive+Replay+for+Continual+Learning&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Auckland;T\u00e9l\u00e9com Paris", "aff_unique_dep": ";", "aff_unique_url": "https://www.auckland.ac.nz;https://www.telecom-paris.fr", "aff_unique_abbr": "UoA;T\u00e9l\u00e9com Paris", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "New Zealand;France" }, { "id": "9JxQyat11M", "title": "Zero-Shot Visual Classification with Guided Cropping", "track": "main", "status": "Reject", "tldr": "", "abstract": "Pretrained vision-language models, e.g., CLIP, show promising zero-shot transfer capability across various unseen classification datasets. 
However, there is an inherent limitation: CLIP image encoders are typically designed to extract generic image-level features that summarize superfluous or confounding information for the target tasks. This results in degradation of classification performance, especially when objects of interest cover small areas of input images. In this work, we propose CLIP with Guided Cropping (GC-CLIP), where we use an off-the-shelf zero-shot object detection model in a preprocessing step to increase the focus of zero-shot classifiers on the object of interest and minimize the influence of extraneous image regions. We empirically show that our approach improves zero-shot performance across architectures and datasets, most favorably for small objects.", "keywords": "zero-shot;open-vocabulary;CLIP;image classification", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "/attachment/ad135640af6f7ecce8bedd5a8005608a5ffcd566.pdf", "author": "Piyapat Saranrittichai;Mauricio Munoz;Volker Fischer;Chaithanya Kumar Mummadi", "authorids": "~Piyapat_Saranrittichai1;~Mauricio_Munoz1;~Volker_Fischer1;~Chaithanya_Kumar_Mummadi1", "gender": "M;;M;M", "homepage": ";;;", "dblp": "299/7804;245/9820;84/4102-3;208/6386", "google_scholar": "https://scholar.google.de/citations?user=ncbD7EQAAAAJ;;https://scholar.google.de/citations?hl=de;XJLtaG4AAAAJ", "orcid": "0000-0003-0620-7945;;0000-0001-5437-4030;0000-0002-1173-2720", "linkedin": "https://www.linkedin.com/pub/piyapat-saranrittichai/38/684/b41;ammd010289/;;", "or_profile": "~Piyapat_Saranrittichai1;~Mauricio_Munoz1;~Volker_Fischer1;~Chaithanya_Kumar_Mummadi1", "aff": "Albert-Ludwigs-Universit\u00e4t Freiburg;Robert Bosch GmbH, Bosch;Bosch Center for Artificial Intelligence;Bosch Center for Artificial Intelligence", "aff_domain": "uni-freiburg.de;de.bosch.com;bosch.com;bosch.com", "position": "PhD student;Researcher;Postdoc;Researcher", "bibtex": "@misc{\nsaranrittichai2024zeroshot,\ntitle={Zero-Shot Visual Classification with Guided Cropping},\nauthor={Piyapat Saranrittichai and Mauricio Munoz and Volker Fischer and Chaithanya Kumar Mummadi},\nyear={2024},\nurl={https://openreview.net/forum?id=9JxQyat11M}\n}", "github": "", "project": "", "reviewers": "EzG9;tiUR;LrWg;KVmG", "site": "https://openreview.net/forum?id=9JxQyat11M", "pdf_size": 5447527, "rating": "3;5;5;6", "confidence": "5;3;4;4", "soundness": "2;3;3;2", "contribution": "1;2;2;2", "presentation": "2;3;4;3", "wc_summary": "69;95;70;72", "wc_strengths": "8;48;50;26", "wc_weaknesses": "200;162;219;157", "wc_questions": "1;3;52;96", "wc_review": "278;308;391;351", "wc_reply_reviewers": "0;0;0;353", "wc_reply_authors": "603;491;610;1016", "reply_reviewers": "0;0;0;2", "reply_authors": "1;1;1;3", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 76.5, 10.735455276791944 ], "wc_strengths_avg": [ 33.0, 17.233687939614086 ], "wc_weaknesses_avg": [ 184.5, 25.947061490658243 ], "wc_questions_avg": [ 38.0, 39.223717314910374 ], "wc_review_avg": [ 332.0, 42.81938813201328 ], "wc_reply_reviewers_avg": [ 88.25, 152.85348376795343 ], "wc_reply_authors_avg": [ 680.0, 199.65344975732324 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6488856845230502, 
"gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ipEt37xNIrcJ:scholar.google.com/&scioq=Zero-Shot+Visual+Classification+with+Guided+Cropping&hl=en&as_sdt=0,33", "gs_version_total": 3, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Albert-Ludwigs-Universit\u00e4t Freiburg;Robert Bosch GmbH;Bosch Center for Artificial Intelligence", "aff_unique_dep": ";;Center for Artificial Intelligence", "aff_unique_url": "https://www.uni-freiburg.de;https://www.bosch.com;https://www.bosch-ai.com", "aff_unique_abbr": "Albert-Ludwigs-Universit\u00e4t;Bosch;BCAI", "aff_campus_unique_index": "0", "aff_campus_unique": "Freiburg;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "id": "9KVT1e1qf7", "title": "LoRAPrune: Pruning Meets Low-Rank Parameter-Efficient Fine-Tuning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Large pre-trained models (LPMs), such as LLaMA and GLM, have shown exceptional performance across various tasks through fine-tuning. Although low-rank adaption (LoRA) has emerged to cheaply fine-tune these LPMs on downstream tasks, their deployment is still hindered by the vast model scale and computational costs. Neural network pruning offers a way to compress LPMs. However, the current pruning methods designed for LPMs are not compatible with LoRA. This is due to their utilization of unstructured pruning on LPMs, impeding the merging of LoRA weights, or their dependence on the gradients of pre-trained weights to guide pruning, which can impose significant memory overhead.\nTo this end, we propose LoRAPrune, a new framework that delivers an accurate, compact model for efficient inference in a highly memory-effective manner. Specifically, we first design a LoRA-guided pruning criterion, which uses the weights and gradients of LoRA, rather than the gradients of pre-trained weights for importance estimation. We then propose a structured iterative pruning procedure, to remove redundant channels and heads. 
\nExtensive experimental results demonstrate the superior performance of our LoRAPrune over existing approaches on the LLaMA series models.\nFor instance, at a 50\\% compression rate, LoRAPrune outperforms LLM-Pruner by a perplexity reduction of 8.0 on WikiText2 and 16.05 on PTB datasets, while concurrently reducing memory usage by 52.6\\%.", "keywords": "Neural Network Pruning;Parameter Efficient Tuning", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/874a30065f9392ccee03ceaeff0c4a4c359b960d.zip", "author": "Mingyang Zhang;Hao Chen;Chunhua Shen;Zhen Yang;Linlin Ou;Xinyi Yu;Bohan Zhuang", "authorids": "~Mingyang_Zhang3;~Hao_Chen17;~Chunhua_Shen2;~Zhen_Yang15;~Linlin_Ou1;~Xinyi_Yu1;~Bohan_Zhuang1", "gender": "M;;;M;F;M;M", "homepage": "https://www.researchgate.net/profile/Mingyang_Zhang25;;;https://zhenyangcs.github.io/;https://www.researchgate.net/profile/Linlin_Ou;https://www.researchgate.net/profile/Yu_Xinyi;https://bohanzhuang.github.io/", "dblp": "76/4874-7.html;;;;17/7817.html;;145/1096", "google_scholar": ";;;Vm1moSIAAAAJ;;;https://scholar.google.com.au/citations?user=DFuDBBwAAAAJ", "orcid": ";;;;;0000-0001-8716-7687;", "linkedin": ";;;;;;bohan-zhuang/", "or_profile": "~Mingyang_Zhang3;~Hao_Chen17;~Chunhua_Shen2;~Zhen_Yang15;~Linlin_Ou1;~Xinyi_Yu1;~Bohan_Zhuang1", "aff": "Zhejiang University;;;Hong Kong University of Science and Technology;Zhejiang University of Technology;Zhejiang University of Technology;Monash University", "aff_domain": "zju.edu.cn;;;connect.hkust-gz.edu.cn;zjut.edu.cn;zjut.edu.cn;monash.edu", "position": "Postdoc;;;PhD student;Full Professor;Associate Professor;Assistant Professor", "bibtex": "@misc{\nzhang2024loraprune,\ntitle={Lo{RAP}rune: Pruning Meets Low-Rank Parameter-Efficient Fine-Tuning},\nauthor={Mingyang Zhang and Hao Chen and Chunhua Shen and Zhen Yang and Linlin Ou and Xinyi Yu and Bohan Zhuang},\nyear={2024},\nurl={https://openreview.net/forum?id=9KVT1e1qf7}\n}", "github": "", "project": "", "reviewers": "C5GK;nG8N;nuRr;pQXa;RcVc", "site": "https://openreview.net/forum?id=9KVT1e1qf7", "pdf_size": 691160, "rating": "5;5;5;5;6", "confidence": "5;4;4;4;2", "soundness": "2;2;2;2;3", "contribution": "3;2;2;2;3", "presentation": "2;3;2;3;2", "wc_summary": "50;100;104;78;80", "wc_strengths": "67;68;60;64;36", "wc_weaknesses": "67;269;145;189;236", "wc_questions": "18;55;5;6;7", "wc_review": "202;492;314;337;359", "wc_reply_reviewers": "0;554;195;0;43", "wc_reply_authors": "275;1315;781;356;453", "reply_reviewers": "0;2;1;0;1", "reply_authors": "2;4;3;2;2", "rating_avg": [ 5.2, 0.39999999999999997 ], "confidence_avg": [ 3.8, 0.9797958971132712 ], "soundness_avg": [ 2.2, 0.39999999999999997 ], "contribution_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 82.4, 19.24162155328911 ], "wc_strengths_avg": [ 59.0, 11.832159566199232 ], "wc_weaknesses_avg": [ 181.2, 70.88695225498131 ], "wc_questions_avg": [ 18.2, 18.988417522268673 ], "wc_review_avg": [ 340.8, 92.94170215785807 ], "wc_reply_reviewers_avg": [ 158.4, 210.39828896642672 ], "wc_reply_authors_avg": [ 636.0, 380.59847608733276 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 2.6, 0.8 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.9185586535436918, "gs_citation": 61, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13109504067265056757&as_sdt=5,33&sciodt=0,33&hl=en", 
"gs_version_total": 5, "aff_unique_index": "0;1;2;2;3", "aff_unique_norm": "Zhejiang University;Hong Kong University of Science and Technology;Zhejiang University of Technology;Monash University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.zju.edu.cn;https://www.ust.hk;https://www.zjut.edu.cn;https://www.monash.edu", "aff_unique_abbr": "ZJU;HKUST;ZJUT;Monash", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "China;Australia" }, { "id": "9Kgnvknvwd", "title": "A First-Order Multi-Gradient Algorithm for Multi-Objective Bi-Level Optimization", "track": "main", "status": "Reject", "tldr": "", "abstract": "In this paper, we study the Multi-Objective Bi-Level Optimization (MOBLO) problem, where the upper-level subproblem is a multi-objective optimization problem and the lower-level subproblem is for scalar optimization. Existing gradient-based MOBLO algorithms need to compute the Hessian matrix, causing the computational inefficient problem. To address this, we propose an efficient first-order multi-gradient method for MOBLO, called FORUM. Specifically, we reformulate MOBLO problems as a constrained multi-objective optimization (MOO) problem via the value-function approach. Then we propose a novel multi-gradient aggregation method to solve the challenging constrained MOO problem. Theoretically, we provide the complexity analysis to show the efficiency of the proposed method and a non-asymptotic convergence result. Empirically, extensive experiments demonstrate the effectiveness and efficiency of the proposed FORUM method in different learning problems. In particular, it achieves state-of-the-art performance on three multi-task learning benchmark datasets.", "keywords": "Multi-Objective Bi-Level Optimization;Multi-Task Learning", "primary_area": "optimization", "supplementary_material": "", "author": "Feiyang Ye;Baijiong Lin;Xiaofeng Cao;Yu Zhang;Ivor Tsang", "authorids": "~Feiyang_Ye4;~Baijiong_Lin1;~Xiaofeng_Cao2;~Yu_Zhang3;~Ivor_Tsang1", "gender": "M;M;M;M;M", "homepage": "https://feiyang-ye.github.io/;https://baijiong-lin.github.io/;http://cse.sustech.edu.cn/faculty/~zhangy/;https://www.a-star.edu.sg/cfar/about-cfar/management/prof-ivor-tsang;https://xiaofengcaoml.github.io/", "dblp": "285/4704;279/2950;50/671-6;35/5873;117/3982-2.html", "google_scholar": "3EX25cAAAAAJ;KVdbYTYAAAAJ;https://scholar.google.com.hk/citations?user=jaRS5w4AAAAJ;rJMOlVsAAAAJ;", "orcid": ";0000-0002-4257-0226;;;", "linkedin": ";;;;", "or_profile": "~Feiyang_Ye4;~Baijiong_Lin1;~Yu_Zhang3;~Ivor_W_Tsang1;~Xiaofeng_Cao1", "aff": "University of Technology Sydney;The Hong Kong University of Science and Technology (Guangzhou);Southern University of Science and Technology;A*STAR;Jilin University", "aff_domain": "uts.edu.au;connect.hkust-gz.edu.cn;sustc.edu.cn;cfar.a-star.edu.sg;jlu.edu.cn", "position": "PhD student;PhD student;Associate Professor;Principal Researcher;Associate Professor", "bibtex": "@misc{\nye2024a,\ntitle={A First-Order Multi-Gradient Algorithm for Multi-Objective Bi-Level Optimization},\nauthor={Feiyang Ye and Baijiong Lin and Xiaofeng Cao and Yu Zhang and Ivor Tsang},\nyear={2024},\nurl={https://openreview.net/forum?id=9Kgnvknvwd}\n}", "github": "", "project": "", "reviewers": "xYWk;6mPv;nzzn;MMwK", "site": "https://openreview.net/forum?id=9Kgnvknvwd", "pdf_size": 506917, "rating": "3;5;6;6", "confidence": "3;4;3;3", "soundness": "2;2;3;3", "contribution": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": 
"79;67;46;62", "wc_strengths": "50;35;37;126", "wc_weaknesses": "390;202;78;132", "wc_questions": "40;8;3;76", "wc_review": "559;312;164;396", "wc_reply_reviewers": "0;4;17;7", "wc_reply_authors": "1381;653;356;726", "reply_reviewers": "0;1;1;1", "reply_authors": "3;1;2;3", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 63.5, 11.84271928232701 ], "wc_strengths_avg": [ 62.0, 37.39652390263031 ], "wc_weaknesses_avg": [ 200.5, 117.909923246519 ], "wc_questions_avg": [ 31.75, 29.22648627529488 ], "wc_review_avg": [ 357.75, 142.82572422361457 ], "wc_reply_reviewers_avg": [ 7.0, 6.284902544988268 ], "wc_reply_authors_avg": [ 779.0, 374.17175200701615 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12337236500024581689&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "University of Technology Sydney;Hong Kong University of Science and Technology;Southern University of Science and Technology;Agency for Science, Technology and Research;Jilin University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.uts.edu.au;https://www.ust.hk;https://www.sustech.edu.cn;https://www.a-star.edu.sg;http://www.jlu.edu.cn", "aff_unique_abbr": "UTS;HKUST;SUSTech;A*STAR;JLU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Guangzhou", "aff_country_unique_index": "0;1;1;2;1", "aff_country_unique": "Australia;China;Singapore" }, { "id": "9Klj7QG0NO", "title": "ONE-PEACE: Exploring One General Representation Model Toward Unlimited Modalities", "track": "main", "status": "Reject", "tldr": "", "abstract": "In this work, we propose ONE-PEACE, a highly extensible model with 4B parameters that seamlessly aligns and integrates representations across vision, audio, and language modalities. The ONE-PEACE architecture consists of shared self-attention layers, modality adapters and FFNs. This design allows for multi-modal fusion through self-attention layers, while also providing the flexibility to easily incorporate new modalities. Two modality-agnostic pretraining tasks, cross-modal aligning contrast and intra-modal denoising contrast, are developed to align the semantic space of different modalities and capture fine-grained details within each modality simultaneously. With the scaling-friendly architecture and tasks, ONE-PEACE has the potential to expand to unlimited modalities. Without utilizing any vision or language pretrained model for initialization, ONE-PEACE achieves new SOTAs across a wide range of uni-modal and cross-modal tasks. 
Furthermore, we show that ONE-PEACE possesses a strong emergent retrieval capability, enabling it to align modalities that are not paired in the training data.", "keywords": "We propose a scalable way for building a general representation model toward unlimited modalities.", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/e7177c8d49e3d4fb104722f7ff0ea6e28a202e0e.zip", "author": "Peng Wang;Shijie Wang;Junyang Lin;Shuai Bai;Xiaohuan Zhou;Jingren Zhou;Xinggang Wang;Chang Zhou", "authorids": "~Peng_Wang20;~Shijie_Wang1;~Junyang_Lin1;~Shuai_Bai1;~Xiaohuan_Zhou1;~Jingren_Zhou1;~Xinggang_Wang1;~Chang_Zhou2", "gender": "M;M;M;M;F;M;M;M", "homepage": ";https://github.com/simonJJJ;;;;;https://xwcv.github.io/index.htm;", "dblp": "95/4442-28;;215/3823;208/8033;217/2489;84/2644;95/3056;", "google_scholar": "7fjqA0YAAAAJ;DuAqyTwAAAAJ;qp6IwtgAAAAJ;ylhI1JsAAAAJ;;;qNCTLV0AAAAJ;QeSoG3sAAAAJ", "orcid": ";;;;;;0000-0001-6732-7823;", "linkedin": ";;;;;;;", "or_profile": "~Peng_Wang20;~Shijie_Wang1;~Junyang_Lin1;~Shuai_Bai1;~Xiaohuan_Zhou1;~Jingren_Zhou1;~Xinggang_Wang1;~Chang_Zhou2", "aff": "Alibaba Group;;Alibaba Group;Alibaba Group;Alibaba Group;Alibaba Group;Huazhong University of Science and Technology;Alibaba Group", "aff_domain": "alibaba-inc.com;;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;hust.edu.cn;alibaba-inc.com", "position": "Researcher;;Principal Researcher;Senior Engineer;Researcher;Researcher;Full Professor;Researcher", "bibtex": "@misc{\nwang2024onepeace,\ntitle={{ONE}-{PEACE}: Exploring One General Representation Model Toward Unlimited Modalities},\nauthor={Peng Wang and Shijie Wang and Junyang Lin and Shuai Bai and Xiaohuan Zhou and Jingren Zhou and Xinggang Wang and Chang Zhou},\nyear={2024},\nurl={https://openreview.net/forum?id=9Klj7QG0NO}\n}", "github": "", "project": "", "reviewers": "zvUR;DHvY;9Evr", "site": "https://openreview.net/forum?id=9Klj7QG0NO", "pdf_size": 0, "rating": "5;6;8", "confidence": "4;4;2", "soundness": "2;4;3", "contribution": "2;2;3", "presentation": "2;3;3", "wc_summary": "39;69;46", "wc_strengths": "29;78;50", "wc_weaknesses": "129;197;84", "wc_questions": "3;38;55", "wc_review": "200;382;235", "wc_reply_reviewers": "11;11;0", "wc_reply_authors": "459;978;898", "reply_reviewers": "1;1;0", "reply_authors": "3;2;2", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 51.333333333333336, 12.81492185782739 ], "wc_strengths_avg": [ 52.333333333333336, 20.07209228976613 ], "wc_weaknesses_avg": [ 136.66666666666666, 46.449494674921446 ], "wc_questions_avg": [ 32.0, 21.64871050817269 ], "wc_review_avg": [ 272.3333333333333, 78.8514778273404 ], "wc_reply_reviewers_avg": [ 7.333333333333333, 5.185449728701348 ], "wc_reply_authors_avg": [ 778.3333333333334, 228.15248312379936 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.944911182523068, "gs_citation": 143, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15486176021242692169&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0;0;0;1;0", 
"aff_unique_norm": "Alibaba Group;Huazhong University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.alibaba.com;http://www.hust.edu.cn", "aff_unique_abbr": "Alibaba;HUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "9L9j5bQPIY", "title": "Metanetwork: A novel approach to interpreting ANNs", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recent work on mechanistic interpretability, which attempts to demystify the black box of artificial neural network (ANN) models through analytical approaches, has made it possible to give a qualitative interpretation of how each component of the model works, even without using the dataset the model was trained on. However, it is also desirable from the viewpoint of interpretability to understand the ability of the entire model; and considering the previous studies on task embedding, the ability of the entire model should also be represented by a vector. In this study we propose a novel approach to quantitatively interpreting an unseen ANN's ability based on relationships with other ANNs through obtaining a low-dimensional representation of ANNs by training a \"metanetwork\" that autoencodes ANNs. As a first-ever attempt of such an approach, we train a \"metanetwork\" to autoencode ANNs consisting of one fully-connected layer. We demonstrate the validity of our proposed approach by showing that a simple k-Nearest Neighbor classifier can successfully predict properties of the training datasets of unseen models from their embedded representations.", "keywords": "AI interpretability;Model representation;Model capability;Autoencoder;Meta learning", "primary_area": "visualization or interpretation of learned representations", "supplementary_material": "/attachment/6cece3e9fe0474793ffa192016303a5b791bfaf1.zip", "author": "Ryota Takatsuki;Ippei Fujisawa;Ryota Kanai", "authorids": "~Ryota_Takatsuki1;~Ippei_Fujisawa1;~Ryota_Kanai1", "gender": "M;;M", "homepage": ";;", "dblp": ";;", "google_scholar": "https://scholar.google.co.jp/citations?user=RTxFWIoAAAAJ;VR-c7ckAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;", "linkedin": "ryota-takatsuki-8a4697212/;;ryota-kanai-3585979/", "or_profile": "~Ryota_Takatsuki1;~Ippei_Fujisawa1;~Ryota_Kanai1", "aff": "Araya Inc.;Araya;Araya, Inc.", "aff_domain": "araya.org;research.araya.org;research.araya.org", "position": "Intern;Researcher;Principal Researcher", "bibtex": "@misc{\ntakatsuki2024metanetwork,\ntitle={Metanetwork: A novel approach to interpreting {ANN}s},\nauthor={Ryota Takatsuki and Ippei Fujisawa and Ryota Kanai},\nyear={2024},\nurl={https://openreview.net/forum?id=9L9j5bQPIY}\n}", "github": "", "project": "", "reviewers": "aLkj;3Bf5;rUtA;Bzy7", "site": "https://openreview.net/forum?id=9L9j5bQPIY", "pdf_size": 702501, "rating": "1;3;3;3", "confidence": "4;3;4;4", "soundness": "1;1;2;3", "contribution": "2;1;2;2", "presentation": "1;1;1;3", "wc_summary": "75;42;79;37", "wc_strengths": "5;8;29;59", "wc_weaknesses": "123;70;148;227", "wc_questions": "8;163;31;75", "wc_review": "211;283;287;398", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "127;564;389;647", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 2.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 1.75, 0.82915619758885 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 1.5, 0.8660254037844386 ], 
"wc_summary_avg": [ 58.25, 18.886172190256023 ], "wc_strengths_avg": [ 25.25, 21.568205766822608 ], "wc_weaknesses_avg": [ 142.0, 56.581799193733666 ], "wc_questions_avg": [ 69.25, 59.23839548806163 ], "wc_review_avg": [ 294.75, 66.84450239174498 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 431.75, 199.07457773407432 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:VZmkChpQtSQJ:scholar.google.com/&scioq=Metanetwork:+A+novel+approach+to+interpreting+ANNs&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "Araya Inc.;Araya;Araya, Inc.", "aff_unique_dep": ";;", "aff_unique_url": ";;", "aff_unique_abbr": ";;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States;" }, { "id": "9NKRfhKgzI", "title": "Adversarially Robust and Privacy-Preserving Representation Learning via Information Theory", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Machine learning models are vulnerable to both security (e.g., adversarial examples) attacks and privacy (e.g., private attribute inference) attacks. In this paper, we aim to mitigate both the security and privacy attacks, and maintain utility of the primary task simultaneously.\nParticularly, we propose an information-theoretical framework to achieve the goals through the lens of representation learning, i.e., learning representations that are robust to both adversarial examples and attribute inference adversaries. We also derive novel theoretical results, i.e., the inherent tradeoff between adversarial robustness/utility and attribute privacy, as well as guaranteed attribute privacy leakage against attribute inference adversaries.", "keywords": "Representation learning;adversarially robust;privacy-preserving;information theory", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "", "author": "Binghui Zhang;Sayedeh Leila Noorbakhsh;Yun Dong;Yuan Hong;Binghui Wang", "authorids": "~Binghui_Zhang1;~Sayedeh_Leila_Noorbakhsh1;~Yun_Dong1;~Yuan_Hong1;~Binghui_Wang2", "gender": "M;F;F;M;M", "homepage": ";;;https://yhongcs.github.io/;https://wangbinghui.net", "dblp": ";;;79/5433-1;123/7149", "google_scholar": ";;;KJuZW2wAAAAJ;SoOztcEAAAAJ", "orcid": ";;;;0000-0001-5616-060X", "linkedin": "binghui-zhang-68b20a108;leilynourbakhsh/;yun-dong/;;", "or_profile": "~Binghui_Zhang1;~Sayedeh_Leila_Noorbakhsh1;~Yun_Dong1;~Yuan_Hong1;~Binghui_Wang2", "aff": "Illinois Institute of Technology;Illinois Institute of Technology;Milwaukee School of Engineering;University of Connecticut;Illinois Institute of Technology", "aff_domain": "iit.edu;iit.edu;msoe.edu;uconn.edu;iit.edu", "position": "PhD student;MS student;Assistant Professor;Associate Professor;Assistant Professor", "bibtex": "@misc{\nzhang2024adversarially,\ntitle={Adversarially Robust and Privacy-Preserving Representation Learning via Information Theory},\nauthor={Binghui Zhang and Sayedeh Leila Noorbakhsh and Yun Dong and Yuan Hong and Binghui Wang},\nyear={2024},\nurl={https://openreview.net/forum?id=9NKRfhKgzI}\n}", "github": "", "project": "", "reviewers": "qq4Y;jNV8;qrh8", "site": "https://openreview.net/forum?id=9NKRfhKgzI", "pdf_size": 1104626, "rating": "3;3;5", "confidence": "4;3;3", "soundness": "2;2;3", 
"contribution": "1;2;2", "presentation": "2;2;2", "wc_summary": "44;123;73", "wc_strengths": "47;29;55", "wc_weaknesses": "350;161;345", "wc_questions": "5;442;108", "wc_review": "446;755;581", "wc_reply_reviewers": "721;330;0", "wc_reply_authors": "502;689;0", "reply_reviewers": "1;1;0", "reply_authors": "1;1;0", "rating_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 1.6666666666666667, 0.4714045207910317 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 80.0, 32.629230249374054 ], "wc_strengths_avg": [ 43.666666666666664, 10.873004286866726 ], "wc_weaknesses_avg": [ 285.3333333333333, 87.94063654281538 ], "wc_questions_avg": [ 185.0, 186.5279246297097 ], "wc_review_avg": [ 594.0, 126.48320046551636 ], "wc_reply_reviewers_avg": [ 350.3333333333333, 294.69796213901594 ], "wc_reply_authors_avg": [ 397.0, 290.9169411819578 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 0.6666666666666666, 0.4714045207910317 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:owgVDhfyracJ:scholar.google.com/&scioq=Adversarially+Robust+and+Privacy-Preserving+Representation+Learning+via+Information+Theory&hl=en&as_sdt=0,48", "gs_version_total": 0, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Illinois Institute of Technology;Milwaukee School of Engineering;University of Connecticut", "aff_unique_dep": ";;", "aff_unique_url": "https://www.iit.edu;https://www.msoe.edu;https://www.uconn.edu", "aff_unique_abbr": "IIT;MSOE;UConn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "9NiprOP4OL", "title": "Zero-shot Inversion Process for Image Attribute Editing with Diffusion Models", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Denoising diffusion models have shown outstanding performance in image editing. Existing works tend to use either image-guided methods, which provide a visual reference but lack control over semantic coherence, or text-guided methods, which ensure faithfulness to text guidance but lack visual quality. To address the problem, we propose the Zero-shot Inversion Process (ZIP), a framework that injects a fusion of generated visual reference and text guidance into the semantic latent space of a frozen pre-trained diffusion model. Only using a tiny neural network, the proposed ZIP produces diverse content and attributes under the intuitive control of the text prompt. Moreover, ZIP shows remarkable robustness for both in-domain and out-of-domain attribute manipulation on real images. We perform detailed experiments on various benchmark datasets. 
Compared to state-of-the-art methods, ZIP produces images of equivalent quality while providing a realistic editing effect.", "keywords": "Diffusion Models; Attribute Editing; Zero Shot", "primary_area": "generative models", "supplementary_material": "/attachment/88ef13782f9e35b9d1490bae22b8f160767a5336.zip", "author": "Zhanbo Feng;Zenan Ling;Feng Zhou;Ci Gong;Jie LI;Robert C Qiu", "authorids": "~Zhanbo_Feng1;~Zenan_Ling1;~Feng_Zhou9;~Ci_Gong1;~Jie_LI12;~Robert_C_Qiu1", "gender": "M;M;;M;M;", "homepage": "http://SadAngel.cn/;https://scholar.google.com/citations?user=BabePTkAAAAJ&hl=zh-CN;;https://github.com/HomuraToHikari;https://www.cs.sjtu.edu.cn/~lijie;", "dblp": "234/7758;183/7798;;;17/2703-2.html;", "google_scholar": ";BabePTkAAAAJ;;;Krl5HRcAAAAJ;", "orcid": ";;;;0000-0002-4974-6116;", "linkedin": ";;;;;", "or_profile": "~Zhanbo_Feng1;~Zenan_Ling1;~Feng_Zhou9;~Ci_Gong1;~Jie_LI12;~Robert_C_Qiu1", "aff": "Shanghai Jiaotong University;Huazhong University of Science and Technology;;Huazhong University of Science and Technology;Shanghai Jiaotong University;", "aff_domain": "sjtu.edu.cn;hust.edu.cn;;hust.edu.cn;cs.sjtu.edu.cn;", "position": "PhD student;Researcher;;MS student;Full Professor;", "bibtex": "@misc{\nfeng2024zeroshot,\ntitle={Zero-shot Inversion Process for Image Attribute Editing with Diffusion Models},\nauthor={Zhanbo Feng and Zenan Ling and Feng Zhou and Ci Gong and Jie LI and Robert C Qiu},\nyear={2024},\nurl={https://openreview.net/forum?id=9NiprOP4OL}\n}", "github": "", "project": "", "reviewers": "VELF;nLmB;pQWL;L9gz", "site": "https://openreview.net/forum?id=9NiprOP4OL", "pdf_size": 7482303, "rating": "3;3;3;3", "confidence": "4;5;5;4", "soundness": "3;3;1;2", "contribution": "2;2;1;1", "presentation": "2;2;3;2", "wc_summary": "112;115;79;48", "wc_strengths": "79;32;45;16", "wc_weaknesses": "131;83;165;136", "wc_questions": "3;75;29;6", "wc_review": "325;305;318;206", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 3.0, 0.0 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "contribution_avg": [ 1.5, 0.5 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 88.5, 27.31757675929547 ], "wc_strengths_avg": [ 43.0, 23.18404623873926 ], "wc_weaknesses_avg": [ 128.75, 29.431063521388417 ], "wc_questions_avg": [ 28.25, 28.80429655450728 ], "wc_review_avg": [ 288.5, 48.16897341650536 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ZroxgevmYEgJ:scholar.google.com/&scioq=Zero-shot+Inversion+Process+for+Image+Attribute+Editing+with+Diffusion+Models&hl=en&as_sdt=0,33", "gs_version_total": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Shanghai Jiao Tong University;Huazhong University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.sjtu.edu.cn;http://www.hust.edu.cn", "aff_unique_abbr": "SJTU;HUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Neural Field Classifiers via Target Encoding and Classification Loss", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19299", "id": "9NqC72m31m", "author_site": "Xindi Yang, Zeke Xie, Xiong 
Zhou, Boyu Liu, Buhua Liu, Yi Liu, Haoran Wang, YUNFENG CAI, Mingming Sun", "tldr": "", "abstract": "Neural field methods have seen great progress in various long-standing tasks in computer vision and computer graphics, including novel view synthesis and geometry reconstruction. As existing neural field methods try to predict some coordinate-based continuous target values, such as RGB for Neural Radiance Field (NeRF), all of these methods are regression models and are optimized by some regression loss. However, are regression models really better than classification models for neural field methods? In this work, we try to visit this very fundamental but overlooked question for neural fields from a machine learning perspective. We successfully propose a novel Neural Field Classifier (NFC) framework which formulates existing neural field methods as classification tasks rather than regression tasks. The proposed NFC can easily transform arbitrary Neural Field Regressor (NFR) into its classification variant via employing a novel Target Encoding module and optimizing a classification loss. By encoding a continuous regression target into a high-dimensional discrete encoding, we naturally formulate a multi-label classification task. Extensive experiments demonstrate the impressive effectiveness of NFC at the nearly free extra computational costs. Moreover, NFC also shows robustness to sparse inputs, corrupted images, and dynamic scenes.", "keywords": "Neural Fields;NeRF;3D Vision;Scene Reconstruction", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Xindi Yang;Zeke Xie;Xiong Zhou;Boyu Liu;Buhua Liu;Yi Liu;Haoran Wang;YUNFENG CAI;Mingming Sun", "authorids": "~Xindi_Yang1;~Zeke_Xie1;~Xiong_Zhou3;~Boyu_Liu3;~Buhua_Liu1;~Yi_Liu40;~Haoran_Wang2;~YUNFENG_CAI1;~Mingming_Sun1", "gender": ";M;M;M;;;M;M;M", "homepage": ";https://sites.google.com/view/zeke-xie;https://hitcszx.github.io/;https://github.com/pascalliu;;http://faculty.bjtu.edu.cn/8546/;;https://www.bimsa.cn/detail/yfcai.html;", "dblp": ";210/1039;;143/0142;;;382/4779.html;133/8201;87/8665-1.html", "google_scholar": ";https://scholar.google.co.jp/citations?user=ysXmZCMAAAAJ;BMGootgAAAAJ;;;;xfnL2IEAAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": ";;0000-0002-0856-6696;;;;0000-0002-6098-4772;;", "linkedin": ";;;;;;;;", "or_profile": "~Xindi_Yang1;~Zeke_Xie1;~Xiong_Zhou3;~Boyu_Liu3;~Buhua_Liu1;~Yi_Liu40;~Haoran_Wang2;~YUNFENG_CAI1;~Mingming_Sun1", "aff": ";Baidu;Harbin Institute of Technology;Beihang University;;Beijing Jiaotong University;Baidu;Baidu Research;Baidu", "aff_domain": ";baidu.com;hit.edu.cn;buaa.edu.cn;;bjtu.edu.cn;baidu.com;baidu.com;baidu.com", "position": ";Researcher;PhD student;PhD student;;Associate Professor;Researcher;Resseacher;Principal Researcher", "bibtex": "@inproceedings{\nyang2024neural,\ntitle={Neural Field Classifiers via Target Encoding and Classification Loss},\nauthor={Xindi Yang and Zeke Xie and Xiong Zhou and Boyu Liu and Buhua Liu and Yi Liu and Haoran Wang and YUNFENG CAI and Mingming Sun},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=9NqC72m31m}\n}", "github": "", "project": "", "reviewers": "fXmy;WcH4;zn3A;dKd6", "pdf_size": 8584881, "rating": "6;6;6;8", "confidence": "3;4;4;3", "soundness": "3;3;3;3", "contribution": "3;3;3;3", "presentation": "3;3;4;3", "wc_summary": "43;93;66;64", "wc_strengths": "54;101;21;39", 
"wc_weaknesses": "54;59;116;157", "wc_questions": "24;90;2;24", "wc_review": "175;343;205;284", "wc_reply_reviewers": "17;50;152;77", "wc_reply_authors": "267;550;888;858", "reply_reviewers": "1;1;2;3", "reply_authors": "1;1;3;3", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 66.5, 17.755280904564703 ], "wc_strengths_avg": [ 53.75, 29.67637949615822 ], "wc_weaknesses_avg": [ 96.5, 42.58227330709341 ], "wc_questions_avg": [ 35.0, 33.0 ], "wc_review_avg": [ 251.75, 66.03550181531143 ], "wc_reply_reviewers_avg": [ 74.0, 49.794578018093496 ], "wc_reply_authors_avg": [ 640.75, 253.10805498837843 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 2.0, 1.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:01FbbTUi7vUJ:scholar.google.com/&scioq=Neural+Field+Classifiers+via+Target+Encoding+and+Classification+Loss&hl=en&as_sdt=0,33", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=9NqC72m31m", "pdf": "https://openreview.net/pdf?id=9NqC72m31m", "email": ";baidu.com;hit.edu.cn;buaa.edu.cn;;bjtu.edu.cn;baidu.com;baidu.com;baidu.com", "author_num": 9, "aff_unique_index": "0;1;2;3;0;0;0", "aff_unique_norm": "Baidu;Harbin Institute of Technology;Beihang University;Beijing Jiao Tong University", "aff_unique_dep": "Baidu, Inc.;;;", "aff_unique_url": "https://www.baidu.com;http://www.hit.edu.cn/;http://www.buaa.edu.cn/;http://www.njtu.edu.cn/en", "aff_unique_abbr": "Baidu;HIT;BUAA;BJTU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Harbin", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Towards Understanding Factual Knowledge of Large Language Models", "status": "Spotlight", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19298", "id": "9OevMUdods", "author_site": "Xuming Hu, Junzhe Chen, Xiaochuan Li, Yufei Guo, Lijie Wen, Philip Yu, Zhijiang Guo", "tldr": "", "abstract": "Large language models (LLMs) have recently driven striking performance improvements across a range of natural language processing tasks. The factual knowledge acquired during pretraining and instruction tuning can be useful in various downstream tasks, such as question answering, and language generation. Unlike conventional Knowledge Bases (KBs) that explicitly store factual knowledge, LLMs implicitly store facts in their parameters. Content generated by the LLMs can often exhibit inaccuracies or deviations from the truth, due to facts that can be incorrectly induced or become obsolete over time. To this end, we aim to explore the extent and scope of factual knowledge within LLMs by designing the benchmark Pinocchio. Pinocchio contains 20K diverse factual questions that span different sources, timelines, domains, regions, and languages. Furthermore, we investigate whether LLMs can compose multiple facts, update factual knowledge temporally, reason over multiple pieces of facts, identify subtle factual differences, and resist adversarial examples. Extensive experiments on different sizes and types of LLMs show that existing LLMs still lack factual knowledge and suffer from various spurious correlations. We believe this is a critical bottleneck for realizing trustworthy artificial intelligence. 
The dataset Pinocchio and our codes are publicly available at: https://github.com/THU-BPM/Pinocchio.", "keywords": "Large Language Models;Resource and Evaluation;Interpretability;NLP Application", "primary_area": "datasets and benchmarks", "supplementary_material": "/attachment/51a07766af2dbf3f53f933abc73c84392c2f0628.zip", "author": "Xuming Hu;Junzhe Chen;Xiaochuan Li;Yufei Guo;Lijie Wen;Philip S. Yu;Zhijiang Guo", "authorids": "~Xuming_Hu1;~Junzhe_Chen1;~Xiaochuan_Li3;~Yufei_Guo3;~Lijie_Wen1;~Philip_S._Yu1;~Zhijiang_Guo2", "gender": "M;M;M;M;M;M;M", "homepage": "https://xuminghu.github.io/;;https://xiaochuanli.com;https://none.com;https://www.thss.tsinghua.edu.cn/en/faculty/lijiewen.htm;https://cs.uic.edu/profiles/philip-yu/;https://cartus.github.io/", "dblp": "262/3664;351/9670;;;36/172-1;y/PhilipSYu;43/6147", "google_scholar": "dbBKbXoAAAAJ;KUXvSuIAAAAJ;97QHT-0AAAAJ;;https://scholar.google.com.tw/citations?user=f3C0jUIAAAAJ;D0lL1r0AAAAJ;8b-u3icAAAAJ", "orcid": "0000-0001-6075-4224;0009-0005-7573-0707;;;0000-0003-0358-3160;0000-0002-3491-5968;", "linkedin": ";;;;;;", "or_profile": "~Xuming_Hu1;~Junzhe_Chen1;~Xiaochuan_Li3;~Yufei_Guo3;~Lijie_Wen1;~Philip_S._Yu1;~Zhijiang_Guo2", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University;University of Illinois Chicago;University of Cambridge", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;mail.tsinghua.edu.cn;mail.tsinghua.edu.cn;tsinghua.edu.cn;uic.edu;cam.ac.uk", "position": "PhD student;Undergrad student;Undergrad student;Undergrad student;Associate Professor;Full Professor;Postdoc", "bibtex": "@inproceedings{\nhu2024towards,\ntitle={Towards Understanding Factual Knowledge of Large Language Models},\nauthor={Xuming Hu and Junzhe Chen and Xiaochuan Li and Yufei Guo and Lijie Wen and Philip S. 
Yu and Zhijiang Guo},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=9OevMUdods}\n}", "github": "", "project": "", "reviewers": "1LZe;9WHz;SmAT;XTL4", "pdf_size": 2082950, "rating": "5;6;8;8", "confidence": "4;3;4;4", "soundness": "3;3;3;3", "contribution": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "371;51;37;187", "wc_strengths": "23;39;38;29", "wc_weaknesses": "41;130;306;203", "wc_questions": "4;55;2;40", "wc_review": "439;275;383;459", "wc_reply_reviewers": "0;0;0;47", "wc_reply_authors": "1756;959;1428;1359", "reply_reviewers": "0;0;0;1", "reply_authors": "4;3;4;3", "rating_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 161.5, 134.3977306356026 ], "wc_strengths_avg": [ 32.25, 6.609652033201143 ], "wc_weaknesses_avg": [ 170.0, 97.24453712162962 ], "wc_questions_avg": [ 25.25, 22.884219453588535 ], "wc_review_avg": [ 389.0, 71.47027354082255 ], "wc_reply_reviewers_avg": [ 11.75, 20.351596988934308 ], "wc_reply_authors_avg": [ 1375.5, 283.408274402848 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.5, 0.5 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=827760621825055587&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 2, "openreview": "https://openreview.net/forum?id=9OevMUdods", "pdf": "https://openreview.net/pdf?id=9OevMUdods", "email": "tsinghua.edu.cn;tsinghua.edu.cn;mail.tsinghua.edu.cn;mail.tsinghua.edu.cn;tsinghua.edu.cn;uic.edu;cam.ac.uk", "author_num": 7, "aff_unique_index": "0;0;0;0;0;1;2", "aff_unique_norm": "Tsinghua University;University of Illinois at Chicago;University of Cambridge", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.uic.edu;https://www.cam.ac.uk", "aff_unique_abbr": "THU;UIC;Cambridge", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Chicago;Cambridge", "aff_country_unique_index": "0;0;0;0;0;1;2", "aff_country_unique": "China;United States;United Kingdom" }, { "id": "9QV7Q9gKl9", "title": "DIFUSCO-LNS: Diffusion-Guided Large Neighbourhood Search for Integer Linear Programming", "track": "main", "status": "Reject", "tldr": "", "abstract": "Integer Linear Programming (ILP) is a powerful and flexible framework for modeling and solving a variety of combinatorial optimization problems. This paper introduces a novel ILP solver, namely DIFUSCO-LNS, which combines the strengths of carefully engineered traditional solvers in symbolic reasoning and the generative power of a neural diffusion model in graph-based learning for the Large Neighborhood Search (LNS) approach. Our diffusion model treats the destroy policy in LNS as a generative problem in the discrete $\\{0, 1\\}$-vector space and is trained to imitate the high-quality Local Branching (LB) destroy heuristic through iterative denoising. Specifically, this addresses the unimodal limitation of other neural LNS solvers with its capability to capture the multimodal nature of optimal policies during variable selection. Our evaluations span four representative MIP problems: MIS, CA, SC, and MVC. 
Experimental results reveal that DIFUSCO-LNS substantially surpasses prior neural LNS solvers.", "keywords": "Large Neighborhood Search;Diffusion Models;Combinatorial Optimization Solvers", "primary_area": "neurosymbolic & hybrid AI systems (physics-informed, logic & formal reasoning, etc.)", "supplementary_material": "/attachment/ba5aca64f8e2a198eb0a3806f8de36908c03c8e6.pdf", "author": "Shengyu Feng;Zhiqing Sun;Yiming Yang", "authorids": "~Shengyu_Feng1;~Zhiqing_Sun1;~Yiming_Yang1", "gender": "M;M;F", "homepage": "https://shengyu-feng.github.io/;https://www.cs.cmu.edu/~zhiqings/;http://www.cs.cmu.edu/~yiming/", "dblp": "47/2121;211/7692;25/1666", "google_scholar": "ApUH8ZcAAAAJ;https://scholar.google.com/citations?hl=en;MlZq4XwAAAAJ", "orcid": ";;0000-0001-8322-607X", "linkedin": "shengyu-feng-331a6214b/;zhiqing-sun-5781b3100/;yiming-yang-24100924/", "or_profile": "~Shengyu_Feng1;~Zhiqing_Sun1;~Yiming_Yang1", "aff": "Apple;Carnegie Mellon University;School of Computer Science, Carnegie Mellon University", "aff_domain": "apple.com;cs.cmu.edu;cs.cmu.edu", "position": "Intern;PhD student;Full Professor", "bibtex": "@misc{\nfeng2024difuscolns,\ntitle={{DIFUSCO}-{LNS}: Diffusion-Guided Large Neighbourhood Search for Integer Linear Programming},\nauthor={Shengyu Feng and Zhiqing Sun and Yiming Yang},\nyear={2024},\nurl={https://openreview.net/forum?id=9QV7Q9gKl9}\n}", "github": "", "project": "", "reviewers": "Xy75;Gizx;3tnY", "site": "https://openreview.net/forum?id=9QV7Q9gKl9", "pdf_size": 1713594, "rating": "3;5;5", "confidence": "4;3;4", "soundness": "2;3;3", "contribution": "2;3;2", "presentation": "3;2;3", "wc_summary": "67;113;79", "wc_strengths": "42;39;23", "wc_weaknesses": "205;298;114", "wc_questions": "265;20;168", "wc_review": "579;470;384", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "367;246;536", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 86.33333333333333, 19.48218559493661 ], "wc_strengths_avg": [ 34.666666666666664, 8.339997335464536 ], "wc_weaknesses_avg": [ 205.66666666666666, 75.11916459126053 ], "wc_questions_avg": [ 151.0, 100.74059095849431 ], "wc_review_avg": [ 477.6666666666667, 79.79278719839837 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 383.0, 118.93135274883014 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.49999999999999983, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6209733794584114338&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;1", "aff_unique_norm": "Apple;Carnegie Mellon University", "aff_unique_dep": "Apple Inc.;", "aff_unique_url": "https://www.apple.com;https://www.cmu.edu", "aff_unique_abbr": "Apple;CMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "9QVqYBvCD8", "title": "Asking Before Acting: Gather Information in Embodied Decision-Making with Language Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "With strong capabilities of reasoning and a broad understanding of the world, Large Language 
Models (LLMs) have demonstrated immense potential in building versatile embodied decision-making agents capable of executing a wide array of tasks.\nNevertheless, when deployed in unfamiliar environments, we show that LLM agents encounter challenges in efficiently gathering essential information, leading to suboptimal performance.\nConversely, human individuals often seek additional information from their peers prior to taking action, harnessing external knowledge to avoid unnecessary trial and error. Drawing inspiration from this behavior, we propose \\textit{Asking Before Acting} (ABA), a method that empowers the agent to proactively inquire with external sources for pertinent information using natural language during their interactions within the environment. \nIn this way, the agent is able to enhance its efficiency and performance by circumventing potentially laborious steps and combating the difficulties associated with exploration in unfamiliar environments and vagueness of the instructions.\nWe conduct extensive experiments involving a spectrum of environments including text-based household everyday tasks, robot arm manipulation tasks, and real world open domain image based embodied tasks. The experiments involve various models from Vicuna to GPT-4. The results demonstrate that, even with modest prompts modifications, ABA exhibits substantial advantages on both performance and efficiency over baseline LLM agents.\nFurther finetuning ABA with reformulated metadata (ABA-FT) faciliates learning the rationale for asking and allows for additional enhancements especially in tasks that baselines struggle to solve.", "keywords": "human in the loop;embodied decision making;language model", "primary_area": "applications to robotics, autonomy, planning", "supplementary_material": "/attachment/18d3d13ea2cab853919b64523eacb46fcc13d876.zip", "author": "Xiaoyu Chen;Shenao Zhang;Pushi Zhang;Li Zhao;Jianyu Chen", "authorids": "~Xiaoyu_Chen4;~Shenao_Zhang1;~Pushi_Zhang1;~Li_Zhao1;~Jianyu_Chen1", "gender": ";M;;F;M", "homepage": "https://github.com/Cospui;https://shenao-zhang.github.io/;https://zpschang.github.io/;https://www.microsoft.com/en-us/research/people/lizo/;http://people.iiis.tsinghua.edu.cn/~jychen/", "dblp": ";253/4543.html;288/4226;97/4708-7;", "google_scholar": ";8NamuusAAAAJ;_DLMSkIAAAAJ;b-LJkLQAAAAJ;", "orcid": ";;;;", "linkedin": ";shenao-zhang-055a53178/;;;", "or_profile": "~Xiaoyu_Chen4;~Shenao_Zhang1;~Pushi_Zhang1;~Li_Zhao1;~Jianyu_Chen1", "aff": "Tsinghua University;Georgia Institute of Technology;Microsoft;Microsoft;Tsinghua University", "aff_domain": "tsinghua.edu.cn;gatech.edu;microsoft.com;microsoft.com;tsinghua.edu.cn", "position": "Graduate student;MS student;Researcher;Researcher;Assistant Professor", "bibtex": "@misc{\nchen2024asking,\ntitle={Asking Before Acting: Gather Information in Embodied Decision-Making with Language Models},\nauthor={Xiaoyu Chen and Shenao Zhang and Pushi Zhang and Li Zhao and Jianyu Chen},\nyear={2024},\nurl={https://openreview.net/forum?id=9QVqYBvCD8}\n}", "github": "", "project": "", "reviewers": "mCAD;P7Hr;umUa;23Y9", "site": "https://openreview.net/forum?id=9QVqYBvCD8", "pdf_size": 3480122, "rating": "3;3;6;6", "confidence": "4;3;3;5", "soundness": "2;3;3;4", "contribution": "2;1;2;3", "presentation": "2;3;2;4", "wc_summary": "51;166;112;60", "wc_strengths": "55;100;36;90", "wc_weaknesses": "231;239;201;194", "wc_questions": "22;3;9;2", "wc_review": "359;508;358;346", "wc_reply_reviewers": "305;143;21;0", "wc_reply_authors": 
"1784;1483;385;396", "reply_reviewers": "2;1;1;0", "reply_authors": "4;4;2;2", "rating_avg": [ 4.5, 1.5 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 97.25, 46.01833873576924 ], "wc_strengths_avg": [ 70.25, 25.8879798362097 ], "wc_weaknesses_avg": [ 216.25, 19.122957407263137 ], "wc_questions_avg": [ 9.0, 7.968688725254614 ], "wc_review_avg": [ 392.75, 66.73595357826244 ], "wc_reply_reviewers_avg": [ 117.25, 121.37210346698289 ], "wc_reply_authors_avg": [ 1012.0, 630.5572931938857 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.0, 1.0 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.3015113445777637, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13115928641223429866&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;2;2;0", "aff_unique_norm": "Tsinghua University;Georgia Institute of Technology;Microsoft", "aff_unique_dep": ";;Microsoft Corporation", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.gatech.edu;https://www.microsoft.com", "aff_unique_abbr": "THU;Georgia Tech;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;0", "aff_country_unique": "China;United States" }, { "title": "On Double Descent in Reinforcement Learning with LSTD and Random Features", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19297", "id": "9RIbNmx984", "author_site": "David Brellmann, Elo\u00efse Berthier, David Filliat, Goran Frehse", "tldr": "", "abstract": "Temporal Difference (TD) algorithms are widely used in Deep Reinforcement Learning (RL). Their performance is heavily influenced by the size of the neural network. While in supervised learning, the regime of over-parameterization and its benefits are well understood, the situation in RL is much less clear. In this paper, we present a theoretical analysis of the influence of network size and $l_2$-regularization on performance. We identify the ratio between the number of parameters and the number of visited states as a crucial factor and define over-parameterization as the regime when it is larger than one. Furthermore, we observe a double descent phenomenon, i.e., a sudden drop in performance around the parameter/state ratio of one. Leveraging random features and the lazy training regime, we study the regularized Least-Square Temporal Difference (LSTD) algorithm in an asymptotic regime, as both the number of parameters and states go to infinity, maintaining a constant ratio. We derive deterministic limits of both the empirical and the true Mean-Squared Bellman Error (MSBE) that feature correction terms responsible for the double descent. Correction terms vanish when the $l_2$-regularization is increased or the number of unvisited states goes to zero. 
Numerical experiments with synthetic and small real-world environments closely match the theoretical predictions.", "keywords": "Regularized Least-Square Temporal Difference;double descent;over-parameterization;random features", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/91988691567d4725cfa0a2f23110312354b228ee.zip", "author": "David Brellmann;Elo\u00efse Berthier;David Filliat;Goran Frehse", "authorids": "~David_Brellmann1;~Elo\u00efse_Berthier1;~David_Filliat1;~Goran_Frehse1", "gender": "M;F;M;M", "homepage": ";https://eloiseberthier.github.io/;https://perso.ensta-paris.fr/~filliat/en/;https://sites.google.com/site/frehseg/", "dblp": ";267/0937;13/5289;95/3625", "google_scholar": "https://scholar.google.com/citations?hl=fr;-PQBEZMAAAAJ;https://scholar.google.fr/citations?user=Wzq_c20AAAAJ;IgZwd6MAAAAJ", "orcid": ";;0000-0002-5739-1618;0000-0002-5441-0481", "linkedin": "david-brellmann;;;goran-frehse-84b8311/", "or_profile": "~David_Brellmann1;~Elo\u00efse_Berthier1;~David_Filliat1;~Goran_Frehse1", "aff": "ENSTA Paris;ENSTA;ENSTA Paris;ENSTA Paris", "aff_domain": "ensta-paris.fr;ensta-paris.fr;ensta-paris.fr;ensta-paris.fr", "position": "PhD student;Researcher;Full Professor;Full Professor", "bibtex": "@inproceedings{\nbrellmann2024on,\ntitle={On Double Descent in Reinforcement Learning with {LSTD} and Random Features},\nauthor={David Brellmann and Elo{\\\"\\i}se Berthier and David Filliat and Goran Frehse},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=9RIbNmx984}\n}", "github": "", "project": "", "reviewers": "jHhA;xcHJ;a6mC;7TMf", "pdf_size": 1395169, "rating": "6;6;8;10", "confidence": "3;3;3;4", "soundness": "3;3;3;4", "contribution": "3;3;3;4", "presentation": "3;3;3;4", "wc_summary": "198;259;111;169", "wc_strengths": "76;63;90;175", "wc_weaknesses": "200;119;537;356", "wc_questions": "94;2;80;83", "wc_review": "568;443;818;783", "wc_reply_reviewers": "26;0;20;76", "wc_reply_authors": "286;361;728;374", "reply_reviewers": "1;0;1;1", "reply_authors": "1;3;2;2", "rating_avg": [ 7.5, 1.6583123951777 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 184.25, 53.326236506995315 ], "wc_strengths_avg": [ 101.0, 43.777848279695064 ], "wc_weaknesses_avg": [ 303.0, 159.7106759111613 ], "wc_questions_avg": [ 64.75, 36.601741761834234 ], "wc_review_avg": [ 653.0, 154.47491705775408 ], "wc_reply_reviewers_avg": [ 30.5, 27.977669667075563 ], "wc_reply_authors_avg": [ 437.25, 171.1919609677978 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6954276998867653729&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "openreview": "https://openreview.net/forum?id=9RIbNmx984", "pdf": "https://openreview.net/pdf?id=9RIbNmx984", "email": "ensta-paris.fr;ensta-paris.fr;ensta-paris.fr;ensta-paris.fr", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "\u00c9cole Nationale Sup\u00e9rieure de Techniques Avanc\u00e9es", "aff_unique_dep": "", "aff_unique_url": "https://www.ensta.fr", "aff_unique_abbr": "ENSTA", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": 
"Paris;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "France" }, { "id": "9RLC0J2N9n", "title": "SynBench: Evaluating Pretrained Representations for Image Classification using Synthetic Data", "track": "main", "status": "Reject", "tldr": "", "abstract": "Fine-tuning large models pretrained at scale on broad data for solving downstream tasks has made considerable success in recent years. There seems to be indeed an ongoing paradigm shift in deep learning from task-centric model design to task-agnostic representation learning and task-specific fine-tuning. Specifically, the representations of pretrained models are used as a foundation for different downstream tasks. This paper proposes a new task-agnostic framework, \\textit{SynBench}, to measure the quality of pretrained representations for image classification using synthetic data. To address the challenge of task-agnostic data-free evaluation, we design synthetic binary classification proxy tasks with class-conditional Gaussian mixtures. This way we probe and compare the robustness-accuracy performance on pretrained representations and input synthetic data. SynBench offers a holistic quantitative evaluation, informs the model designers of the intrinsic performance, and spares efforts on task-specific finetuning with real-life data. Evaluated with various pretrained vision models for different downstream image classification tasks, the experimental results show that our SynBench score matches well the actual linear probing performance of the pretrained model when fine-tuned on downstream tasks using real-life data. Finally, SynBench can also be used in robust linear probing to mitigate the robustness-accuracy tradeoff in downstream tasks.", "keywords": "Vision pretrained model; synthetic data; evaluation", "primary_area": "visualization or interpretation of learned representations", "supplementary_material": "", "author": "Ching-Yun Ko;Pin-Yu Chen;Payel Das;Jeet Mohapatra;Luca Daniel", "authorids": "~Ching-Yun_Ko1;~Pin-Yu_Chen1;~Payel_Das1;~Jeet_Mohapatra1;~Luca_Daniel1", "gender": "F;M;F;M;", "homepage": ";http://www.pinyuchen.com;;;https://www.mit.edu/~dluca/", "dblp": "206/6472;39/8969;56/7926;210/2304;35/5202", "google_scholar": ";jxwlCUUAAAAJ;;;", "orcid": ";0000-0003-1039-8369;;;0000-0002-5880-3151", "linkedin": ";pin-yu-chen-940062a2;;;", "or_profile": "~Ching-Yun_Ko1;~Pin-Yu_Chen1;~Payel_Das1;~Jeet_Mohapatra1;~Luca_Daniel1", "aff": "Massachusetts Institute of Technology;International Business Machines;IBM, International Business Machines;;", "aff_domain": "mit.edu;ibm.com;us.ibm.com;;", "position": "PhD student;Principal Researcher;Principal Researcher;;", "bibtex": "@misc{\nko2024synbench,\ntitle={SynBench: Evaluating Pretrained Representations for Image Classification using Synthetic Data},\nauthor={Ching-Yun Ko and Pin-Yu Chen and Payel Das and Jeet Mohapatra and Luca Daniel},\nyear={2024},\nurl={https://openreview.net/forum?id=9RLC0J2N9n}\n}", "github": "", "project": "", "reviewers": "af4U;4o5T;97TE;S4rE", "site": "https://openreview.net/forum?id=9RLC0J2N9n", "pdf_size": 2774636, "rating": "3;3;6;6", "confidence": "4;3;2;3", "soundness": "1;2;4;3", "contribution": "2;2;4;3", "presentation": "3;3;3;3", "wc_summary": "126;100;103;74", "wc_strengths": "66;58;91;60", "wc_weaknesses": "392;173;39;129", "wc_questions": "190;50;77;2", "wc_review": "774;381;310;265", "wc_reply_reviewers": "519;242;35;25", "wc_reply_authors": "2547;2062;644;723", "reply_reviewers": "1;1;1;1", "reply_authors": "6;5;2;2", "rating_avg": 
[ 4.5, 1.5 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 1.118033988749895 ], "contribution_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 100.75, 18.430613120566555 ], "wc_strengths_avg": [ 68.75, 13.179055353097201 ], "wc_weaknesses_avg": [ 183.25, 129.83908309904226 ], "wc_questions_avg": [ 79.75, 69.08825877093734 ], "wc_review_avg": [ 432.5, 201.45533003621424 ], "wc_reply_reviewers_avg": [ 205.25, 200.78891279151844 ], "wc_reply_authors_avg": [ 1494.0, 828.9110326205099 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.75, 1.7853571071357126 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.7071067811865476, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:xTaPW4fqbKkJ:scholar.google.com/&scioq=SynBench:+Evaluating+Pretrained+Representations+for+Image+Classification+using+Synthetic+Data&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "Massachusetts Institute of Technology;International Business Machines Corporation;International Business Machines", "aff_unique_dep": ";;", "aff_unique_url": "https://web.mit.edu;https://www.ibm.com;https://www.ibm.com", "aff_unique_abbr": "MIT;IBM;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Leave-one-out Distinguishability in Machine Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19296", "id": "9RNfX0ah0K", "author_site": "Jiayuan Ye, Anastasia Borovykh, Soufiane Hayou, Reza Shokri", "tldr": "", "abstract": "We introduce an analytical framework to quantify the changes in a machine learning algorithm's output distribution following the inclusion of a few data points in its training set, a notion we define as leave-one-out distinguishability (LOOD). This is key to measuring data **memorization** and information **leakage** as well as the **influence** of training data points in machine learning. We illustrate how our method broadens and refines existing empirical measures of memorization and privacy risks associated with training data. We use Gaussian processes to model the randomness of machine learning algorithms, and validate LOOD with extensive empirical analysis of leakage using membership inference attacks. Our analytical framework enables us to investigate the causes of leakage and where the leakage is high. For example, we analyze the influence of activation functions, on data memorization. Additionally, our method allows us to identify queries that disclose the most information about the training data in the leave-one-out setting. 
We illustrate how optimal queries can be used for accurate **reconstruction** of training data.", "keywords": "memorization;influence estimation;information leakage;neural network Gaussian process", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "", "author": "Jiayuan Ye;Anastasia Borovykh;Soufiane Hayou;Reza Shokri", "authorids": "~Jiayuan_Ye1;~Anastasia_Borovykh1;~Soufiane_Hayou1;~Reza_Shokri1", "gender": ";;M;", "homepage": ";;https://www.soufianehayou.com/;", "dblp": ";;220/5617;", "google_scholar": ";;https://scholar.google.com/citations?hl=en;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Jiayuan_Ye1;~Anastasia_Borovykh1;~Soufiane_Hayou1;~Reza_Shokri1", "aff": ";;National University of Singapore;", "aff_domain": ";;nus.edu.sg;", "position": ";;Assistant Professor;", "bibtex": "@inproceedings{\nye2024leaveoneout,\ntitle={Leave-one-out Distinguishability in Machine Learning},\nauthor={Jiayuan Ye and Anastasia Borovykh and Soufiane Hayou and Reza Shokri},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=9RNfX0ah0K}\n}", "github": "", "project": "", "reviewers": "s1Kq;Azxn;mmKM;rJTi", "pdf_size": 5745906, "rating": "5;6;6;8", "confidence": "4;3;3;2", "soundness": "3;3;3;3", "contribution": "3;3;3;3", "presentation": "3;3;2;3", "wc_summary": "82;294;107;158", "wc_strengths": "80;88;25;59", "wc_weaknesses": "242;128;122;16", "wc_questions": "52;30;24;43", "wc_review": "456;540;278;276", "wc_reply_reviewers": "0;0;0;4", "wc_reply_authors": "1113;947;0;368", "reply_reviewers": "0;0;0;1", "reply_authors": "2;2;0;1", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 160.25, 81.93404359605353 ], "wc_strengths_avg": [ 63.0, 24.361855430159665 ], "wc_weaknesses_avg": [ 127.0, 79.95623803056269 ], "wc_questions_avg": [ 37.25, 10.940178243520533 ], "wc_review_avg": [ 387.5, 114.42355526726129 ], "wc_reply_reviewers_avg": [ 1.0, 1.7320508075688772 ], "wc_reply_authors_avg": [ 607.0, 446.43196570138207 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.82915619758885 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9733285267845754, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11532538955694417653&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=9RNfX0ah0K", "pdf": "https://openreview.net/pdf?id=9RNfX0ah0K", "email": ";;nus.edu.sg;", "author_num": 4, "aff_unique_index": "0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "", "aff_unique_url": "https://www.nus.edu.sg", "aff_unique_abbr": "NUS", "aff_country_unique_index": "0", "aff_country_unique": "Singapore" }, { "title": "NECO: NEural Collapse Based Out-of-distribution detection", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19295", "id": "9ROuKblmi7", "author_site": "Mou\u00efn Ben Ammar, Nacim Belkhir, Sebastian Popescu, Antoine Manzanera, Gianni Franchi", "tldr": "", "abstract": "Detecting out-of-distribution (OOD) data is a critical challenge in machine learning due to model overconfidence, often without awareness of their epistemological limits. 
We hypothesize that \"neural collapse\", a phenomenon affecting in-distribution data for models trained beyond loss convergence, also influences OOD data. To benefit from this interplay, we introduce NECO, a novel post-hoc method for OOD detection, which leverages the geometric properties of \u201cneural collapse\u201d and of principal component spaces to identify OOD data. Our extensive experiments demonstrate that NECO achieves state-of-the-art results on both small and large-scale OOD detection tasks while exhibiting strong generalization capabilities across different network architectures. Furthermore, we provide a theoretical explanation for the effectiveness of our method in OOD detection. We plan to release the code after the anonymity period.", "keywords": "anomaly detection;OOD", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/6ac26ebf27b2712ee42924778a40e47549861536.zip", "author": "Mou\u00efn Ben Ammar;Nacim Belkhir;Sebastian Popescu;Antoine Manzanera;Gianni Franchi", "authorids": "~Mou\u00efn_Ben_Ammar1;~Nacim_Belkhir1;~Sebastian_Popescu1;~Antoine_Manzanera1;~Gianni_Franchi1", "gender": "M;M;M;M;M", "homepage": ";https://www.linkedin.com/in/belkhirnacim;;https://perso.ensta-paris.fr/~manzaner/;https://giannifranchi.github.io/", "dblp": "359/1584;;;73/2951;155/3061", "google_scholar": "https://scholar.google.com/citations?hl=en;As-piMYAAAAJ;FKDRATgAAAAJ;YBJbAhoAAAAJ;ZCW6-psAAAAJ", "orcid": ";;;0000-0001-5718-411X;0000-0002-2184-1381", "linkedin": "mouinbenammar/;;;;gianni-franchi-94435754/", "or_profile": "~Mou\u00efn_Ben_Ammar1;~Nacim_Belkhir1;~Sebastian_Popescu1;~Antoine_Manzanera1;~Gianni_Franchi1", "aff": "ENSTA Paris;Safran;Find&Order;Ecole Nationale Sup\u00e9rieure de Techniques Avanc\u00e9es;ENSTA Paris", "aff_domain": "ensta-paristech.fr;safrangroup.com;findnorder.com;ensta-paris.fr;ensta-paristech.fr", "position": "PhD student;Researcher;Researcher;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nammar2024neco,\ntitle={{NECO}: {NE}ural Collapse Based Out-of-distribution detection},\nauthor={Mou{\\\"\\i}n Ben Ammar and Nacim Belkhir and Sebastian Popescu and Antoine Manzanera and Gianni Franchi},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=9ROuKblmi7}\n}", "github": "", "project": "", "reviewers": "1ZaR;Qb8y;Ek3F;r6r7", "pdf_size": 6740316, "rating": "5;6;6;6", "confidence": "3;3;4;4", "soundness": "2;2;3;2", "contribution": "2;2;3;3", "presentation": "2;2;3;3", "wc_summary": "73;75;49;57", "wc_strengths": "72;54;49;38", "wc_weaknesses": "63;121;84;257", "wc_questions": "237;124;11;11", "wc_review": "445;374;193;363", "wc_reply_reviewers": "0;127;0;0", "wc_reply_authors": "540;1372;366;526", "reply_reviewers": "0;2;0;0", "reply_authors": "2;4;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 63.5, 10.897247358851684 ], "wc_strengths_avg": [ 53.25, 12.275483697190918 ], "wc_weaknesses_avg": [ 131.25, 75.51283003569658 ], "wc_questions_avg": [ 95.75, 93.69465032754005 ], "wc_review_avg": [ 343.75, 92.5509994543549 ], "wc_reply_reviewers_avg": [ 31.75, 54.99261314031185 ], "wc_reply_authors_avg": [ 701.0, 393.38657831705444 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.5, 
0.8660254037844386 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4653753281710190123&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 10, "openreview": "https://openreview.net/forum?id=9ROuKblmi7", "pdf": "https://openreview.net/pdf?id=9ROuKblmi7", "email": "ensta-paristech.fr;safrangroup.com;findnorder.com;ensta-paris.fr;ensta-paristech.fr", "author_num": 5, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "\u00c9cole Nationale Sup\u00e9rieure de Techniques Avanc\u00e9es;Safran;Find&Order;Ecole Nationale Sup\u00e9rieure de Techniques Avanc\u00e9es", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ensta.fr;https://www.safran-group.com;;https://www.ensae.fr", "aff_unique_abbr": "ENSTA;;;ENSTA", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Paris;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "France;" }, { "id": "9St5HsXMOr", "title": "Long-range Meta-path Search through Progressive Sampling on Large-scale Heterogeneous Information Networks", "track": "main", "status": "Reject", "tldr": "", "abstract": "Utilizing long-range dependency, though extensively studied in homogeneous graphs, is rarely studied in large-scale heterogeneous information networks (HINs), whose main challenge is the high costs and the difficulty in utilizing effective information. To this end, we investigate the importance of different meta-paths and propose an automatic framework for utilizing long-range dependency in HINs, called Long-range Meta-path Search through Progressive Sampling (LMSPS). Specifically, to discover meta-paths for various datasets or tasks without prior, we develop a search space with all target-node-related meta-paths. With a progressive sampling algorithm, we dynamically shrink the search space with hop-independent time complexity, leading to a compact search space driven by the current HIN and task. Utilizing a sampling evaluation strategy as the guidance, we conduct a specialized and expressive meta-path selection. Extensive experiments on eight heterogeneous datasets demonstrate that LMSPS discovers effective long-range meta-paths and outperforms state-of-the-art models. 
Besides, it ranks top-1 on the leaderboards of ogbn-mag in Open Graph Benchmark.", "keywords": "Neural architecture search;heterogeneous graph neural networks;long-range dependency;meta-path search", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/3b1003a69e73a2a2719eaab2b87fa49fc9612f95.zip", "author": "Chao Li;Zijie Guo;Qiuting He;Hao Xu;Kun He", "authorids": "~Chao_Li14;~Zijie_Guo1;~Qiuting_He1;~Hao_Xu11;~Kun_He1", "gender": "M;;M;M;F", "homepage": "https://github.com/lichaoaaron;;;http://www.masterx.top/;http://faculty.hust.edu.cn/hekun/zh_CN/more/1411001/jsjjgd/index.htm", "dblp": ";;https://dblp.uni-trier.de/pid/322/9335.html;43/6008;59/1028-1", "google_scholar": "Wik8bkIAAAAJ;;;8zcB6s4AAAAJ;YTQnGJsAAAAJ", "orcid": "0000-0001-9066-1440;;;0000-0003-4207-6161;0000-0001-7627-4604", "linkedin": ";;;;", "or_profile": "~Chao_Li14;~Zijie_Guo1;~Qiuting_He1;~Hao_Xu11;~Kun_He1", "aff": "Huazhong University of Science and Technology;;;Huazhong University of Science and Technology;Huazhong University of Sceince and Technology", "aff_domain": "hust.edu.cn;;;hust.edu.cn;hust.edu.cn", "position": "PhD student;;;MS student;Full Professor", "bibtex": "@misc{\nli2024longrange,\ntitle={Long-range Meta-path Search through Progressive Sampling on Large-scale Heterogeneous Information Networks},\nauthor={Chao Li and Zijie Guo and Qiuting He and Hao Xu and Kun He},\nyear={2024},\nurl={https://openreview.net/forum?id=9St5HsXMOr}\n}", "github": "", "project": "", "reviewers": "f416;vDix;dvsv;kpcU;zWVu", "site": "https://openreview.net/forum?id=9St5HsXMOr", "pdf_size": 624254, "rating": "5;5;5;5;8", "confidence": "4;4;3;4;3", "soundness": "3;2;3;3;4", "contribution": "2;2;2;3;3", "presentation": "3;3;2;2;3", "wc_summary": "93;73;119;124;84", "wc_strengths": "33;48;38;59;33", "wc_weaknesses": "138;555;150;175;49", "wc_questions": "21;2;51;3;51", "wc_review": "285;678;358;361;217", "wc_reply_reviewers": "12;0;0;28;0", "wc_reply_authors": "955;1606;918;749;590", "reply_reviewers": "1;0;0;1;0", "reply_authors": "2;3;2;1;1", "rating_avg": [ 5.6, 1.2 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "contribution_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 98.6, 19.80504986108341 ], "wc_strengths_avg": [ 42.2, 10.02796090937734 ], "wc_weaknesses_avg": [ 213.4, 176.00977245596337 ], "wc_questions_avg": [ 25.6, 21.813757127097567 ], "wc_review_avg": [ 379.8, 158.26736871509556 ], "wc_reply_reviewers_avg": [ 8.0, 11.027239001672177 ], "wc_reply_authors_avg": [ 963.6, 346.5432729111907 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.6123724356957945, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:4lJBZmkZIaYJ:scholar.google.com/&scioq=Long-range+Meta-path+Search+through+Progressive+Sampling+on+Large-scale+Heterogeneous+Information+Networks&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Huazhong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.hust.edu.cn", "aff_unique_abbr": "HUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "9SwObx9Jdn", "title": "Generation of Geodesics 
with Actor-Critic Reinforcement Learning to Predict Midpoints", "track": "main", "status": "Reject", "tldr": "", "abstract": "Various tasks in the real world, such as path planning, can be reduced to the generation of geodesics on manifolds. For reinforcement learning to generate geodesics sequentially, we need to define rewards appropriately. To generate geodesics without any adjustment of rewards, we propose to use a modified version of sub-goal trees, called midpoint trees. While sub-goal trees consist of arbitrary intermediate points, midpoint trees consist of midpoints. In addition, we propose an actor-critic method to learn to predict midpoints and theoretically prove that, under mild assumptions, when the learning converges at the limit of infinite tree depth, the resulting policy generates exact midpoints.\nWe show experimentally that our proposed method outperforms existing methods in a certain path planning task.", "keywords": "reinforcement learning;actor-critic method;path planning;path optimization;subgoal;midpoint;geodesic;Finsler geometry", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/88737306bff9b1bedc84e99ad1af389499b699d6.zip", "author": "Kazumi Kasaura", "authorids": "~Kazumi_Kasaura1", "gender": "", "homepage": "", "dblp": "334/3342.html", "google_scholar": "rONY2jEAAAAJ", "orcid": "0000-0002-3219-9961", "linkedin": "", "or_profile": "~Kazumi_Kasaura1", "aff": "OMRON SINIC X Corporation", "aff_domain": "sinicx.com", "position": "Researcher", "bibtex": "@misc{\nkasaura2024generation,\ntitle={Generation of Geodesics with Actor-Critic Reinforcement Learning to Predict Midpoints},\nauthor={Kazumi Kasaura},\nyear={2024},\nurl={https://openreview.net/forum?id=9SwObx9Jdn}\n}", "github": "", "project": "", "reviewers": "8vLp;4ppY;etxs;ZvgN", "site": "https://openreview.net/forum?id=9SwObx9Jdn", "pdf_size": 757217, "rating": "3;5;5;6", "confidence": "3;1;2;3", "soundness": "3;3;3;3", "contribution": "2;2;2;3", "presentation": "2;3;3;3", "wc_summary": "55;93;82;80", "wc_strengths": "49;18;60;37", "wc_weaknesses": "125;137;351;63", "wc_questions": "106;45;70;66", "wc_review": "335;293;563;246", "wc_reply_reviewers": "55;0;0;0", "wc_reply_authors": "597;257;472;216", "reply_reviewers": "1;0;0;0", "reply_authors": "3;1;2;1", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 2.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 77.5, 13.901438774457844 ], "wc_strengths_avg": [ 41.0, 15.572411502397436 ], "wc_weaknesses_avg": [ 169.0, 108.76580344942982 ], "wc_questions_avg": [ 71.75, 21.935986415021322 ], "wc_review_avg": [ 359.25, 121.77515140618796 ], "wc_reply_reviewers_avg": [ 13.75, 23.81569860407206 ], "wc_reply_authors_avg": [ 385.5, 156.09051861019617 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.20751433915982243, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:-hXDlkd16w4J:scholar.google.com/&scioq=Generation+of+Geodesics+with+Actor-Critic+Reinforcement+Learning+to+Predict+Midpoints&hl=en&as_sdt=0,5", "gs_version_total": 3, "aff_unique_index": "0", "aff_unique_norm": "OMRON Corporation", "aff_unique_dep": "", "aff_unique_url": "https://www.omron.com", "aff_unique_abbr": "OMRON", "aff_country_unique_index": "0", 
"aff_country_unique": "Japan" }, { "id": "9TG42oozQP", "title": "Causal Effect Estimation with Mixed Latent Confounders and Post-treatment Variables", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "In recent years, causal inference from observational data has attracted considerable attention among researchers. One main obstacle for inferring causal effects from observational data is the handling of confounders. As direct measurement of confounders may not always be feasible, recent methods seek to adjust the confounding effects based on proxy variables, which are high-dimensional features researchers postulated to be determined by the latent confounders. However, observed features may scramble both latent confounders and post-treatment variables simultaneously in observational study, where existing methods risk distorting the estimation by unintentionally controlling variables affected by the treatment. In this paper, we systematically investigate the latent post-treatment bias in causal inference. We first derive the bias of existing methods when the selected proxies scramble both latent confounders and post-treatment variables, which we demonstrate can be arbitrarily bad. We then propose a novel Confounder-identifiable VAE (CiVAE) to address the bias, built upon the assumption that the prior of latent variables belongs to the general exponential family with at least one invertible sufficient statistic in the factorized part. Based on this, we show that latent confounders and latent post-treatment variables can be properly distinguished. Furthermore, we show that latent confounders can be identified up to simple bijective transformations. Finally, we prove that the true causal effects can be unbiasedly estimated with transformed confounder proxies. 
Experiments on both simulated and real-world datasets demonstrate that CiVAE is significantly more robust than existing methods.", "keywords": "Causal Inference;Latent Post-treatment Bias;Proxy of Confounders;Identifiable VAE", "primary_area": "causal reasoning", "supplementary_material": "", "author": "Yaochen Zhu;Jing Ma;Liang Wu;Qi Guo;Liangjie Hong;Jundong Li", "authorids": "~Yaochen_Zhu1;~Jing_Ma2;~Liang_Wu3;~Qi_Guo9;~Liangjie_Hong1;~Jundong_Li2", "gender": "M;F;M;M;M;M", "homepage": "http://www.ychzhu.com/;https://jma712.github.io/;http://liangwu.me/;https://qiguo.xyz/;https://www.hongliangjie.com/;https://jundongli.github.io/", "dblp": "251/3533;96/6129-2;https://dblp.org/pers/hd/w/Wu_0006:Liang;67/398-3;00/7186;144/7997.html", "google_scholar": "mNKYtHEAAAAJ;VLElvX8AAAAJ;r2BP6FsAAAAJ;IatRsPEAAAAJ;4uaSNpYAAAAJ;uY6ek7sAAAAJ", "orcid": ";;;0009-0009-0078-1533;;", "linkedin": ";;wuliang1/;qi-guo/;liangjiehong/;", "or_profile": "~Yaochen_Zhu1;~Jing_Ma2;~Liang_Wu3;~Qi_Guo9;~Liangjie_Hong1;~Jundong_Li2", "aff": "LinkedIn;Case Western Reserve University;LinkedIn;LinkedIn;LinkedIn;University of Virginia", "aff_domain": "linkedin.com;case.edu;linkedin.com;linkedin.com;linkedin.com;virginia.edu", "position": "Intern;Assistant Professor;Applied Scientist;Researcher;Researcher;Assistant Professor", "bibtex": "@misc{\nzhu2024causal,\ntitle={Causal Effect Estimation with Mixed Latent Confounders and Post-treatment Variables},\nauthor={Yaochen Zhu and Jing Ma and Liang Wu and Qi Guo and Liangjie Hong and Jundong Li},\nyear={2024},\nurl={https://openreview.net/forum?id=9TG42oozQP}\n}", "github": "", "project": "", "reviewers": "cihL;Eaxa;gqDy;wUiz", "site": "https://openreview.net/forum?id=9TG42oozQP", "pdf_size": 458832, "rating": "3;3;5;5", "confidence": "3;4;3;3", "soundness": "2;3;3;3", "contribution": "2;2;2;2", "presentation": "2;2;3;2", "wc_summary": "46;27;100;38", "wc_strengths": "29;25;25;29", "wc_weaknesses": "697;211;188;229", "wc_questions": "2;2;22;2", "wc_review": "774;265;335;298", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 52.75, 28.101378969723175 ], "wc_strengths_avg": [ 27.0, 2.0 ], "wc_weaknesses_avg": [ 331.25, 211.66527230511858 ], "wc_questions_avg": [ 7.0, 8.660254037844387 ], "wc_review_avg": [ 418.0, 207.02294558816422 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:-W6LS7n_ENoJ:scholar.google.com/&scioq=Causal+Effect+Estimation+with+Mixed+Latent+Confounders+and+Post-treatment+Variables&hl=en&as_sdt=0,33", "gs_version_total": 2, "aff_unique_index": "0;1;0;0;0;2", "aff_unique_norm": "LinkedIn Corporation;Case Western Reserve University;University of Virginia", "aff_unique_dep": ";;", "aff_unique_url": "https://www.linkedin.com;https://www.case.edu;https://www.virginia.edu", "aff_unique_abbr": "LinkedIn;CWRU;UVA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "9TJDsOEaBC", "title": "Bayesian Vector
Optimization with Gaussian Processes", "track": "main", "status": "Reject", "tldr": "", "abstract": "Learning problems in which multiple conflicting objectives must be considered simultaneously often arise in various fields, including engineering, drug design, and environmental management. Traditional methods of multi-objective optimization, such as scalarization and identification of the Pareto set under componentwise order, have limitations in incorporating objective preferences and exploring the solution space accordingly. While vector optimization offers improved flexibility and adaptability via specifying partial orders based on ordering cones, current techniques designed for sequential experiments suffer from high sample complexity, which makes them unfit for large-scale learning problems. To address this issue, we propose VOGP, an ($\\epsilon,\\delta$)-PAC adaptive elimination algorithm that performs vector optimization using Gaussian processes. VOGP allows users to convey objective preferences through ordering cones while performing efficient sampling by exploiting the smoothness of the objective function, resulting in a more effective optimization process that requires fewer evaluations. We first establish provable theoretical guarantees for VOGP, and then derive information gain based and kernel specific sample complexity bounds. VOGP demonstrates strong empirical results on both real-world and synthetic datasets, outperforming previous work in sequential vector optimization and its special case multi-objective optimization. This work highlights the potential of VOGP as a powerful preference-driven method for addressing complex sequential vector optimization problems.", "keywords": "Vector Optimization;Bayesian Optimization;Gaussian Processes;Ordering Cones", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/9ea67f537d09082f69a1d889d546c9ec75f82025.zip", "author": "\u0130lter Onat Korkmaz;Cagin Ararat;Cem Tekin", "authorids": "~\u0130lter_Onat_Korkmaz1;~Cagin_Ararat1;~Cem_Tekin2", "gender": "M;M;M", "homepage": ";https://sites.google.com/view/cararat;http://kilyos.ee.bilkent.edu.tr/~cemtekin/", "dblp": ";268/5341;98/7659.html", "google_scholar": "qlway8sAAAAJ;;https://scholar.google.com/citations?hl=tr", "orcid": ";0000-0002-6985-7665;", "linkedin": "ilter-onat-korkmaz-352998180/;;", "or_profile": "~\u0130lter_Onat_Korkmaz1;~Cagin_Ararat1;~Cem_Tekin2", "aff": "Bilkent University;Bilkent University;Bilkent University", "aff_domain": "bilkent.edu.tr;bilkent.edu.tr;bilkent.edu.tr", "position": "MS student;Assistant Professor;Associate Professor", "bibtex": "@misc{\nkorkmaz2024bayesian,\ntitle={Bayesian Vector Optimization with Gaussian Processes},\nauthor={{\\.I}lter Onat Korkmaz and Cagin Ararat and Cem Tekin},\nyear={2024},\nurl={https://openreview.net/forum?id=9TJDsOEaBC}\n}", "github": "", "project": "", "reviewers": "bF5L;ahkf;XrYo;vz76", "site": "https://openreview.net/forum?id=9TJDsOEaBC", "pdf_size": 699313, "rating": "5;5;5;6", "confidence": "5;2;3;2", "soundness": "2;2;2;3", "contribution": "2;3;2;3", "presentation": "2;2;3;3", "wc_summary": "75;116;115;64", "wc_strengths": "24;99;59;8", "wc_weaknesses": "59;157;308;6", "wc_questions": "570;135;152;1", "wc_review": "728;507;634;79", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "789;647;635;96", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 1.224744871391589 ], "soundness_avg": [ 2.25, 0.4330127018922193 
], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 92.5, 23.32916629457641 ], "wc_strengths_avg": [ 47.5, 34.98928407384181 ], "wc_weaknesses_avg": [ 132.5, 114.89669272872914 ], "wc_questions_avg": [ 214.5, 213.41801704635904 ], "wc_review_avg": [ 487.0, 248.2710212650683 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 541.75, 264.38549033560827 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.4714045207910316, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17018310508350217774&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Bilkent University", "aff_unique_dep": "", "aff_unique_url": "https://www.bilkent.edu.tr", "aff_unique_abbr": "Bilkent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "T\u00fcrkiye" }, { "id": "9TSv6ZVhvN", "title": "Improving Accelerated Federated Learning with Compression and Importance Sampling", "track": "main", "status": "Reject", "tldr": "", "abstract": "Federated Learning is a collaborative training framework that leverages heterogeneous data distributed across a vast number of clients. Since it is practically infeasible to request and process all clients during the aggregation step, partial participation must be supported. In this setting, the communication between the server and clients poses a major bottleneck. To reduce communication loads, there are two main approaches: compression and local steps. Recent work by Mishchenko et al. [2022] introduced the new ProxSkip method, which achieves an accelerated rate using the local steps technique. Follow-up works successfully combined local steps acceleration with partial participation [Grudzie\u0144 et al., 2023, Condat et al. 2023] and gradient compression [Condat et al. [2022]. In this paper, we finally present a complete method for Federated Learning that incorporates all necessary ingredients: Local Training, Compression, and Partial Participation. We obtain state-of-the-art convergence guarantees in the considered setting. Moreover, we analyze the general sampling framework for partial participation and derive an importance sampling scheme, which leads to even better performance. 
We experimentally demonstrate the advantages of the proposed method in practice.", "keywords": "Machine Learning;Optimization;Federated Learning", "primary_area": "optimization", "supplementary_material": "", "author": "Micha\u0142 Grudzie\u0144;Grigory Malinovsky;Peter Richt\u00e1rik", "authorids": "~Micha\u0142_Grudzie\u01441;~Grigory_Malinovsky1;~Peter_Richt\u00e1rik1", "gender": "M;M;M", "homepage": ";https://grigory-malinovsky.github.io;https://richtarik.org", "dblp": ";262/3277.html;62/8001", "google_scholar": "vN2ALVYAAAAJ;4w2W9KQAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0003-4380-5848", "linkedin": "micha%C5%82-grudzie%C5%84-2141a2198/;;richtarik/", "or_profile": "~Micha\u0142_Grudzie\u01441;~Grigory_Malinovsky1;~Peter_Richtarik1", "aff": "University of Oxford;Samsung;King Abdullah University of Science and Technology (KAUST)", "aff_domain": "ox.ac.uk;samsung.com;kaust.edu.sa", "position": "Undergrad student;Intern;Full Professor", "bibtex": "@misc{\ngrudzie{\\'n}2024improving,\ntitle={Improving Accelerated Federated Learning with Compression and Importance Sampling},\nauthor={Micha{\\l} Grudzie{\\'n} and Grigory Malinovsky and Peter Richt{\\'a}rik},\nyear={2024},\nurl={https://openreview.net/forum?id=9TSv6ZVhvN}\n}", "github": "", "project": "", "reviewers": "AQbc;ii5V;SCZT", "site": "https://openreview.net/forum?id=9TSv6ZVhvN", "pdf_size": 590601, "rating": "3;3;8", "confidence": "3;4;2", "soundness": "2;2;4", "contribution": "2;2;2", "presentation": "2;3;4", "wc_summary": "36;32;26", "wc_strengths": "26;43;30", "wc_weaknesses": "62;284;53", "wc_questions": "116;2;2", "wc_review": "240;361;111", "wc_reply_reviewers": "381;0;0", "wc_reply_authors": "3240;1105;537", "reply_reviewers": "2;0;0", "reply_authors": "6;2;1", "rating_avg": [ 4.666666666666667, 2.357022603955158 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 2.6666666666666665, 0.9428090415820634 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 31.333333333333332, 4.109609335312651 ], "wc_strengths_avg": [ 33.0, 7.2571803523590805 ], "wc_weaknesses_avg": [ 133.0, 106.8363234110946 ], "wc_questions_avg": [ 40.0, 53.74011537017761 ], "wc_review_avg": [ 237.33333333333334, 102.07948972355916 ], "wc_reply_reviewers_avg": [ 127.0, 179.60512242138307 ], "wc_reply_authors_avg": [ 1627.3333333333333, 1163.6655686044662 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 3.0, 2.160246899469287 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8660254037844387, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12607570230306687324&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Oxford;Samsung;King Abdullah University of Science and Technology", "aff_unique_dep": ";Samsung;", "aff_unique_url": "https://www.ox.ac.uk;https://www.samsung.com;https://www.kaust.edu.sa", "aff_unique_abbr": "Oxford;Samsung;KAUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2", "aff_country_unique": "United Kingdom;South Korea;Saudi Arabia" }, { "id": "9UGAUQjibp", "title": "Quantized Local Independence Discovery for Fine-Grained Causal Dynamics Learning in Reinforcement Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Incorporating causal relationships between the variables into dynamics learning has emerged as a 
promising approach to enhance robustness and generalization in reinforcement learning (RL). Recent studies have focused on examining conditional independences and leveraging only relevant state and action variables for prediction. However, such approaches tend to overlook local independence relationships that hold under certain circumstances referred as event. In this work, we present a theoretically-grounded and practical approach to dynamics learning which discovers such meaningful events and infers fine-grained causal relationships. The key idea is to learn a discrete latent variable that represents the pair of event and causal relationships specific to the event via vector quantization. As a result, our method provides a fine-grained understanding of the dynamics by capturing event-specific causal relationships, leading to improved robustness and generalization in RL. Experimental results demonstrate that our method is more robust to unseen states and generalizes well to downstream tasks compared to prior approaches. In addition, we find that our method successfully identifies meaningful events and recovers event-specific causal relationships.", "keywords": "Reinforcement learning;Causal reasoning;Local independence", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Inwoo Hwang;Yunhyeok Kwak;Suhyung Choi;Byoung-Tak Zhang;Sanghack Lee", "authorids": "~Inwoo_Hwang1;~Yunhyeok_Kwak1;~Suhyung_Choi2;~Byoung-Tak_Zhang1;~Sanghack_Lee1", "gender": ";M;M;M;M", "homepage": "https://iwhwang.github.io;https://yun-kwak.github.io;https://www.github.com/conscious-choi;https://bi.snu.ac.kr/~btzhang/;http://www.sanghacklee.me", "dblp": "317/0732;332/4729;376/2355;09/5682;20/1133", "google_scholar": "MuG6Le8AAAAJ;rhyhnRYAAAAJ;jPcpT2MAAAAJ;sYTUOu8AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0009-0001-5491-3492;;;0000-0001-7137-6126", "linkedin": ";;;;sanghack-lee-65b52a28/", "or_profile": "~Inwoo_Hwang1;~Yunhyeok_Kwak1;~Suhyung_Choi2;~Byoung-Tak_Zhang1;~Sanghack_Lee1", "aff": "Seoul National University;Seoul National University;Seoul National University;Seoul National University;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr", "position": "PhD student;MS student;PhD student;Full Professor;Assistant Professor", "bibtex": "@misc{\nhwang2024quantized,\ntitle={Quantized Local Independence Discovery for Fine-Grained Causal Dynamics Learning in Reinforcement Learning},\nauthor={Inwoo Hwang and Yunhyeok Kwak and Suhyung Choi and Byoung-Tak Zhang and Sanghack Lee},\nyear={2024},\nurl={https://openreview.net/forum?id=9UGAUQjibp}\n}", "github": "", "project": "", "reviewers": "RHPG;Ufsn;eMfR;JfxM;s5uf", "site": "https://openreview.net/forum?id=9UGAUQjibp", "pdf_size": 12317800, "rating": "5;6;6;6;6", "confidence": "3;3;1;3;3", "soundness": "1;3;2;2;3", "contribution": "2;3;3;3;3", "presentation": "1;3;3;3;3", "wc_summary": "144;44;46;52;55", "wc_strengths": "120;71;48;26;53", "wc_weaknesses": "516;21;18;39;180", "wc_questions": "643;56;265;40;210", "wc_review": "1423;192;377;157;498", "wc_reply_reviewers": "550;0;0;18;16", "wc_reply_authors": "1655;302;938;313;742", "reply_reviewers": "3;0;0;1;1", "reply_authors": "4;1;3;1;1", "rating_avg": [ 5.8, 0.39999999999999997 ], "confidence_avg": [ 2.6, 0.8 ], "soundness_avg": [ 2.2, 0.7483314773547882 ], "contribution_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.8000000000000002 ], "wc_summary_avg": [ 68.2, 38.107217164206574 ], "wc_strengths_avg": [ 63.6, 
31.639216172339037 ], "wc_weaknesses_avg": [ 154.8, 190.32960883688065 ], "wc_questions_avg": [ 242.8, 218.059991745391 ], "wc_review_avg": [ 529.4, 463.73552807607916 ], "wc_reply_reviewers_avg": [ 116.8, 216.7343073904083 ], "wc_reply_authors_avg": [ 790.0, 497.61551422760124 ], "reply_reviewers_avg": [ 1.0, 1.0954451150103321 ], "reply_authors_avg": [ 2.0, 1.2649110640673518 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.25, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1663183835689284920&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "De novo Protein Design Using Geometric Vector Field Networks", "status": "Spotlight", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19294", "id": "9UIGyJJpay", "author_site": "weian mao, Muzhi Zhu, Zheng Sun, Shuaike Shen, Lin Yuanbo Wu, Hao Chen, Chunhua Shen", "tldr": "", "abstract": "Advances like protein diffusion have marked revolutionary progress in $\\textit{de novo}$ protein design, a central topic in life science. These methods typically depend on protein structure encoders to model residue backbone frames, where atoms do not exist. Most prior encoders rely on atom-wise features, such as angles and distances between atoms, which are not available in this context. Only a few basic encoders, like IPA, have been proposed for this scenario, exposing frame modeling as a bottleneck. In this work, we introduce the Vector Field Network (VFN), which enables network layers to perform learnable vector computations between coordinates of frame-anchored virtual atoms, thus achieving a higher capability for modeling frames. The vector computation operates in a manner similar to a linear layer, with each input channel receiving 3D virtual atom coordinates instead of scalar values. The multiple feature vectors output by the vector computation are then used to update the residue representations and virtual atom coordinates via attention aggregation. Remarkably, VFN also excels in modeling both frames and atoms, as the real atoms can be treated as the virtual atoms for modeling, positioning VFN as a potential $\\textit{universal encoder}$. In protein diffusion (frame modeling), VFN exhibits an impressive performance advantage over IPA, excelling in terms of both designability ($\\textbf{67.04}$\\% vs. 53.58\\%) and diversity ($\\textbf{66.54}$\\% vs. 51.98\\%). In inverse folding (frame and atom modeling), VFN outperforms the previous SoTA model, PiFold ($\\textbf{54.7}$\\% vs. 51.66\\%), on sequence recovery rate; we also propose a method of equipping VFN with the ESM model, which significantly surpasses the previous ESM-based SoTA ($\\textbf{62.67}$\\% vs. 55.65\\%), LM-Design, by a substantial margin.
Code is available at https://github.com/aim-uofa/VFN", "keywords": "Protein design;Protein structure encoder;Inverse folding;Protein diffusion", "primary_area": "applications to physical sciences (physics, chemistry, biology, etc.)", "supplementary_material": "", "author": "Weian Mao;Muzhi Zhu;Zheng Sun;Shuaike Shen;Lin Yuanbo Wu;Hao Chen;Chunhua Shen", "authorids": "~Weian_Mao2;~Muzhi_Zhu1;~Zheng_Sun7;~Shuaike_Shen1;~Lin_Yuanbo_Wu1;~Hao_Chen17;~Chunhua_Shen2", "gender": "M;M;M;;F;;", "homepage": ";https://z-mu-z.github.io/;https://ringhalsun.github.io/;;https://sites.google.com/site/linwuuq/home;;", "dblp": "289/1631;157/1679;;;65/6292-1;;", "google_scholar": "Qu-QXTsAAAAJ;https://scholar.google.com.hk/citations?user=064gBH4AAAAJ;https://scholar.google.com/citations?hl=en;;https://scholar.google.com.au/citations?user=mMiJUegAAAAJ;;", "orcid": ";;;;0000-0001-6119-058X;;", "linkedin": ";;;;;;", "or_profile": "~Weian_Mao2;~Muzhi_Zhu1;~Zheng_Sun7;~Shuaike_Shen1;~Lin_Yuanbo_Wu1;~Hao_Chen17;~Chunhua_Shen2", "aff": "University of Adelaide;Zhejiang University;Swansea University;;Swansea University;;", "aff_domain": "adelaide.edu.au;zju.edu.cn;swansea.ac.uk;;swansea.ac.uk;;", "position": "PhD student;PhD student;MS student;;Associate Professor;;", "bibtex": "@inproceedings{\nmao2024de,\ntitle={De novo Protein Design Using Geometric Vector Field Networks},\nauthor={Weian Mao and Muzhi Zhu and Zheng Sun and Shuaike Shen and Lin Yuanbo Wu and Hao Chen and Chunhua Shen},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=9UIGyJJpay}\n}", "github": "", "project": "", "reviewers": "MXeu;AKqf;unsL", "pdf_size": 6252283, "rating": "6;8;8", "confidence": "5;4;4", "soundness": "3;3;3", "contribution": "3;3;2", "presentation": "2;2;3", "wc_summary": "62;89;51", "wc_strengths": "13;65;76", "wc_weaknesses": "352;158;407", "wc_questions": "111;135;86", "wc_review": "538;447;620", "wc_reply_reviewers": "231;231;0", "wc_reply_authors": "2982;2357;1440", "reply_reviewers": "2;1;0", "reply_authors": "6;4;4", "rating_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 67.33333333333333, 15.965240019770729 ], "wc_strengths_avg": [ 51.333333333333336, 27.475241379993168 ], "wc_weaknesses_avg": [ 305.6666666666667, 106.80303782612594 ], "wc_questions_avg": [ 110.66666666666667, 20.005554784164875 ], "wc_review_avg": [ 535.0, 70.65880459409618 ], "wc_reply_reviewers_avg": [ 154.0, 108.89444430272832 ], "wc_reply_authors_avg": [ 2259.6666666666665, 633.2699968330165 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 4.666666666666667, 0.9428090415820634 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.9999999999999997, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5870346272431391349&as_sdt=5,38&sciodt=0,38&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=9UIGyJJpay", "pdf": "https://openreview.net/pdf?id=9UIGyJJpay", "email": "adelaide.edu.au;zju.edu.cn;swansea.ac.uk;;swansea.ac.uk;;", "author_num": 7, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "University of Adelaide;Zhejiang University;Swansea University", "aff_unique_dep": ";;", "aff_unique_url": 
"https://www.adelaide.edu.au;https://www.zju.edu.cn;https://www.swansea.ac.uk", "aff_unique_abbr": "Adelaide;ZJU;Swansea", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;2", "aff_country_unique": "Australia;China;United Kingdom" }, { "title": "Most discriminative stimuli for functional cell type clustering", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19293", "id": "9W6KaAcYlr", "author_site": "Max F. Burg, Thomas Zenkel, Michaela Vystr\u010dilov\u00e1, Jonathan Oesterle, Larissa H\u00f6fling, Konstantin F. Willeke, Jan Lause, Sarah M\u00fcller, Paul Fahey, Zhiwei Ding, Kelli Restivo, Shashwat Sridhar, Tim Gollisch, Philipp Berens, Andreas Tolias, Thomas Euler, Matthias Bethge, Alexander S Ecker", "tldr": "", "abstract": "Identifying cell types and understanding their functional properties is crucial for unraveling the mechanisms underlying perception and cognition. In the retina, functional types can be identified by carefully selected stimuli, but this requires expert domain knowledge and biases the procedure towards previously known cell types. In the visual cortex, it is still unknown what functional types exist and how to identify them. Thus, for unbiased identification of the functional cell types in retina and visual cortex, new approaches are needed. Here we propose an optimization-based clustering approach using deep predictive models to obtain functional clusters of neurons using Most Discriminative Stimuli (MDS). Our approach alternates between stimulus optimization with cluster reassignment akin to an expectation-maximization algorithm. The algorithm recovers functional clusters in mouse retina, marmoset retina and macaque visual area V4. This demonstrates that our approach can successfully find discriminative stimuli across species, stages of the visual system and recording techniques. The resulting most discriminative stimuli can be used to assign functional cell types fast and on the fly, without the need to train complex predictive models or show a large natural scene dataset, paving the way for experiments that were previously limited by experimental time. Crucially, MDS are interpretable: they visualize the distinctive stimulus patterns that most unambiguously identify a specific type of neuron.", "keywords": "clustering;discriminative stimuli;interpretable;optimization;expectation-maximization;functional cell types;digital twins;feature visualization;pre-image search;maximally exciting image", "primary_area": "applications to neuroscience & cognitive science", "supplementary_material": "", "author": "Max F Burg;Thomas Zenkel;Michaela Vystr\u010dilov\u00e1;Jonathan Oesterle;Larissa H\u00f6fling;Konstantin Friedrich Willeke;Jan Lause;Sarah M\u00fcller;Paul G. Fahey;Zhiwei Ding;Kelli Restivo;Shashwat Sridhar;Tim Gollisch;Philipp Berens;Andreas S. 
Tolias;Thomas Euler;Matthias Bethge;Alexander S Ecker", "authorids": "~Max_F_Burg1;~Thomas_Zenkel1;~Michaela_Vystr\u010dilov\u00e11;~Jonathan_Oesterle1;~Larissa_H\u00f6fling1;~Konstantin_Friedrich_Willeke1;~Jan_Lause1;~Sarah_M\u00fcller1;~Paul_G._Fahey1;~Zhiwei_Ding1;~Kelli_Restivo1;~Shashwat_Sridhar1;~Tim_Gollisch1;~Philipp_Berens1;~Andreas_S._Tolias1;~Thomas_Euler1;~Matthias_Bethge1;~Alexander_S_Ecker1", "gender": ";M;;M;;M;;F;;;F;M;M;M;;;M;M", "homepage": ";;;;;https://sinzlab.org/team.html;;https://hertie.ai/data-science/team/members/sarah-mueller;;;;;https://www.retina.uni-goettingen.de/;http://www.berenslab.org;;https://eulerlab.de/;https://bethgelab.org;http://eckerlab.org", "dblp": ";;;;;;;;;;;;;78/3560;32/3057;196/3259;77/3005;26/7228", "google_scholar": ";https://scholar.google.de/citations?user=jn2QYvoAAAAJ;;https://scholar.google.de/citations?user=jGoWHKoAAAAJ;s4lDqV0AAAAJ;sc3jZTsAAAAJ;;https://scholar.google.de/citations?user=UGqIaQcAAAAJ;;;;e9m2XqgAAAAJ;https://scholar.google.de/citations?user=yrtgRRkAAAAJ;https://scholar.google.de/citations?user=lPQLk3QAAAAJ;;https://scholar.google.de/citations?user=XeqGapgAAAAJ;https://scholar.google.com/citations?hl=en;VgYU_m8AAAAJ", "orcid": ";;;0000-0001-8919-1445;;0000-0003-4445-6408;;0000-0003-1500-8673;;;;0000-0002-8837-8555;0000-0003-3998-533X;;;0000-0002-4567-6966;;0000-0003-2392-5105", "linkedin": ";;michaela-vystr\u010dilov\u00e1-4b7a601ba/;;;;;;;;kellirestivo/;;;;;thomas-euler-9b67102b2/;;alexecker/", "or_profile": "~Max_F_Burg1;~Thomas_Zenkel1;~Michaela_Vystr\u010dilov\u00e11;~Jonathan_Oesterle1;~Larissa_H\u00f6fling1;~Konstantin_Friedrich_Willeke1;~Jan_Lause1;~Sarah_M\u00fcller1;~Paul_G._Fahey1;~Zhiwei_Ding1;~Kelli_Restivo1;~Shashwat_Sridhar1;~Tim_Gollisch1;~Philipp_Berens1;~Andreas_S._Tolias1;~Thomas_Euler1;~Matthias_Bethge1;~Alexander_S_Ecker1", "aff": ";;Georg-August Universit\u00e4t G\u00f6ttingen;Eberhard-Karls-Universit\u00e4t T\u00fcbingen;;University of Tuebingen;;University of Tuebingen;;;Baylor College of Medicine;Georg-August Universit\u00e4t G\u00f6ttingen;Georg-August Universit\u00e4t G\u00f6ttingen;University of Tuebingen;Baylor College of Medicine;University of Tuebingen;University of Tuebingen;Max Planck Institute for Dynamics and Self-Organization", "aff_domain": ";;uni-goettingen.de;uni-tuebingen.de;;uni-tuebingen.de;;uni-tuebingen.de;;;bcm.edu;uni-goettingen.de;uni-goettingen.de;uni-tuebingen.de;bcm.edu;uni-tuebingen.de;uni-tuebingen.de;ds.mpg.de", "position": ";;PhD student;Postdoc;;PhD student;;PhD student;;;PhD student;PhD student;Full Professor;Full Professor;Professor;Full Professor;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\nburg2024most,\ntitle={Most discriminative stimuli for functional cell type clustering},\nauthor={Max F Burg and Thomas Zenkel and Michaela Vystr{\\v{c}}ilov{\\'a} and Jonathan Oesterle and Larissa H{\\\"o}fling and Konstantin Friedrich Willeke and Jan Lause and Sarah M{\\\"u}ller and Paul G. Fahey and Zhiwei Ding and Kelli Restivo and Shashwat Sridhar and Tim Gollisch and Philipp Berens and Andreas S. 
Tolias and Thomas Euler and Matthias Bethge and Alexander S Ecker},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=9W6KaAcYlr}\n}", "github": "", "project": "", "reviewers": "NLuU;FrCb;KEew;69wH", "pdf_size": 1730049, "rating": "5;5;6;6", "confidence": "2;2;4;2", "soundness": "2;2;3;3", "contribution": "3;2;3;2", "presentation": "2;2;3;3", "wc_summary": "53;81;114;69", "wc_strengths": "25;35;77;30", "wc_weaknesses": "130;92;125;63", "wc_questions": "29;150;30;193", "wc_review": "237;358;346;355", "wc_reply_reviewers": "146;353;267;0", "wc_reply_authors": "1292;825;1450;825", "reply_reviewers": "1;1;1;0", "reply_authors": "3;2;3;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 2.5, 0.8660254037844386 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 79.25, 22.38721733489895 ], "wc_strengths_avg": [ 41.75, 20.656415468323637 ], "wc_weaknesses_avg": [ 102.5, 27.07858932810201 ], "wc_questions_avg": [ 100.5, 72.6102609828666 ], "wc_review_avg": [ 324.0, 50.42320894191484 ], "wc_reply_reviewers_avg": [ 191.5, 132.782717248895 ], "wc_reply_authors_avg": [ 1098.0, 278.6565987016995 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 18, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15776683461126722528&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "openreview": "https://openreview.net/forum?id=9W6KaAcYlr", "pdf": "https://openreview.net/pdf?id=9W6KaAcYlr", "email": ";;uni-goettingen.de;uni-tuebingen.de;;uni-tuebingen.de;;uni-tuebingen.de;;;bcm.edu;uni-goettingen.de;uni-goettingen.de;uni-tuebingen.de;bcm.edu;uni-tuebingen.de;uni-tuebingen.de;ds.mpg.de", "author_num": 18, "aff_unique_index": "0;1;2;2;3;0;0;2;3;2;2;4", "aff_unique_norm": "Georg-August Universit\u00e4t G\u00f6ttingen;Eberhard Karls University of T\u00fcbingen;University of Tuebingen;Baylor College of Medicine;Max Planck Institute for Dynamics and Self-Organization", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.uni-goettingen.de;https://www.uni-tuebingen.de/;https://www.uni-tuebingen.de/;https://www.bcm.edu;https://www.mpids.org", "aff_unique_abbr": "GAU;Uni T\u00fcbingen;Uni T\u00fcbingen;BCM;MPIDS", "aff_campus_unique_index": "1", "aff_campus_unique": ";T\u00fcbingen", "aff_country_unique_index": "0;0;0;0;1;0;0;0;1;0;0;0", "aff_country_unique": "Germany;United States" }, { "title": "Zipformer: A faster and better encoder for automatic speech recognition", "status": "Oral", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19292", "id": "9WD9KwssyT", "author_site": "Zengwei Yao, Liyong Guo, Xiaoyu Yang, Wei Kang, Fangjun Kuang, Yifan Yang, Zengrui Jin, Long Lin, Daniel Povey", "tldr": "", "abstract": "The Conformer has become the most popular encoder model for automatic speech recognition (ASR). It adds convolution modules to a transformer to learn both local and global dependencies. In this work we describe a faster, more memory-efficient, and better-performing transformer, called Zipformer. 
Modeling changes include: 1) a U-Net-like encoder structure where middle stacks operate at lower frame rates; 2) reorganized block structure with more modules, within which we re-use attention weights for efficiency; 3) a modified form of LayerNorm called BiasNorm allows us to retain some length information; 4) new activation functions SwooshR and SwooshL work better than Swish. We also propose a new optimizer, called ScaledAdam, which scales the update by each tensor's current scale to keep the relative change about the same, and also explicitly learns the parameter scale. It achieves faster convergence and better performance than Adam. Extensive experiments on LibriSpeech, Aishell-1, and WenetSpeech datasets demonstrate the effectiveness of our proposed Zipformer over other state-of-the-art ASR models. Our code is publicly available at https://github.com/k2-fsa/icefall.", "keywords": "Zipformer;ScaledAdam;automatic speech recognition", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Zengwei Yao;Liyong Guo;Xiaoyu Yang;Wei Kang;Fangjun Kuang;Yifan Yang;Zengrui Jin;Long Lin;Daniel Povey", "authorids": "~Zengwei_Yao1;~Liyong_Guo1;~Xiaoyu_Yang7;~Wei_Kang3;~Fangjun_Kuang1;~Yifan_Yang11;~Zengrui_Jin1;~Long_Lin1;~Daniel_Povey2", "gender": "M;;M;M;M;M;;F;", "homepage": ";;;https://pkufool.github.io/;http://github.com/csukuangfj;https://yfyeung.github.io/;;https://github.com/liliana404;", "dblp": ";;;;;83/89-5;;;", "google_scholar": "f3Eo9S0AAAAJ;2nME_lQAAAAJ;dQQ5_4wAAAAJ;dyIpw7kAAAAJ;3JCBRi8AAAAJ;https://scholar.google.com/citations?hl=zh-CN;;;", "orcid": ";;;;;0009-0003-0588-1812;;;", "linkedin": ";;xiaoyu-yang-602271193/;;;yifan-yang-290ba624b;;;", "or_profile": "~Zengwei_Yao1;~Liyong_Guo1;~Xiaoyu_Yang7;~Wei_Kang3;~Fangjun_Kuang1;~Yifan_Yang11;~Zengrui_Jin1;~Long_Lin1;~Daniel_Povey2", "aff": ";;Xiaomi Corp.;Xiaomi Corp.;;Shanghai Jiaotong University;;;", "aff_domain": ";;xiaomi.com;xiaomi.com;;sjtu.edu.cn;;;", "position": ";;Researcher;Researcher;;PhD student;;;", "bibtex": "@inproceedings{\nyao2024zipformer,\ntitle={Zipformer: A faster and better encoder for automatic speech recognition},\nauthor={Zengwei Yao and Liyong Guo and Xiaoyu Yang and Wei Kang and Fangjun Kuang and Yifan Yang and Zengrui Jin and Long Lin and Daniel Povey},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=9WD9KwssyT}\n}", "github": "", "project": "", "reviewers": "2d3G;hvPD;6jVU;UyUP", "pdf_size": 511626, "rating": "6;8;8;8", "confidence": "5;4;5;4", "soundness": "3;3;3;3", "contribution": "3;4;3;3", "presentation": "3;3;2;3", "wc_summary": "69;27;143;74", "wc_strengths": "36;56;56;43", "wc_weaknesses": "669;87;280;56", "wc_questions": "44;99;374;33", "wc_review": "818;269;853;206", "wc_reply_reviewers": "0;42;0;0", "wc_reply_authors": "763;437;791;404", "reply_reviewers": "0;2;0;0", "reply_authors": "1;2;1;1", "rating_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 78.25, 41.601532423698046 ], "wc_strengths_avg": [ 47.75, 8.613216588476108 ], "wc_weaknesses_avg": [ 273.0, 244.20790323001424 ], "wc_questions_avg": [ 137.5, 138.81372410536358 ], "wc_review_avg": [ 536.5, 300.08373831315816 ], "wc_reply_reviewers_avg": [ 10.5, 18.186533479473212 ], "wc_reply_authors_avg": [ 598.75,
178.90552674526296 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 97, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4991886084904422681&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "openreview": "https://openreview.net/forum?id=9WD9KwssyT", "pdf": "https://openreview.net/pdf?id=9WD9KwssyT", "email": ";;xiaomi.com;xiaomi.com;;sjtu.edu.cn;;;", "author_num": 9, "aff_unique_index": "0;0;1", "aff_unique_norm": "Xiaomi Corporation;Shanghai Jiao Tong University", "aff_unique_dep": ";", "aff_unique_url": "https://www.xiaomi.com;https://www.sjtu.edu.cn", "aff_unique_abbr": "Xiaomi;SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "9Wy6pLNQcG", "title": "RegionSpot: Unleashing the Power of Frozen Foundation Models for Open-World Region Understanding", "track": "main", "status": "Reject", "tldr": "", "abstract": "Understanding the semantics of individual regions or patches within unconstrained images, such as in open-world object detection, represents a critical yet challenging task in computer vision. Building on the success of powerful image-level vision-language (ViL) foundation models like CLIP, recent efforts have sought to harness their capabilities by either training a contrastive model from scratch with an extensive collection of region-label pairs or aligning the outputs of a detection model with image-level representations of region proposals. Despite notable progress, these approaches are plagued by computationally intensive training requirements, susceptibility to data noise, and a deficiency in contextual information.\n\nTo address these limitations, we explore the synergistic potential of off-the-shelf foundation models, leveraging their strengths in localization and semantics. We introduce a novel, generic, and efficient region recognition architecture, named \\textit{RegionSpot}, designed to integrate position-aware localization knowledge from a localization foundation model (e.g., SAM) with multimodal information extracted from a ViL model (e.g., CLIP). To fully exploit pretrained knowledge while minimizing training overhead, we keep both foundation models frozen, focusing optimization efforts solely on a lightweight attention-based knowledge integration module.\n\nThrough extensive experiments in the context of open-world object recognition, our \\textit{RegionSpot} demonstrates significant performance improvements over prior alternatives, while also providing substantial computational savings. For instance, our model can be trained with 3 million data samples in 1 day using 8 V100 GPUs. Our model outperforms GLIP by 6.5\\% in mean average precision (mAP), with an even larger margin of 14.8\\% for more challenging and rare categories.
Our source code will be made publicly available.", "keywords": "Open world Region Understanding", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Haosen Yang;Chuofan Ma;Bin Wen;Yi Jiang;Zehuan Yuan;Xiatian Zhu", "authorids": "~Haosen_Yang1;~Chuofan_Ma1;~Bin_Wen1;~Yi_Jiang2;~Zehuan_Yuan1;~Xiatian_Zhu3", "gender": "M;;M;M;M;", "homepage": ";https://machuofan.github.io/;;https://enjoyyi.github.io/;https://shallowyuan.github.io/;https://x-up-lab.github.io", "dblp": "245/9949-3;330/3312;;;227/3298;128/7935", "google_scholar": "https://scholar.google.com/citations?hl=en;hgKtgWAAAAAJ;https://scholar.google.com/citations?view_op=list_works;https://scholar.google.com.hk/citations?user=6dikuoYAAAAJ;;ZbA-z1cAAAAJ", "orcid": ";;;0000-0002-2133-8719;;0000-0002-9284-2955", "linkedin": ";;;;;", "or_profile": "~Haosen_Yang1;~Chuofan_Ma1;~Bin_Wen1;~Yi_Jiang2;~Zehuan_Yuan1;~Xiatian_Zhu3", "aff": "University of Surrey;University of Hong Kong;;Bytedance;ByteDance Inc.;University of Surrey", "aff_domain": "surrey.ac.uk;hku.hk;;bytedance.com;bytedance.com;surrey.ac.uk", "position": "PhD student;PhD student;;Researcher;Researcher;Associate Professor", "bibtex": "@misc{\nyang2024regionspot,\ntitle={RegionSpot: Unleashing the Power of Frozen Foundation Models for Open-World Region Understanding},\nauthor={Haosen Yang and Chuofan Ma and Bin Wen and Yi Jiang and Zehuan Yuan and Xiatian Zhu},\nyear={2024},\nurl={https://openreview.net/forum?id=9Wy6pLNQcG}\n}", "github": "", "project": "", "reviewers": "3vj6;Tjjg;PEMU;UsjU", "site": "https://openreview.net/forum?id=9Wy6pLNQcG", "pdf_size": 2420418, "rating": "3;5;5;6", "confidence": "4;4;3;4", "soundness": "3;3;2;3", "contribution": "2;2;2;3", "presentation": "3;2;2;4", "wc_summary": "71;20;65;77", "wc_strengths": "39;61;105;71", "wc_weaknesses": "157;215;216;155", "wc_questions": "33;21;37;46", "wc_review": "300;317;423;349", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "882;768;705;498", "reply_reviewers": "0;0;0;0", "reply_authors": "2;2;2;1", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 58.25, 22.487496525847426 ], "wc_strengths_avg": [ 69.0, 23.790754506740637 ], "wc_weaknesses_avg": [ 185.75, 29.76050234791073 ], "wc_questions_avg": [ 34.25, 8.98262211161084 ], "wc_review_avg": [ 347.25, 47.14008379288268 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 713.25, 139.53023865814893 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.13245323570650439, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:0fp0BEd_PgMJ:scholar.google.com/&scioq=RegionSpot:+Unleashing+the+Power+of+Frozen+Foundation+Models+for+Open-World+Region+Understanding&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;1;2;2;0", "aff_unique_norm": "University of Surrey;University of Hong Kong;ByteDance", "aff_unique_dep": ";;", "aff_unique_url": "https://www.surrey.ac.uk;https://www.hku.hk;https://www.bytedance.com", "aff_unique_abbr": "Surrey;HKU;Bytedance", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;1;1;0", "aff_country_unique": "United 
Kingdom;China" }, { "id": "9XdLlbxZCC", "title": "MC-JEPA: A Joint-Embedding Predictive Architecture for Self-Supervised Learning of Motion and Content Features", "track": "main", "status": "Reject", "tldr": "", "abstract": "Self-supervised learning of visual representations has been focusing on learning content features, which do not capture object motion or location, and focus on identifying and differentiating objects in images and videos. On the other hand, optical flow estimation is a task that does not involve understanding the content of the images on which it is estimated. We unify the two approaches and introduce MC-JEPA, a joint-embedding predictive architecture and self-supervised learning approach to jointly learn optical flow and content features within a shared encoder, demonstrating that the two associated objectives; the optical flow estimation objective and the self-supervised learning objective; benefit from each other and thus learn content features that incorporate motion information. The proposed approach achieves performance on-par with existing unsupervised optical flow benchmarks, as well as with common self-supervised learning approaches on downstream tasks such as semantic segmentation of images and videos.", "keywords": "self-supervised learning", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/7567d0bbed2fe4d9de9c36c3826dbfba4ba6f6e2.pdf", "author": "Adrien Bardes;Jean Ponce;Yann LeCun", "authorids": "~Adrien_Bardes1;~Jean_Ponce1;~Yann_LeCun1", "gender": "M;M;M", "homepage": ";http://www.di.ens.fr/~ponce/;http://yann.lecun.com", "dblp": "292/3848.html;p/JeanPonce;l/YannLeCun", "google_scholar": "SvRU8F8AAAAJ;https://scholar.google.com.tw/citations?user=vC2vywcAAAAJ;WLN3QrAAAAAJ", "orcid": ";;", "linkedin": "adrien-bardes-48a080129/;;", "or_profile": "~Adrien_Bardes1;~Jean_Ponce1;~Yann_LeCun1", "aff": "Meta Facebook;Ecole Normale Sup\u00e9rieure de Paris;New York University", "aff_domain": "meta.com;ens.fr;nyu.edu", "position": "Researcher;Full Professor;Full Professor", "bibtex": "@misc{\nbardes2024mcjepa,\ntitle={{MC}-{JEPA}: A Joint-Embedding Predictive Architecture for Self-Supervised Learning of Motion and Content Features},\nauthor={Adrien Bardes and Jean Ponce and Yann LeCun},\nyear={2024},\nurl={https://openreview.net/forum?id=9XdLlbxZCC}\n}", "github": "", "project": "", "reviewers": "arP2;q59r;Jf74;RRax", "site": "https://openreview.net/forum?id=9XdLlbxZCC", "pdf_size": 5198063, "rating": "3;5;6;6", "confidence": "4;4;4;3", "soundness": "3;3;3;3", "contribution": "2;3;2;3", "presentation": "2;3;2;2", "wc_summary": "33;117;63;34", "wc_strengths": "21;225;58;78", "wc_weaknesses": "167;302;228;26", "wc_questions": "2;2;103;129", "wc_review": "223;646;452;267", "wc_reply_reviewers": "0;0;0;42", "wc_reply_authors": "506;667;888;403", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;2;1", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 61.75, 34.098203765007916 ], "wc_strengths_avg": [ 95.5, 77.51290215183535 ], "wc_weaknesses_avg": [ 180.75, 101.32959834125467 ], "wc_questions_avg": [ 59.0, 57.736470276593806 ], "wc_review_avg": [ 397.0, 167.48283494137542 ], "wc_reply_reviewers_avg": [ 10.5, 18.186533479473212 ], "wc_reply_authors_avg": [ 616.0, 183.06692765215678 ], 
"reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.4714045207910316, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11805965224862551060&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Meta;Ecole Normale Sup\u00e9rieure de Paris;New York University", "aff_unique_dep": "Meta Platforms, Inc.;;", "aff_unique_url": "https://meta.com;https://www.ens.fr;https://www.nyu.edu", "aff_unique_abbr": "Meta;ENS Paris;NYU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Paris", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;France" }, { "id": "9Z0yB8rmQ2", "title": "Lyra: Orchestrating Dual Correction in Automated Theorem Proving", "track": "main", "status": "Reject", "tldr": "", "abstract": "Large Language Models (LLMs) present an intriguing avenue for exploration in the field of formal theorem proving. Nevertheless, their full potential, particularly concerning the mitigation of hallucinations and refinement through prover error messages, remains an area that has yet to be thoroughly investigated. To enhance the effectiveness of LLMs in the field, we introduce the Lyra, a new framework that employs two distinct correction mechanisms: Tool Correction (TC) and Conjecture Correction (CC). To implement Tool Correction in the post-processing of formal proofs, we leverage prior knowledge to utilize predefined prover tools (e.g., Sledgehammer) for guiding the replacement of incorrect tools. Tool Correction significantly contributes to mitigating hallucinations, thereby improving the overall accuracy of the proof. In addition, we introduce Conjecture Correction, an error feedback mechanism designed to interact with prover to refine formal proof conjectures with prover error messages. Compared to the previous refinement framework, the proposed Conjecture Correction refines generation with instruction but does not collect paired (generation, error & refinement) prompts. Our method has achieved state-of-the-art (SOTA) performance on both miniF2F validation (48.0% \u2192 55.3%) and test (45.5% \u2192 51.2%). We also present 3 IMO problems solved by Lyra. 
We believe Tool Correction (post-process for hallucination mitigation) and Conjecture Correction (subgoal adjustment from interaction with environment) could provide a promising avenue for future research in this field.", "keywords": "Theorem proving;large language model;neuro-symbolic method;automated theorem provers;miniF2F", "primary_area": "neurosymbolic & hybrid AI systems (physics-informed, logic & formal reasoning, etc.)", "supplementary_material": "/attachment/f09b60fa30c45e1aa100c31928288548d4ecb1a2.zip", "author": "Chuanyang Zheng;Haiming Wang;Enze Xie;Zhengying Liu;Jiankai Sun;Huajian Xin;Jianhao Shen;Zhenguo Li;Yu Li", "authorids": "~Chuanyang_Zheng3;~Haiming_Wang1;~Enze_Xie1;~Zhengying_Liu2;~Jiankai_Sun6;~Huajian_Xin1;~Jianhao_Shen1;~Zhenguo_Li1;~Yu_Li1", "gender": "M;M;M;M;;M;M;M;M", "homepage": "https://chuanyang-zheng.github.io/;;https://xieenze.github.io/;;;https://xinhuajian.wordpress.com/;;http://www.ee.columbia.edu/~zgli/;https://sites.google.com/view/liyu1995", "dblp": ";97/604;218/5441;241/1782;121/4211;356/3551;217/2324;23/6479;", "google_scholar": "LWwh7K4AAAAJ;zDPqP6AAAAAJ;42MVVPgAAAAJ;http:// DFme0joAAAAJ;726MCb8AAAAJ;E5M9x8wAAAAJ;9fppVAUAAAAJ;XboZC1AAAAAJ;8YHZx-AAAAAJ", "orcid": ";;;;;;;;0000-0002-3664-6722", "linkedin": ";;;;;;;;yuli1995/", "or_profile": "~Chuanyang_Zheng3;~Haiming_Wang1;~Enze_Xie1;~Zhengying_Liu2;~Jiankai_Sun6;~Huajian_Xin1;~Jianhao_Shen1;~Zhenguo_Li1;~Yu_Li1", "aff": "The Chinese University of Hong Kong;SUN YAT-SEN UNIVERSITY;Huawei Noah's Ark Lab;Huawei Technologies Ltd.;Stanford University;University of Edinburgh, University of Edinburgh;;Huawei Noah's Ark Lab;Department of Computer Science and Engineering, The Chinese University of Hong Kong", "aff_domain": "cse.cuhk.edu.hk;sysu.edu.cn;huawei.com;huawei.com;stanford.edu;ed.ac.uk;;huawei.com;cse.cuhk.edu.hk", "position": "PhD student;PhD student;Researcher;Researcher;PhD student;PhD student;;Principal Researcher;Assistant Professor", "bibtex": "@misc{\nzheng2024lyra,\ntitle={Lyra: Orchestrating Dual Correction in Automated Theorem Proving},\nauthor={Chuanyang Zheng and Haiming Wang and Enze Xie and Zhengying Liu and Jiankai Sun and Huajian Xin and Jianhao Shen and Zhenguo Li and Yu Li},\nyear={2024},\nurl={https://openreview.net/forum?id=9Z0yB8rmQ2}\n}", "github": "", "project": "", "reviewers": "bwwN;NmsF;1Jjk;Wc5t", "site": "https://openreview.net/forum?id=9Z0yB8rmQ2", "pdf_size": 1024887, "rating": "6;6;6;6", "confidence": "5;3;4;5", "soundness": "3;3;3;4", "contribution": "3;2;3;2", "presentation": "3;3;3;4", "wc_summary": "42;132;59;159", "wc_strengths": "23;111;37;37", "wc_weaknesses": "183;355;47;45", "wc_questions": "1;76;225;106", "wc_review": "249;674;368;347", "wc_reply_reviewers": "31;795;220;0", "wc_reply_authors": "331;5450;1576;2298", "reply_reviewers": "1;5;2;0", "reply_authors": "2;8;5;6", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 98.0, 48.821101994936576 ], "wc_strengths_avg": [ 52.0, 34.539832078341085 ], "wc_weaknesses_avg": [ 157.5, 127.00688957690446 ], "wc_questions_avg": [ 102.0, 80.65667982256646 ], "wc_review_avg": [ 409.5, 159.17678850887776 ], "wc_reply_reviewers_avg": [ 261.5, 319.3184147524223 ], "wc_reply_authors_avg": [ 2413.75, 1888.9074057507426 ], "reply_reviewers_avg": [ 2.0, 1.8708286933869707 ], "reply_authors_avg": [ 5.25, 2.165063509461097 ], "replies_avg": [ 39, 0 ], 
"authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17828337707468607238&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2;2;3;4;2;0", "aff_unique_norm": "Chinese University of Hong Kong;Sun Yat-sen University;Huawei;Stanford University;University of Edinburgh", "aff_unique_dep": ";;Noah's Ark Lab;;", "aff_unique_url": "https://www.cuhk.edu.hk;http://www.sysu.edu.cn;https://www.huawei.com;https://www.stanford.edu;https://www.ed.ac.uk", "aff_unique_abbr": "CUHK;SYSU;Huawei;Stanford;Edinburgh", "aff_campus_unique_index": "0;2;0", "aff_campus_unique": "Hong Kong SAR;;Stanford", "aff_country_unique_index": "0;0;0;0;1;2;0;0", "aff_country_unique": "China;United States;United Kingdom" }, { "title": "TOSS: High-quality Text-guided Novel View Synthesis from a Single Image", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19291", "id": "9ZUYJpvIys", "author_site": "Yukai Shi, Jianan Wang, He CAO, Boshi Tang, Xianbiao Qi, Tianyu Yang, Yukun Huang, Shilong Liu, Lei Zhang, Heung-Yeung Shum", "tldr": "", "abstract": "In this paper, we present TOSS, which introduces text to the task of novel view synthesis (NVS) from just a single RGB image. \nWhile Zero123 has demonstrated impressive zero-shot open-set NVS capabilities, it treats NVS as a pure image-to-image translation problem. This approach suffers from the challengingly under-constrained nature of single-view NVS: the process lacks means of explicit user control and often result in implausible NVS generations.\nTo address this limitation, TOSS uses text as high-level semantic information to constrain the NVS solution space.\nTOSS fine-tunes text-to-image Stable Diffusion pre-trained on large-scale text-image pairs and introduces modules specifically tailored to image and camera pose conditioning, as well as dedicated training for pose correctness and preservation of fine details. \nComprehensive experiments are conducted with results showing that our proposed TOSS outperforms Zero123 with higher-quality NVS results and faster convergence. 
We further support these results with comprehensive ablations that underscore the effectiveness and potential of \nthe introduced semantic guidance and architecture design.", "keywords": "novel view synthesis;3D object synthesis;diffusion model", "primary_area": "generative models", "supplementary_material": "/attachment/01aebc58b4b4450b01b5ff07c9d36c98febffd11.zip", "author": "Yukai Shi;Jianan Wang;He CAO;Boshi Tang;Xianbiao Qi;Tianyu Yang;Yukun Huang;Shilong Liu;Lei Zhang;Heung-Yeung Shum", "authorids": "~Yukai_Shi3;~Jianan_Wang2;~He_CAO1;~Boshi_Tang1;~Xianbiao_Qi2;~Tianyu_Yang2;~Yukun_Huang1;~Shilong_Liu1;~Lei_Zhang23;~Heung-Yeung_Shum1", "gender": ";F;M;M;M;M;M;M;M;M", "homepage": "https://shiyukai26.github.io/info/;https://scholar.google.com/citations?user=mt5mvZ8AAAAJ&hl=en;https://github.com/CiaoHe;https://github.com/TangYucopper;https://www.linkedin.com/in/xianbiao-qi-39617727/;https://tianyu-yang.com/;;https://www.lsl.zone;https://www.microsoft.com/en-us/research/people/hshum/;https://www.leizhang.org/", "dblp": ";49/6053,;;;118/3741;120/8076-3.html;186/1316;;;z/LeiZhang", "google_scholar": "oQXfkSQAAAAJ;mt5mvZ8AAAAJ;tLZ2V2kAAAAJ;;odjSydQAAAAJ;BXsWsf8AAAAJ;lHb5gzoAAAAJ;nkSVY3MAAAAJ;;fIlGZToAAAAJ", "orcid": ";;;;;;0000-0002-5322-2884;;;", "linkedin": ";;he-cao/;;;;;;;", "or_profile": "~Yukai_Shi3;~Jianan_Wang2;~He_CAO1;~Boshi_Tang1;~Xianbiao_Qi2;~Tianyu_Yang2;~Yukun_Huang1;~Shilong_Liu1;~Heung-Yeung_Shum1;~Lei_Zhang1", "aff": "Tsinghua University;International Digital Economy Academy (IDEA);Hong Kong University of Science and Technology;Tsinghua University;International Digital Economy Academy;International Digital Economy Academy;University of Hong Kong;NVIDIA;;International Digital Economy Academy", "aff_domain": "mail.tsinghua.edu.cn;idea.edu.cn;ust.hk;mails.tsinghua.edu.cn;idea.edu.cn;idea.edu.cn;hku.hk;nvidia.com;;idea.edu.cn", "position": "PhD student;Researcher;PhD student;MS student;Researcher;Researcher;Postdoc;Research Intern;;Chief Scientist", "bibtex": "@inproceedings{\nshi2024toss,\ntitle={{TOSS}: High-quality Text-guided Novel View Synthesis from a Single Image},\nauthor={Yukai Shi and Jianan Wang and He CAO and Boshi Tang and Xianbiao Qi and Tianyu Yang and Yukun Huang and Shilong Liu and Lei Zhang and Heung-Yeung Shum},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=9ZUYJpvIys}\n}", "github": "", "project": "", "reviewers": "LBZY;Givb;wyYi;gFkU", "pdf_size": 8550065, "rating": "6;6;6;6", "confidence": "4;2;4;5", "soundness": "3;3;3;3", "contribution": "3;3;2;3", "presentation": "2;3;3;3", "wc_summary": "126;74;84;40", "wc_strengths": "89;106;105;64", "wc_weaknesses": "236;135;127;122", "wc_questions": "38;21;33;3", "wc_review": "489;336;349;229", "wc_reply_reviewers": "33;14;42;46", "wc_reply_authors": "255;261;449;279", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 81.0, 30.675723300355934 ], "wc_strengths_avg": [ 91.0, 16.98528775146303 ], "wc_weaknesses_avg": [ 155.0, 46.994680550036726 ], "wc_questions_avg": [ 23.75, 13.47915056670857 ], "wc_review_avg": [ 350.75, 92.4077242442427 ], "wc_reply_reviewers_avg": [ 33.75, 12.336429791475327 ], "wc_reply_authors_avg": [ 311.0, 80.16233529532433 ], "reply_reviewers_avg": [ 1.0, 0.0 ], 
"reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13024502958196107678&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=9ZUYJpvIys", "pdf": "https://openreview.net/pdf?id=9ZUYJpvIys", "email": "mail.tsinghua.edu.cn;idea.edu.cn;ust.hk;mails.tsinghua.edu.cn;idea.edu.cn;idea.edu.cn;hku.hk;nvidia.com;;idea.edu.cn", "author_num": 10, "aff_unique_index": "0;1;2;0;1;1;3;4;1", "aff_unique_norm": "Tsinghua University;International Digital Economy Academy;Hong Kong University of Science and Technology;University of Hong Kong;NVIDIA", "aff_unique_dep": ";;;;NVIDIA Corporation", "aff_unique_url": "https://www.tsinghua.edu.cn;;https://www.ust.hk;https://www.hku.hk;https://www.nvidia.com", "aff_unique_abbr": "THU;IDEA;HKUST;HKU;NVIDIA", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;2", "aff_country_unique": "China;;United States" }, { "id": "9ZUz4M55Up", "title": "Revisiting the Lottery Ticket Hypothesis for Pre-trained Networks", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "The lottery ticket hypothesis (LTH) suggests the possibility of pruning neural networks at initialization. Our study revisits LTH in the context of transfer learning, unveiling novel insights surpassing prior studies limited to LTH's application in pre-trained networks. To begin, our study shows that multiple pruning-at-initialization methods are likely to find worse pruning masks than a simple magnitude-based pruning method for pre-trained networks, owing to an inaccurate approximation of the influence of each weight. Iterative magnitude pruning (IMP) can find trainable subnetworks (winning tickets) even for pre-trained networks, however, IMP is a costly algorithm that requires multiple training cycles. Given that trainable subnetworks can be identified only when the initial network withstands the training's inherent randomness, and considering the superior resilience of pre-trained networks to this randomness compared to randomly initialized networks, we empirically demonstrate the enhanced efficiency of identifying trainable subnetworks within the framework of transfer learning. By challenging conventional wisdom surrounding gradual magnitude pruning (GMP), we reveal its capability to significantly enhance the trade-off between transfer learning performance and sparsity in terms of pruning-at-initialization. Our experiments, which involve various models such as convolutional neural networks and transformers, across both vision and language domains, demonstrate that GMP can identify trainable subnetworks for pre-trained networks at a significantly lower cost than IMP. For example, for ImageNet pre-trained ResNet-50, at a pruning ratio of 99%, GMP achieves comparable or superior results to IMP on the CIFAR, Caltech-101, Oxford-IIIT Pets, and Stanford Cars datasets, with 42 times less computation than IMP. 
Ultimately, we provide empirical evidence that the methodological distinction between the LTH-based and conventional pruning methods can be blurred for pre-trained networks.", "keywords": "lottery ticket hypothesis;transfer learning", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/ec75867c154a5733f28b2b74a342bd509efee0eb.zip", "author": "Saehyung Lee;Se Jung Kwon;Byeongwook Kim;Sungroh Yoon", "authorids": "~Saehyung_Lee1;~Se_Jung_Kwon1;~Byeongwook_Kim1;~Sungroh_Yoon1", "gender": "M;M;;", "homepage": ";;;http://ailab.snu.ac.kr", "dblp": "260/0442;119/5676;220/5405;99/1474", "google_scholar": "nS24h74AAAAJ;https://scholar.google.co.kr/citations?user=8eTxKOkAAAAJ;https://scholar.google.co.kr/citations?user=OjfC7gUAAAAJ;Bphl_fIAAAAJ", "orcid": ";;;0000-0002-2367-197X", "linkedin": ";se-jung-kwon-305503175/;;", "or_profile": "~Saehyung_Lee1;~Se_Jung_Kwon1;~Byeongwook_Kim1;~Sungroh_Yoon1", "aff": "Adobe Systems;NAVER Cloud;NAVER CLOUD;Seoul National University", "aff_domain": "adobe.com;navercorp.com;navercorp.com;snu.ac.kr", "position": "Intern;AI Researcher;Researcher;Full Professor", "bibtex": "@misc{\nlee2024revisiting,\ntitle={Revisiting the Lottery Ticket Hypothesis for Pre-trained Networks},\nauthor={Saehyung Lee and Se Jung Kwon and Byeongwook Kim and Sungroh Yoon},\nyear={2024},\nurl={https://openreview.net/forum?id=9ZUz4M55Up}\n}", "github": "", "project": "", "reviewers": "mi9v;kmMF;V5JA;6QxM", "site": "https://openreview.net/forum?id=9ZUz4M55Up", "pdf_size": 784253, "rating": "3;3;5;5", "confidence": "4;5;3;4", "soundness": "1;2;3;3", "contribution": "1;2;2;3", "presentation": "2;3;3;4", "wc_summary": "75;94;110;109", "wc_strengths": "19;54;121;95", "wc_weaknesses": "175;251;182;313", "wc_questions": "2;17;4;8", "wc_review": "271;416;417;525", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 97.0, 14.19506956657839 ], "wc_strengths_avg": [ 72.25, 38.931831449342326 ], "wc_weaknesses_avg": [ 230.25, 56.25555528123422 ], "wc_questions_avg": [ 7.75, 5.7608593109014565 ], "wc_review_avg": [ 407.25, 90.27838888682052 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7071067811865475, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:mXiDScR1eqEJ:scholar.google.com/&scioq=Revisiting+the+Lottery+Ticket+Hypothesis+for+Pre-trained+Networks&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Adobe;NAVER Corporation;Seoul National University", "aff_unique_dep": "Adobe Systems Incorporated;Cloud Division;", "aff_unique_url": "https://www.adobe.com;https://www.naver.com;https://www.snu.ac.kr", "aff_unique_abbr": "Adobe;NAVER;SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "United States;South Korea" }, { "id": "9bSDTTDUIp", "title": "ERM++: An Improved Baseline for Domain Generalization", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Multi-source Domain Generalization (DG) measures a classifier's 
ability to generalize to new distributions of data it was not trained on, given several training domains. While several multi-source DG methods have been proposed, they incur additional complexity during training by using domain labels. Recent work has shown that a well-tuned Empirical Risk Minimization (ERM) training procedure, that is simply minimizing the empirical risk on the source domains, can outperform most existing DG methods. ERM has achieved such strong results while only tuning hyper-parameters such as learning rate, weight decay, and batch size. This paper aims to understand how we can push ERM as a baseline for DG further, thereby providing a stronger baseline for which to benchmark new methods. We call the resulting improved baseline ERM++, and it consists of better utilization of training data, model parameter selection, and weight-space regularization. ERM++ significantly improves the performance of DG on five multi-source datasets by over 5% compared to standard ERM using ResNet-50, and beats state-of-the-art despite being less computationally expensive. We also demonstrate the efficacy of ERM++ on the WILDS-FMOW dataset, a challenging DG benchmark. Finally, we show that with a CLIP-pretrained ViT-B/16, ERM++ outperforms ERM by over 10%, allowing one to take advantage of the stronger pre-training effectively. We will release code upon acceptance.", "keywords": "Domain Generalization;Multi-Source Domain Generalization.", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "/attachment/75d591d439050ce24e33a5216df536b9ce04f4be.zip", "author": "Piotr Teterwak;Kuniaki Saito;Theodoros Tsiligkaridis;Kate Saenko;Bryan A. Plummer", "authorids": "~Piotr_Teterwak1;~Kuniaki_Saito2;~Theodoros_Tsiligkaridis1;~Kate_Saenko1;~Bryan_A._Plummer1", "gender": "M;M;M;F;M", "homepage": "https://scholar.google.com/citations?user=lUkd1AMAAAAJ&hl=en&oi=ao;;https://sites.google.com/view/theo-t;http://ai.bu.edu;http://bryanplummer.com/", "dblp": "247/6128;182/1957;64/10412;88/2754;163/2330", "google_scholar": "lUkd1AMAAAAJ;https://scholar.google.co.jp/citations?user=2X0cwhkAAAAJ;hVUVOTIAAAAJ;https://scholar.google.com.tw/citations?user=9xDADY4AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;0000-0002-5704-7614;", "linkedin": ";;;;", "or_profile": "~Piotr_Teterwak1;~Kuniaki_Saito2;~Theodoros_Tsiligkaridis1;~Kate_Saenko1;~Bryan_Allen_Plummer1", "aff": "Boston University;OMRON SINICX;MIT Lincoln Laboratory, Massachusetts Institute of Technology;Boston University, Boston University;Boston University", "aff_domain": "bu.edu;sinicx.com;ll.mit.edu;bu.edu;bu.edu", "position": "PhD student;Researcher;Senior AI Research Scientist;Full Professor;Assistant Professor", "bibtex": "@misc{\nteterwak2024erm,\ntitle={{ERM}++: An Improved Baseline for Domain Generalization},\nauthor={Piotr Teterwak and Kuniaki Saito and Theodoros Tsiligkaridis and Kate Saenko and Bryan A. 
Plummer},\nyear={2024},\nurl={https://openreview.net/forum?id=9bSDTTDUIp}\n}", "github": "", "project": "", "reviewers": "Xq2C;Pkyt;72G7;4v1r", "site": "https://openreview.net/forum?id=9bSDTTDUIp", "pdf_size": 19522919, "rating": "3;3;5;5", "confidence": "4;5;3;3", "soundness": "2;2;2;2", "contribution": "2;2;2;2", "presentation": "3;2;3;2", "wc_summary": "48;37;112;43", "wc_strengths": "46;16;32;89", "wc_weaknesses": "115;73;190;284", "wc_questions": "2;14;4;281", "wc_review": "211;140;338;697", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "289;166;299;584", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.0, 0.0 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 60.0, 30.273751006441206 ], "wc_strengths_avg": [ 45.75, 27.13277538328875 ], "wc_weaknesses_avg": [ 165.5, 80.23247472189799 ], "wc_questions_avg": [ 75.25, 118.87677443470612 ], "wc_review_avg": [ 346.5, 214.43239027721535 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 334.5, 153.2750795139249 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12473608674129674084&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "Boston University;OMRON Corporation;Massachusetts Institute of Technology", "aff_unique_dep": ";;Lincoln Laboratory", "aff_unique_url": "https://www.bu.edu;https://www.omron.com;https://web.mit.edu", "aff_unique_abbr": "BU;OMRON;MIT", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Cambridge;Boston", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;Japan" }, { "title": "Bootstrapping Variational Information Pursuit with Large Language and Vision Models for Interpretable Image Classification", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19290", "id": "9bmTbVaA2A", "author_site": "Aditya Chattopadhyay, Kwan Ho Ryan Chan, Rene Vidal", "tldr": "", "abstract": "Variational Information Pursuit (V-IP) is an interpretable-by-design framework that makes predictions by sequentially selecting a short chain of user-defined, interpretable queries about the data that are most informative for the task. The prediction is based solely on the obtained query answers, which also serve as a faithful explanation for the prediction. Applying the framework to any task requires (i) specification of a query set, and (ii) densely annotated data with query answers to train classifiers to answer queries at test time. This limits V-IP's application to small-scale tasks where manual data annotation is feasible. In this work, we focus on image classification tasks and propose to relieve this bottleneck by leveraging pretrained language and vision models. Specifically, following recent work, we propose to use GPT, a Large Language Model, to propose semantic concepts as queries for a given classification task. To answer these queries, we propose a light-weight Concept Question-Answering network (Concept-QA) which learns to answer binary queries about semantic concepts in images. We design pseudo-labels to train our Concept-QA model using GPT and CLIP (a Vision-Language Model). 
Empirically, we find our Concept-QA model to be competitive with state-of-the-art VQA models in terms of answering accuracy but with an order of magnitude fewer parameters. This allows for seamless integration of Concept-QA into the V-IP framework as a fast-answering mechanism. We name this method Concept-QA+V-IP. Finally, we show on several datasets that Concept-QA+V-IP produces shorter, interpretable query chains which are more accurate than V-IP trained with CLIP-based answering systems. Code available at https://github.com/adityac94/conceptqa_vip.", "keywords": "Interpretable ML;Explainable AI;Information Pursuit;Large Language Models;Large Multimodal Models;Vision Language Models", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "", "author": "Aditya Chattopadhyay;Kwan Ho Ryan Chan;Rene Vidal", "authorids": "~Aditya_Chattopadhyay1;~Kwan_Ho_Ryan_Chan1;~Rene_Vidal1", "gender": "M;M;", "homepage": ";https://ryanchankh.github.io/;http://www.vision.jhu.edu", "dblp": "207/8574;267/5496;v/ReneVidal", "google_scholar": "aekzv1gAAAAJ;DBXWBqcAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;", "linkedin": ";ryanchankh/;rene-vidal-74844928/", "or_profile": "~Aditya_Chattopadhyay1;~Kwan_Ho_Ryan_Chan1;~Rene_Vidal1", "aff": "Johns Hopkins University;University of Pennsylvania ;Amazon", "aff_domain": "jhu.edu;seas.upenn.edu;amazon.com", "position": "PhD student;PhD student;Principal Researcher", "bibtex": "@inproceedings{\nchattopadhyay2024bootstrapping,\ntitle={Bootstrapping Variational Information Pursuit with Large Language and Vision Models for Interpretable Image Classification},\nauthor={Aditya Chattopadhyay and Kwan Ho Ryan Chan and Rene Vidal},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=9bmTbVaA2A}\n}", "github": "", "project": "", "reviewers": "ZeKa;3dNV;8mpd;8xTQ", "pdf_size": 20655217, "rating": "5;6;6;6", "confidence": "4;3;4;4", "soundness": "3;2;3;3", "contribution": "2;3;2;2", "presentation": "3;3;2;3", "wc_summary": "91;101;117;60", "wc_strengths": "152;77;78;41", "wc_weaknesses": "247;273;186;79", "wc_questions": "109;8;222;44", "wc_review": "599;459;603;224", "wc_reply_reviewers": "0;0;95;0", "wc_reply_authors": "1642;1527;1056;870", "reply_reviewers": "0;0;1;0", "reply_authors": "3;3;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 92.25, 20.801141795584204 ], "wc_strengths_avg": [ 87.0, 40.37945021913993 ], "wc_weaknesses_avg": [ 196.25, 74.69730584164331 ], "wc_questions_avg": [ 95.75, 81.38296811987138 ], "wc_review_avg": [ 471.25, 154.0785108313291 ], "wc_reply_reviewers_avg": [ 23.75, 41.13620667976084 ], "wc_reply_authors_avg": [ 1273.75, 320.22365231194277 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5154153297737871391&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "openreview": "https://openreview.net/forum?id=9bmTbVaA2A", "pdf": "https://openreview.net/pdf?id=9bmTbVaA2A", "email": "jhu.edu;seas.upenn.edu;amazon.com", "author_num": 3, "aff_unique_index": "0;1;2", 
"aff_unique_norm": "Johns Hopkins University;University of Pennsylvania;Amazon", "aff_unique_dep": ";;Amazon.com, Inc.", "aff_unique_url": "https://www.jhu.edu;https://www.upenn.edu;https://www.amazon.com", "aff_unique_abbr": "JHU;UPenn;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "AGILE3D: Attention Guided Interactive Multi-object 3D Segmentation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19289", "id": "9cQtXpRshE", "author_site": "Yuanwen Yue, Sabarinath Mahadevan, Jonas Schult, Francis Engelmann, Bastian Leibe, Konrad Schindler, Theodora Kontogianni", "tldr": "", "abstract": "During interactive segmentation, a model and a user work together to delineate objects of interest in a 3D point cloud. In an iterative process, the model assigns each data point to an object (or the background), while the user corrects errors in the resulting segmentation and feeds them back into the model. The current best practice formulates the problem as binary classification and segments objects one at a time. The model expects the user to provide positive clicks to indicate regions wrongly assigned to the background and negative clicks on regions wrongly assigned to the object. Sequentially visiting objects is wasteful since it disregards synergies between objects: a positive click for a given object can, by definition, serve as a negative click for nearby objects. Moreover, a direct competition between adjacent objects can speed up the identification of their common boundary. We introduce AGILE3D, an efficient, attention-based model that (1) supports simultaneous segmentation of multiple 3D objects, (2) yields more accurate segmentation masks with fewer user clicks, and (3) offers faster inference. Our core idea is to encode user clicks as spatial-temporal queries and enable explicit interactions between click queries as well as between them and the 3D scene through a click attention module. Every time new clicks are added, we only need to run a lightweight decoder that produces updated segmentation masks. In experiments with four different 3D point cloud datasets, AGILE3D sets a new state-of-the-art. Moreover, we also verify its practicality in real-world setups with real user studies. 
Project page: https://ywyue.github.io/AGILE3D.", "keywords": "interactive segmentation;3D instance segmentation;point cloud;attention", "primary_area": "applications to robotics, autonomy, planning", "supplementary_material": "/attachment/7f29a60c66bdc5881f680f613bc3c921576b6f6c.zip", "author": "Yuanwen Yue;Sabarinath Mahadevan;Jonas Schult;Francis Engelmann;Bastian Leibe;Konrad Schindler;Theodora Kontogianni", "authorids": "~Yuanwen_Yue1;~Sabarinath_Mahadevan1;~Jonas_Schult1;~Francis_Engelmann1;~Bastian_Leibe3;~Konrad_Schindler1;~Theodora_Kontogianni2", "gender": "M;M;M;;M;M;F", "homepage": "https://ywyue.github.io/;;https://www.vision.rwth-aachen.de/person/219/;;http://www.vision.rwth-aachen.de;https://igp.ethz.ch/personen/person-detail.html?persid=143986;", "dblp": "253/0221;220/3248;228/6936;;41/1228;73/488;191/4648", "google_scholar": "rO_8EHsAAAAJ;https://scholar.google.com/citations?hl=en;iW4T05IAAAAJ;;ZcULDB0AAAAJ;FZuNgqIAAAAJ;QVJIkWwAAAAJ", "orcid": ";;;;0000-0003-4225-0051;0000-0002-3172-9246;0000-0002-8754-8356", "linkedin": ";sabarinath-mahadevan-23b5b846?originalSubdomain=de;;;;konrad-schindler-5b0b22153/;", "or_profile": "~Yuanwen_Yue1;~Sabarinath_Mahadevan1;~Jonas_Schult1;~Francis_Engelmann1;~Bastian_Leibe3;~Konrad_Schindler1;~Theodora_Kontogianni2", "aff": "ETHZ - ETH Zurich;RWTH Aachen University;Rheinisch Westf\u00e4lische Technische Hochschule Aachen;;RWTH Aachen University;Swiss Federal Institute of Technology;ETHZ - ETH Zurich", "aff_domain": "ethz.ch;rwth-aachen.de;rwth-aachen.de;;rwth-aachen.de;ethz.ch;ethz.ch", "position": "Scientific Assistant;PhD student;PhD student;;Full Professor;Professor;Postdoc", "bibtex": "@inproceedings{\nyue2024agiled,\ntitle={{AGILE}3D: Attention Guided Interactive Multi-object 3D Segmentation},\nauthor={Yuanwen Yue and Sabarinath Mahadevan and Jonas Schult and Francis Engelmann and Bastian Leibe and Konrad Schindler and Theodora Kontogianni},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=9cQtXpRshE}\n}", "github": "", "project": "", "reviewers": "byjT;3vqw;Gxfu;PRcH", "pdf_size": 50509934, "rating": "3;5;6;8", "confidence": "4;4;3;3", "soundness": "2;2;3;4", "contribution": "2;2;3;3", "presentation": "3;3;3;4", "wc_summary": "69;52;50;78", "wc_strengths": "73;42;58;116", "wc_weaknesses": "365;106;31;57", "wc_questions": "76;5;1;14", "wc_review": "583;205;140;265", "wc_reply_reviewers": "263;0;0;0", "wc_reply_authors": "4041;859;483;204", "reply_reviewers": "3;0;0;0", "reply_authors": "8;3;2;2", "rating_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 62.25, 11.712706775122479 ], "wc_strengths_avg": [ 72.25, 27.535204738661378 ], "wc_weaknesses_avg": [ 139.75, 132.80695576663143 ], "wc_questions_avg": [ 24.0, 30.389142798045487 ], "wc_review_avg": [ 298.25, 170.2400878171766 ], "wc_reply_reviewers_avg": [ 65.75, 113.88234059765368 ], "wc_reply_authors_avg": [ 1396.75, 1544.24939290906 ], "reply_reviewers_avg": [ 0.75, 1.299038105676658 ], "reply_authors_avg": [ 3.75, 2.48746859276655 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.8320502943378437, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14686933716165665768&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "openreview": 
"https://openreview.net/forum?id=9cQtXpRshE", "pdf": "https://openreview.net/pdf?id=9cQtXpRshE", "email": "ethz.ch;rwth-aachen.de;rwth-aachen.de;;rwth-aachen.de;ethz.ch;ethz.ch", "author_num": 7, "aff_unique_index": "0;1;1;1;2;0", "aff_unique_norm": "ETH Zurich;RWTH Aachen University;Swiss Federal Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ethz.ch;https://www.rwth-aachen.de;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;RWTH;ETH Zurich", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Aachen", "aff_country_unique_index": "0;1;1;1;0;0", "aff_country_unique": "Switzerland;Germany" }, { "id": "9ceadCJY4B", "title": "Ask Again, Then Fail: Large Language Models\u2019 Vacillations in Judgement", "track": "main", "status": "Reject", "tldr": "", "abstract": "With the emergence of generative conversational large language models (LLMs) like ChatGPT, serving as virtual assistants in various fields, the stability and reliability of their responses have become crucial. However, during usage, it has been observed that these models tend to waver in their judgements when confronted with follow-up questions from users expressing skepticism or disagreement. In this work, we draw inspiration from questioning strategies in education and propose a \\textsc{Follow-up Questioning Mechanism} along with two evaluation metrics to assess the judgement consistency of LLMs before and after exposure to disturbances. We evaluate the judgement consistency of ChatGPT, PaLM2-Bison, and Vicuna-13B under this mechanism across eight reasoning benchmarks. Empirical results show that even when the initial answers are correct, judgement consistency sharply decreases when LLMs face disturbances such as questioning, negation, or misleading. Additionally, we study these models' judgement consistency under various settings (sampling temperature and prompts) to validate this issue further, observing the impact of prompt tone and conducting an in-depth error analysis for deeper behavioral insights. 
Furthermore, we also explore several prompting methods to mitigate this issue and demonstrate their effectiveness.", "keywords": "Large Language Models;Uncertainty;Evaluation;In-Context Learning;Alignment;Multi-round dialogue;Robustness", "primary_area": "applications to neuroscience & cognitive science", "supplementary_material": "/attachment/7152853d31897128e3389fe7cc5498495d0d877f.pdf", "author": "Qiming Xie;Zengzhi Wang;Yi Feng;Rui Xia", "authorids": "~Qiming_Xie1;~Zengzhi_Wang1;~Yi_Feng8;~Rui_Xia1", "gender": ";M;M;M", "homepage": ";https://sinclaircoder.github.io/;https://github.com/mind-yi;http://www.nustm.cn/member/rxia/", "dblp": ";34/133;;", "google_scholar": "6YxHXgEAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?user=Znde6gwAAAAJ", "orcid": ";0000-0002-6146-6248;0009-0003-9827-6733;", "linkedin": ";;;", "or_profile": "~Qiming_Xie1;~Zengzhi_Wang1;~Yi_Feng8;~Rui_Xia1", "aff": "Nanjing University of Science and Technology;;Nanjing University of Science and Technology;Nanjing University of Science and Technology", "aff_domain": "njust.edu.cn;;njust.edu.cn;njust.edu.cn", "position": "PhD student;;MS student;Full Professor", "bibtex": "@misc{\nxie2024ask,\ntitle={Ask Again, Then Fail: Large Language Models{\\textquoteright} Vacillations in Judgement},\nauthor={Qiming Xie and Zengzhi Wang and Yi Feng and Rui Xia},\nyear={2024},\nurl={https://openreview.net/forum?id=9ceadCJY4B}\n}", "github": "", "project": "", "reviewers": "AVRR;3h1U;bqxm", "site": "https://openreview.net/forum?id=9ceadCJY4B", "pdf_size": 513500, "rating": "5;6;6", "confidence": "4;4;4", "soundness": "3;3;3", "contribution": "2;2;3", "presentation": "3;3;4", "wc_summary": "89;137;110", "wc_strengths": "76;41;76", "wc_weaknesses": "241;35;100", "wc_questions": "1;20;63", "wc_review": "407;233;349", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1782;341;2842", "reply_reviewers": "0;0;0", "reply_authors": "4;1;7", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 112.0, 19.6468827043885 ], "wc_strengths_avg": [ 64.33333333333333, 16.49915822768611 ], "wc_weaknesses_avg": [ 125.33333333333333, 85.98578693921196 ], "wc_questions_avg": [ 28.0, 25.93581821856921 ], "wc_review_avg": [ 329.6666666666667, 72.33870947762954 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1655.0, 1024.9705686831533 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 4.0, 2.449489742783178 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14812058383281440644&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0", "aff_unique_norm": "Nanjing University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.nust.edu.cn/", "aff_unique_abbr": "NUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "9cumTvvlHG", "title": "Implicit Chain of Thought Reasoning via Knowledge Distillation", "track": "main", "status": "Reject", "tldr": "", "abstract": "To augment language models with the ability to reason, researchers usually prompt or finetune them to produce chain of thought 
reasoning steps before producing the final answer. However, although people use natural language to reason effectively, it may be that LMs could reason more effectively with some intermediate computation that is not in natural language. In this work, we explore an alternative reasoning approach: instead of explicitly producing the chain of thought reasoning steps, we use the language model\u2019s internal hidden states to perform implicit reasoning. The implicit reasoning steps are distilled from a teacher model trained on explicit chain-of-thought reasoning, and instead of doing reasoning \u201chorizontally\u201d by producing intermediate words one-by-one, we distill it such that the reasoning\nhappens \u201cvertically\u201d among the hidden states in different layers. We conduct experiments on a multi-digit multiplication task and a grade school math problem dataset and find that this approach is able to outperform baselines that directly produce the answer by a large margin.", "keywords": "chain of thought;knowledge distillation", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Yuntian Deng;Kiran Prasad;Roland Fernandez;Paul Smolensky;Vishrav Chaudhary;Stuart Shieber", "authorids": "~Yuntian_Deng2;kiranprasad@microsoft.com;~Roland_Fernandez1;~Paul_Smolensky1;~Vishrav_Chaudhary1;~Stuart_Shieber1", "gender": ";;M;M;;M", "homepage": "https://yuntiandeng.com;;https://www.microsoft.com/en-us/research/people/rfernand/;http://cogsci.jhu.edu/directory/paul-smolensky/;;http://www.eecs.harvard.edu/~shieber/", "dblp": "166/1720;;http://dblp.uni-trier.de/pers/hd/f/Fernandez:Roland;48/1105;;", "google_scholar": "tk0e5lYAAAAJ;;4__jyWsAAAAJ;PRtkZzYAAAAJ;;", "orcid": ";;0000-0002-8032-6646;0000-0003-2420-182X;;0000-0002-7733-8195", "linkedin": ";;fernandezroland/;paul-smolensky-b1871183/;;", "or_profile": "~Yuntian_Deng2;kiranprasad@microsoft.com;~Roland_Fernandez1;~Paul_Smolensky1;~Vishrav_Chaudhary1;~Stuart_Shieber1", "aff": "Allen Institute for Artificial Intelligence;;Microsoft Research AI;Johns Hopkins University;;Harvard University", "aff_domain": "allenai.org;;microsoft.com;jhu.edu;;harvard.edu", "position": "Postdoc;;Senior Reseacher;Professor;;Full Professor", "bibtex": "@misc{\ndeng2024implicit,\ntitle={Implicit Chain of Thought Reasoning via Knowledge Distillation},\nauthor={Yuntian Deng and Kiran Prasad and Roland Fernandez and Paul Smolensky and Vishrav Chaudhary and Stuart Shieber},\nyear={2024},\nurl={https://openreview.net/forum?id=9cumTvvlHG}\n}", "github": "", "project": "", "reviewers": "XfmE;kRur;t8BA", "site": "https://openreview.net/forum?id=9cumTvvlHG", "pdf_size": 275248, "rating": "3;3;6", "confidence": "3;4;4", "soundness": "2;2;2", "contribution": "2;2;2", "presentation": "3;2;3", "wc_summary": "91;103;239", "wc_strengths": "41;9;81", "wc_weaknesses": "65;446;269", "wc_questions": "24;27;192", "wc_review": "221;585;781", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "246;580;707", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 4.0, 1.4142135623730951 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.0, 0.0 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 144.33333333333334, 67.11846905948383 ], "wc_strengths_avg": [ 43.666666666666664, 29.4542960458327 ], "wc_weaknesses_avg": [ 260.0, 155.6727336433712 ], "wc_questions_avg": [ 81.0, 
78.49840762716146 ], "wc_review_avg": [ 529.0, 232.022987366913 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 511.0, 194.42393542634267 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14329277276992928160&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Allen Institute for Artificial Intelligence;Microsoft;Johns Hopkins University;Harvard University", "aff_unique_dep": ";AI;;", "aff_unique_url": "https://allenai.org;https://www.microsoft.com/en-us/research;https://www.jhu.edu;https://www.harvard.edu", "aff_unique_abbr": "AI2;MSR;JHU;Harvard", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "9g8h5HwZMy", "title": "Subgraph Diffusion for 3D Molecular Representation Learning: Combining Continuous and Discrete", "track": "main", "status": "Reject", "tldr": "", "abstract": "Molecular representation learning has shown great success in AI-based drug discovery. The 3D geometric structure contains crucial information about the underlying energy function, related to the physical and chemical properties. Recently, denoising diffusion probabilistic models have achieved impressive results in molecular conformation generation. However, the knowledge of pre-trained diffusion models has not been fully exploited in molecular representation learning. In this paper, we study the ability of representation learning inherent in the diffusion model for conformation generation. We introduce a new general diffusion model framework called MaskedDiff for molecular representation learning. Instead of adding noise to atoms like conventional diffusion models, MaskedDiff uses a discrete distribution to select a subset of the atoms to add continuous Gaussian noise at each step during the forward process. Further, we develop a novel subgraph diffusion model termed SUBGDIFF for enhancing the perception of molecular substructure in the denoising network (noise predictor), by incorporating auxiliary subgraph predictors during training. 
Experiments on molecular conformation generation and 3D molecular property prediction demonstrate the superior performance of our approach.", "keywords": "Diffusion model;Molecular representation learning;Subgraph;Masked vector", "primary_area": "applications to physical sciences (physics, chemistry, biology, etc.)", "supplementary_material": "/attachment/00780d42048704782dd30c09a50b5b27d5e58bd6.zip", "author": "Jiying Zhang;Zijing Liu;Yu Wang;Zhengyang Qi;Yu Li", "authorids": "~Jiying_Zhang1;~Zijing_Liu1;~Yu_Wang43;~Zhengyang_Qi2;~Yu_Li4", "gender": "M;;F;M;M", "homepage": "https://youjibiying.github.io/;https://github.com/zj-liu;https://github.com/rain305f;https://github.com/MiloQ/;https://yu-li.github.io/", "dblp": "287/9432;205/3211;;;34/2997-3", "google_scholar": "j90eZ0MAAAAJ;;lzsu-5MAAAAJ;;j9lwU7kAAAAJ", "orcid": ";0000-0002-0189-7409;;;", "linkedin": ";;;;", "or_profile": "~Jiying_Zhang1;~Zijing_Liu1;~Yu_Wang43;~Zhengyang_Qi2;~Yu_Li4", "aff": "IDEA;International Digital Economy Academy;Peking University;University of Science and Technology of China;International Digital Economy Academy", "aff_domain": "idea.edu.cn;idea.edu.cn;pku.edu.cn;ustc.edu.cn;idea.edu.cn", "position": "Researcher;Researcher;MS student;MS student;Principal Researcher", "bibtex": "@misc{\nzhang2024subgraph,\ntitle={Subgraph Diffusion for 3D Molecular Representation Learning: Combining Continuous and Discrete},\nauthor={Jiying Zhang and Zijing Liu and Yu Wang and Zhengyang Qi and Yu Li},\nyear={2024},\nurl={https://openreview.net/forum?id=9g8h5HwZMy}\n}", "github": "", "project": "", "reviewers": "yz4M;eamf;aJi2;7f3c", "site": "https://openreview.net/forum?id=9g8h5HwZMy", "pdf_size": 4601085, "rating": "3;5;6;6", "confidence": "5;4;4;3", "soundness": "2;2;3;3", "contribution": "2;2;3;2", "presentation": "2;2;2;3", "wc_summary": "52;78;73;49", "wc_strengths": "49;37;53;48", "wc_weaknesses": "318;99;110;40", "wc_questions": "13;170;104;33", "wc_review": "432;384;340;170", "wc_reply_reviewers": "68;79;0;0", "wc_reply_authors": "3115;3051;1688;1728", "reply_reviewers": "1;1;0;0", "reply_authors": "6;6;3;3", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 63.0, 12.668859459319927 ], "wc_strengths_avg": [ 46.75, 5.931905258852336 ], "wc_weaknesses_avg": [ 141.75, 105.18168804502046 ], "wc_questions_avg": [ 80.0, 61.99596761080514 ], "wc_review_avg": [ 331.5, 98.75601247519059 ], "wc_reply_reviewers_avg": [ 36.75, 36.9552093756753 ], "wc_reply_authors_avg": [ 2395.5, 688.0176233207984 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 4.5, 1.5 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8660254037844386, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:93SazIYRKncJ:scholar.google.com/&scioq=Subgraph+Diffusion+for+3D+Molecular+Representation+Learning:+Combining+Continuous+and+Discrete&hl=en&as_sdt=0,14", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;1", "aff_unique_norm": "Institute of Electrical and Electronics Engineers;International Digital Economy Academy;Peking University;University of Science and Technology of China", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ieee.org;;http://www.pku.edu.cn;http://www.ustc.edu.cn", "aff_unique_abbr": "IEEE;;Peking U;USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;2;2", "aff_country_unique": "United States;;China" }, { "id": "9grjdFDiAj", "title": "Probabilistic Stability of Stochastic Gradient Descent", "track": "main", "status": "Reject", "tldr": "", "abstract": "Characterizing and understanding the stability of Stochastic Gradient Descent (SGD) remains an open problem in deep learning. A common method is to utilize the convergence of statistical moments, esp. the variance, of the parameters to quantify the stability. We revisit the definition of stability for SGD and propose using the \\textit{convergence in probability} condition to define the \\textit{probabilistic stability} of SGD. The probabilistic stability sheds light on a fundamental question in deep learning theory: how SGD selects a meaningful solution for a neural network from an enormous number of possible solutions that may severely overfit. We show that only through the lens of probabilistic stability does SGD exhibit rich and practically relevant phases of learning, such as the phases of the complete loss of stability, incorrect learning where the model captures incorrect data correlation, convergence to low-rank saddles, and correct learning where the model captures the correct correlation. These phase boundaries are precisely quantified by the Lyapunov exponents of the dynamics. The obtained phase diagrams imply that SGD prefers low-rank saddles in a neural network when the underlying gradient is noisy, thereby influencing the learning performance, for better or for worse.", "keywords": "stability;stochastic gradient descent", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Liu Ziyin;Botao Li;Tomer Galanti;Masahito Ueda", "authorids": "~Liu_Ziyin1;~Botao_Li1;~Tomer_Galanti1;~Masahito_Ueda1", "gender": ";M;M;M", "homepage": "https://www.mit.edu/~ziyinl/;;https://tomergalanti.github.io;http://cat.phys.s.u-tokyo.ac.jp/index-e.html", "dblp": ";;198/1490;", "google_scholar": "NpN9oRMAAAAJ;zRB_MlAAAAAJ;;https://scholar.google.co.jp/citations?user=Xpjx9CwAAAAJ", "orcid": ";0000-0003-1631-4373;;0000-0002-5367-1436", "linkedin": ";botao-li-33a0ab185;tomer-galanti-5880b1104/;", "or_profile": "~Liu_Ziyin1;~Botao_Li1;~Tomer_Galanti1;~Masahito_Ueda1", "aff": "Massachusetts Institute of Technology;Laboratoire de Probabilit\u00e9s, Statistique et Mod\u00e9lisation;Texas A&M University - College Station;The University of Tokyo", "aff_domain": "mit.edu;lpsm.paris;tamu.edu;u-tokyo.ac.jp", "position": "Postdoc;Postdoc;Assistant Professor;Full Professor", "bibtex": "@misc{\nziyin2024probabilistic,\ntitle={Probabilistic Stability of Stochastic Gradient Descent},\nauthor={Liu Ziyin and Botao Li and Tomer Galanti and Masahito Ueda},\nyear={2024},\nurl={https://openreview.net/forum?id=9grjdFDiAj}\n}", "github": "", "project": "", "reviewers": "Mfzw;PopN;My7s;bHzM", "site": "https://openreview.net/forum?id=9grjdFDiAj", "pdf_size": 2076494, "rating": "3;5;5;5", "confidence": "3;3;3;4", "soundness": "2;2;3;4", "contribution": "2;2;2;3", "presentation": "2;3;1;2", "wc_summary": "59;91;64;180", "wc_strengths": "39;50;25;62", "wc_weaknesses": "219;70;87;206", "wc_questions": "167;224;417;207", "wc_review": "484;435;593;655", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1557;993;1438;1365", "reply_reviewers": "0;0;0;0", "reply_authors": "3;2;2;2", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 
0.82915619758885 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 98.5, 48.602983447521 ], "wc_strengths_avg": [ 44.0, 13.656500283747663 ], "wc_weaknesses_avg": [ 145.5, 67.4258852370512 ], "wc_questions_avg": [ 253.75, 96.49708544821445 ], "wc_review_avg": [ 541.75, 86.86591679133997 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1338.25, 210.78113648996202 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8050913484983213429&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Massachusetts Institute of Technology;Laboratoire de Probabilit\u00e9s, Statistique et Mod\u00e9lisation;Texas A&M University;University of Tokyo", "aff_unique_dep": ";Department of Probability, Statistics and Modeling;;", "aff_unique_url": "https://web.mit.edu;;https://www.tamu.edu;https://www.u-tokyo.ac.jp", "aff_unique_abbr": "MIT;;TAMU;UTokyo", "aff_campus_unique_index": "1", "aff_campus_unique": ";College Station", "aff_country_unique_index": "0;1;0;2", "aff_country_unique": "United States;France;Japan" }, { "id": "9gyDdCKTDJ", "title": "Gaitor: Learning a Unified Representation for Continuous Gait Transition and Terrain Traversal for Quadruped Robots", "track": "main", "status": "Reject", "tldr": "", "abstract": "The current state-of-the-art in quadruped locomotion is able to produce robust motion for terrain traversal but requires the segmentation of a desired trajectory into a discrete set of skills such as trot, crawl and pace. This misses the opportunity to leverage commonalities between individual gait types for efficient learning and are unable to smoothly transition between them. Here we present Gaitor, which creates a learnt representation capturing correlations across multiple distinct gait types resulting in the discovery of smooth transitions between motions. In particular, this representation is compact meaning that information common to all gait types is shared. The emerging structure is interpretable in that it encodes phase correlations between the different gait types which can be leveraged to produce smooth gait transitions. In addition, foot swing characteristics are disentangled and directly addressable. Together with a rudimentary terrain encoding and a learned planner operating in this structured latent representation, Gaitor is able to take motion commands including gait type and characteristics from a user while reacting to uneven terrain. We evaluate Gaitor in both simulated and real-world settings, such as climbing over raised platforms, on an ANYmal C platform. 
To the best of our knowledge, this is the first work learning an interpretable unified-latent representation for multiple gaits, resulting in smooth and natural looking gait transitions between trot and crawl on a real quadruped robot.", "keywords": "Representation Learning;Learning for Control;Quadruped Robots", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/46c3aaac74ab3f930d6e07d07f05685a0e77eacd.zip", "author": "Alexander Luis Mitchell;Wolfgang Merkt;Aristotelis Papatheodorou;Ioannis Havoutis;Ingmar Posner", "authorids": "~Alexander_Luis_Mitchell1;~Wolfgang_Merkt1;aristotelis@robots.ox.ac.uk;~Ioannis_Havoutis1;~Ingmar_Posner1", "gender": "M;;;;", "homepage": ";http://www.wolfgangmerkt.com/;;;", "dblp": "268/8155;;;;59/542", "google_scholar": "https://scholar.google.co.uk/citations?user=7YV2TGMAAAAJ;WzpoCwkAAAAJ;;;dPk-iwsAAAAJ", "orcid": ";0000-0003-3235-4906;;;0000-0001-6270-700X", "linkedin": ";;;;ingmar-posner-20b49a", "or_profile": "~Alexander_Luis_Mitchell1;~Wolfgang_Merkt1;aristotelis@robots.ox.ac.uk;~Ioannis_Havoutis1;~Ingmar_Posner1", "aff": "University of Oxford;University of Oxford, University of Oxford;;;University of Oxford", "aff_domain": "oxford.ac.uk;robots.ox.ac.uk;;;ox.ac.uk", "position": "Postdoc;Postdoc;;;Full Professor", "bibtex": "@misc{\nmitchell2024gaitor,\ntitle={Gaitor: Learning a Unified Representation for Continuous Gait Transition and Terrain Traversal for Quadruped Robots},\nauthor={Alexander Luis Mitchell and Wolfgang Merkt and Aristotelis Papatheodorou and Ioannis Havoutis and Ingmar Posner},\nyear={2024},\nurl={https://openreview.net/forum?id=9gyDdCKTDJ}\n}", "github": "", "project": "", "reviewers": "QD8G;keBn;A1WD;st69", "site": "https://openreview.net/forum?id=9gyDdCKTDJ", "pdf_size": 3909475, "rating": "3;5;5;5", "confidence": "5;2;3;4", "soundness": "3;4;2;2", "contribution": "3;3;2;2", "presentation": "1;3;2;2", "wc_summary": "118;102;37;160", "wc_strengths": "47;53;39;71", "wc_weaknesses": "327;58;21;320", "wc_questions": "57;3;364;3", "wc_review": "549;216;461;554", "wc_reply_reviewers": "136;36;55;0", "wc_reply_authors": "662;580;820;484", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 104.25, 44.228808484968255 ], "wc_strengths_avg": [ 52.5, 11.779218989389747 ], "wc_weaknesses_avg": [ 181.5, 142.62275414533264 ], "wc_questions_avg": [ 106.75, 150.15054944954414 ], "wc_review_avg": [ 445.0, 137.28983939097606 ], "wc_reply_reviewers_avg": [ 56.75, 49.8366080306435 ], "wc_reply_authors_avg": [ 636.5, 123.25887391989268 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.7745966692414834, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:jEOifc3UKW4J:scholar.google.com/&scioq=Gaitor:+Learning+a+Unified+Representation+for+Continuous+Gait+Transition+and+Terrain+Traversal+for+Quadruped+Robots&hl=en&as_sdt=0,10", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Oxford", "aff_unique_dep": "", "aff_unique_url": "https://www.ox.ac.uk", "aff_unique_abbr": "Oxford", "aff_campus_unique_index": "", 
"aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Machine Unlearning for Image-to-Image Generative Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19288", "id": "9hjVoPWPnh", "author_site": "Guihong Li, Hsiang Hsu, Chun-Fu Chen, Radu Marculescu", "tldr": "", "abstract": "Machine unlearning has emerged as a new paradigm to deliberately forget data samples from a given model in order to adhere to stringent regulations.\nHowever, existing machine unlearning methods have been primarily focused on classification models, leaving the landscape of unlearning for generative models relatively unexplored.\nThis paper serves as a bridge, addressing the gap by providing a unifying framework of machine unlearning for image-to-image generative models.\nWithin this framework, we propose a computationally-efficient algorithm, underpinned by rigorous theoretical analysis, that demonstrates negligible performance degradation on the retain samples, while effectively removing the information from the forget samples. \nEmpirical studies on two large-scale datasets, ImageNet-1K and Places-365, further show that our algorithm does not rely on the availability of the retain samples, which further complies with data retention policy.\nTo our best knowledge, this work is the first that represents systemic, theoretical, empirical explorations of machine unlearning specifically tailored for image-to-image generative models.", "keywords": "Machine Unlearning;Generative Models;Diffusion Models;GAN;Masked Autoencoder", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "", "author": "Guihong Li;Hsiang Hsu;Chun-Fu Chen;Radu Marculescu", "authorids": "~Guihong_Li1;~Hsiang_Hsu1;~Chun-Fu_Chen1;~Radu_Marculescu2", "gender": "Unspecified;M;M;M", "homepage": "https://liguihong.github.io/;https://hsianghsu.github.io;;https://radum.ece.utexas.edu/", "dblp": "143/6649.html;;48/915;88/3494", "google_scholar": ";https://scholar.google.com.tw/citations?user=JRl3iYIAAAAJ;9gqd5cYAAAAJ;ZCmYP5cAAAAJ", "orcid": "0000-0001-8537-8632;0000-0001-8084-3929;;0000-0003-1826-7646", "linkedin": ";;;", "or_profile": "~Guihong_Li1;~Hsiang_Hsu1;~Chun-Fu_Chen1;~Radu_Marculescu2", "aff": "University of Texas, Austin;JP Morgan & Chase Bank;JPMorganChase, GTAR;University of Texas, Austin", "aff_domain": "utexas.edu;jpmchase.com;jpmchase.com;utexas.edu", "position": "PhD student;Researcher;Executive Director;Full Professor", "bibtex": "@inproceedings{\nli2024machine,\ntitle={Machine Unlearning for Image-to-Image Generative Models},\nauthor={Guihong Li and Hsiang Hsu and Chun-Fu Chen and Radu Marculescu},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=9hjVoPWPnh}\n}", "github": "", "project": "", "reviewers": "L8mN;uD61;4Xrc;hK5e", "pdf_size": 31020328, "rating": "5;5;6;8", "confidence": "3;4;4;4", "soundness": "2;2;3;3", "contribution": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "79;90;74;106", "wc_strengths": "93;55;41;54", "wc_weaknesses": "253;593;48;56", "wc_questions": "49;33;21;202", "wc_review": "474;771;184;418", "wc_reply_reviewers": "0;270;0;0", "wc_reply_authors": "1349;3084;547;1057", "reply_reviewers": "0;1;0;0", "reply_authors": "3;5;2;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.5, 0.5 
], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 87.25, 12.275483697190918 ], "wc_strengths_avg": [ 60.75, 19.421315609401955 ], "wc_weaknesses_avg": [ 237.5, 221.06164298674702 ], "wc_questions_avg": [ 76.25, 73.27815158694985 ], "wc_review_avg": [ 461.75, 209.0722064263923 ], "wc_reply_reviewers_avg": [ 67.5, 116.91342951089922 ], "wc_reply_authors_avg": [ 1509.25, 953.4113422337706 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 1.224744871391589 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4714045207910316, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2939352183806994849&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=9hjVoPWPnh", "pdf": "https://openreview.net/pdf?id=9hjVoPWPnh", "email": "utexas.edu;jpmchase.com;jpmchase.com;utexas.edu", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Texas at Austin;JPMorgan Chase & Co.;JPMorgan Chase", "aff_unique_dep": ";;Global Technology, Analytics, and Research (GTAR)", "aff_unique_url": "https://www.utexas.edu;https://www.jpmorganchase.com;https://www.jpmorganchase.com", "aff_unique_abbr": "UT Austin;JPM;JPM", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Bayesian Optimization through Gaussian Cox Process Models for Spatio-temporal Data", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19287", "id": "9j1RD9LlWH", "author_site": "Yongsheng Mei, Mahdi Imani, Tian Lan", "tldr": "", "abstract": "Bayesian optimization (BO) has established itself as a leading strategy for efficiently optimizing expensive-to-evaluate functions. Existing BO methods mostly rely on Gaussian process (GP) surrogate models and are not applicable to (doubly-stochastic) Gaussian Cox processes, where the observation process is modulated by a latent intensity function modeled as a GP. In this paper, we propose a novel maximum *a posteriori* inference of Gaussian Cox processes. It leverages the Laplace approximation and change of kernel technique to transform the problem into a new reproducing kernel Hilbert space, where it becomes more tractable computationally. It enables us to obtain both a functional posterior of the latent intensity function and the covariance of the posterior, thus extending existing works that often focus on specific link functions or estimating the posterior mean. Using the result, we propose a BO framework based on the Gaussian Cox process model and further develop a Nystr\u00f6m approximation for efficient computation. 
Extensive evaluations on various synthetic and real-world datasets demonstrate significant improvement over state-of-the-art inference solutions for Gaussian Cox processes, as well as effective BO with a wide range of acquisition functions designed through the underlying Gaussian Cox process model.", "keywords": "Bayesian optimization;Gaussian Cox process", "primary_area": "probabilistic methods (Bayesian methods, variational inference, sampling, UQ, etc.)", "supplementary_material": "", "author": "Yongsheng Mei;Mahdi Imani;Tian Lan", "authorids": "~Yongsheng_Mei1;~Mahdi_Imani3;~Tian_Lan4", "gender": "M;M;M", "homepage": ";https://www2.seas.gwu.edu/~tlan/;https://imani.lab.northeastern.edu/", "dblp": "261/7750;;176/7532", "google_scholar": "y4Qyp24AAAAJ;;IwSVQXEAAAAJ", "orcid": "0000-0001-7606-8931;;0000-0001-9570-9909", "linkedin": "yongshengmei/;;mahdi-imani-2a362b25/", "or_profile": "~Yongsheng_Mei1;~Tian_Lan4;~Mahdi_Imani2", "aff": "George Washington University;George Washington University;Northeastern University", "aff_domain": "gwu.edu;gwu.edu;northeastern.edu", "position": "PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nmei2024bayesian,\ntitle={Bayesian Optimization through Gaussian Cox Process Models for Spatio-temporal Data},\nauthor={Yongsheng Mei and Mahdi Imani and Tian Lan},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=9j1RD9LlWH}\n}", "github": "", "project": "", "reviewers": "kMEd;dgpT;bJz1", "pdf_size": 10544913, "rating": "6;8;8", "confidence": "2;4;4", "soundness": "4;3;3", "contribution": "3;3;3", "presentation": "3;4;2", "wc_summary": "177;140;87", "wc_strengths": "87;134;65", "wc_weaknesses": "55;391;193", "wc_questions": "89;17;569", "wc_review": "408;682;914", "wc_reply_reviewers": "56;95;284", "wc_reply_authors": "920;1460;1701", "reply_reviewers": "1;1;3", "reply_authors": "3;3;4", "rating_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 134.66666666666666, 36.935379004718804 ], "wc_strengths_avg": [ 95.33333333333333, 28.778850258865837 ], "wc_weaknesses_avg": [ 213.0, 137.8985134075056 ], "wc_questions_avg": [ 225.0, 245.01428529781688 ], "wc_review_avg": [ 668.0, 206.8107024954624 ], "wc_reply_reviewers_avg": [ 145.0, 99.56907150315303 ], "wc_reply_authors_avg": [ 1360.3333333333333, 326.53772965599074 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.9428090415820634 ], "reply_authors_avg": [ 3.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5833568390443611849&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=9j1RD9LlWH", "pdf": "https://openreview.net/pdf?id=9j1RD9LlWH", "email": "gwu.edu;gwu.edu;northeastern.edu", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "George Washington University;Northeastern University", "aff_unique_dep": ";", "aff_unique_url": "https://www.gwu.edu;https://www.northeastern.edu", "aff_unique_abbr": "GWU;NEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "9jMoHuqjfg", "title": 
"Learning to Reach Goals via Diffusion", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Diffusion models are a powerful class of generative models capable of mapping random noise in high-dimensional spaces to a target manifold through iterative denoising. In this work, we present a novel perspective on goal-conditioned reinforcement learning by framing it within the context of diffusion modeling. Analogous to the diffusion process, where Gaussian noise is used to create random trajectories that walk away from the data manifold, we construct trajectories that move away from potential goal states. We then learn a goal-conditioned policy analogous to the score function. This approach, which we call Merlin, can reach predefined or novel goals from an arbitrary initial state without learning a separate value function. We consider three choices for the noise model to replace Gaussian noise in diffusion - reverse play from the buffer, reverse dynamics model, and a novel non-parametric approach. We theoretically justify our approach and validate it on offline goal-reaching tasks. Empirical results are competitive with state-of-the-art methods, which suggests this perspective on diffusion for RL is a simple, scalable, and effective direction for sequential decision-making.", "keywords": "Goal-conditioned reinforcement learning;Offline reinforcement learning;Diffusion modeling", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Vineet Jain;Siamak Ravanbakhsh", "authorids": "~Vineet_Jain1;~Siamak_Ravanbakhsh1", "gender": ";", "homepage": ";", "dblp": "92/3653;", "google_scholar": "https://scholar.google.com/citations?hl=en;", "orcid": ";", "linkedin": ";", "or_profile": "~Vineet_Jain1;~Siamak_Ravanbakhsh1", "aff": "McGill University;", "aff_domain": "mcgill.ca;", "position": "PhD student;", "bibtex": "@misc{\njain2024learning,\ntitle={Learning to Reach Goals via Diffusion},\nauthor={Vineet Jain and Siamak Ravanbakhsh},\nyear={2024},\nurl={https://openreview.net/forum?id=9jMoHuqjfg}\n}", "github": "", "project": "", "reviewers": "5ke8;euBm;6Zbj;i457", "site": "https://openreview.net/forum?id=9jMoHuqjfg", "pdf_size": 11802882, "rating": "3;5;5;5", "confidence": "3;4;3;3", "soundness": "2;3;2;2", "contribution": "2;2;2;2", "presentation": "2;2;2;3", "wc_summary": "71;84;77;97", "wc_strengths": "33;89;37;12", "wc_weaknesses": "155;96;276;111", "wc_questions": "140;2;2;10", "wc_review": "399;271;392;230", "wc_reply_reviewers": "0;0;255;0", "wc_reply_authors": "641;450;1170;557", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;3;1", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 82.25, 9.67923034130297 ], "wc_strengths_avg": [ 42.75, 28.340562803162538 ], "wc_weaknesses_avg": [ 159.5, 70.67000778265134 ], "wc_questions_avg": [ 38.5, 58.69199263954155 ], "wc_review_avg": [ 323.0, 73.97634757136905 ], "wc_reply_reviewers_avg": [ 63.75, 110.41823898251593 ], "wc_reply_authors_avg": [ 704.5, 277.1502300197494 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17073276869658529521&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, 
"aff_unique_index": "0", "aff_unique_norm": "McGill University", "aff_unique_dep": "", "aff_unique_url": "https://www.mcgill.ca", "aff_unique_abbr": "McGill", "aff_country_unique_index": "0", "aff_country_unique": "Canada" }, { "id": "9jmUwjZi7j", "title": "DreamFuser: Value-guided Diffusion Policy for Offline Reinforcement Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recent advances in reinforcement learning have underscored the potential of diffusion models, particularly in the context of policy learning. While earlier applications were predominantly focused on single-timestep settings, trajectory-based diffusion policy learning promises significant superiority, especially for low-level control tasks. In this context, we introduce DreamFuser, a trajectory-based value optimization approach that seamlessly blends the merits of diffusion-based trajectory learning and efficient Q function learning over state and noisy action. To address the computational challenges associated with action sampling of diffusion policy during the training phase, we design the DreamFuser based on the Generalized Noisy Action Markov Decision Process (GNMDP), which views the diffusion denoising process as part of the MDP transition. Empirical tests reveal DreamFuser's advantages over existing diffusion policy algorithms, notably in low-level control tasks. When benchmarked against the standard benchmark of offline reinforcement learning D4RL, DreamFuser matches or even outperforms contemporary methods. This work also elucidates the parallels between the optimization process of DreamFuser over GNMDP and Diffusion Policy over MDP, demonstrating its computational and memory advantages.", "keywords": "Trajectory-based Reinforcement Learning; Diffusion Model; Offline Reinforcement Learning;", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/acb2e143858cab7aafa815293691c9f1f4874b5e.zip", "author": "Kairong Luo;CAIWEI XIAO;Zhiao Huang;Zhan Ling;Yunhao Fang;Hao Su", "authorids": "~Kairong_Luo1;~CAIWEI_XIAO1;~Zhiao_Huang1;~Zhan_Ling2;~Yunhao_Fang1;~Hao_Su1", "gender": "M;F;M;M;M;M", "homepage": "https://github.com/thu-yao-01-luo/thu-yao-01-luo;;;;https://seerkfang.github.io/;http://ai.ucsd.edu/~haosu", "dblp": ";;172/1410;254/1980;;09/4945-1", "google_scholar": ";;;vsRxnYAAAAAJ;;1P8Zu04AAAAJ", "orcid": ";;;;;", "linkedin": ";caiwei-xiao;;zhan-ling-069a59149/;yunhao-fang-8b318221a/;", "or_profile": "~Kairong_Luo1;~CAIWEI_XIAO1;~Zhiao_Huang1;~Zhan_Ling2;~Yunhao_Fang1;~Hao_Su1", "aff": "Tsinghua University;;University of California, San Diego, University of California, San Diego;University of California, San Diego;University of California, San Diego;University of California, San Diego", "aff_domain": "tsinghua.edu.cn;;eng.ucsd.edu;ucsd.edu;ucsd.edu;ucsd.edu", "position": "Undergrad student;;PhD student;PhD student;MS student;Associate Professor", "bibtex": "@misc{\nluo2024dreamfuser,\ntitle={DreamFuser: Value-guided Diffusion Policy for Offline Reinforcement Learning},\nauthor={Kairong Luo and CAIWEI XIAO and Zhiao Huang and Zhan Ling and Yunhao Fang and Hao Su},\nyear={2024},\nurl={https://openreview.net/forum?id=9jmUwjZi7j}\n}", "github": "", "project": "", "reviewers": "vgVx;pkRP;6WsC", "site": "https://openreview.net/forum?id=9jmUwjZi7j", "pdf_size": 544922, "rating": "3;3;6", "confidence": "4;3;3", "soundness": "2;2;3", "contribution": "2;2;2", "presentation": "3;1;3", "wc_summary": "54;29;47", "wc_strengths": "86;36;55", "wc_weaknesses": "250;579;203", 
"wc_questions": "9;2;363", "wc_review": "399;646;668", "wc_reply_reviewers": "186;0;218", "wc_reply_authors": "1156;1542;1487", "reply_reviewers": "1;0;1", "reply_authors": "2;4;3", "rating_avg": [ 4.0, 1.4142135623730951 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 43.333333333333336, 10.530379332620875 ], "wc_strengths_avg": [ 59.0, 20.607442021431645 ], "wc_weaknesses_avg": [ 344.0, 167.27422594849054 ], "wc_questions_avg": [ 124.66666666666667, 168.55134397433784 ], "wc_review_avg": [ 571.0, 121.95354306729537 ], "wc_reply_reviewers_avg": [ 134.66666666666666, 96.11567105431988 ], "wc_reply_authors_avg": [ 1395.0, 170.4836258022062 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 3.0, 0.816496580927726 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:yQEgfy0z2sMJ:scholar.google.com/&scioq=DreamFuser:+Value-guided+Diffusion+Policy+for+Offline+Reinforcement+Learning&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "Tsinghua University;University of California, San Diego", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.ucsd.edu", "aff_unique_abbr": "THU;UCSD", "aff_campus_unique_index": "1;1;1;1", "aff_campus_unique": ";San Diego", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "China;United States" }, { "title": "On the Learnability of Watermarks for Language Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19286", "id": "9k0krNzvlV", "author_site": "Chenchen Gu, XIANG LI, Percy Liang, Tatsunori Hashimoto", "tldr": "", "abstract": "Watermarking of language model outputs enables statistical detection of model-generated text, which can mitigate harms and misuses of language models. Existing watermarking strategies operate by altering the decoder of an existing language model. In this paper, we ask whether language models can directly learn to generate watermarked text, which would have significant implications for the real-world deployment of watermarks. First, learned watermarks could be used to build open models that naturally generate watermarked text, enabling watermarking for open models, where users can control the decoding procedure. Second, if watermarking is used to determine the provenance of generated text, an adversary can hurt the reputation of a victim model by spoofing its watermark and generating damaging watermarked text. To investigate the learnability of watermarks, we propose watermark distillation, which trains a student model to behave like a teacher model that uses decoding-based watermarking. We test our approach on three decoding-based watermarking strategies and various hyperparameter settings, finding that models can learn to generate watermarked text with high detectability. 
We also find limitations to learnability, including the loss of watermarking capabilities under fine-tuning on normal text and high sample complexity when learning low-distortion watermarks.", "keywords": "watermarking;large language models;distillation", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Chenchen Gu;Xiang Lisa Li;Percy Liang;Tatsunori Hashimoto", "authorids": "~Chenchen_Gu1;~Xiang_Lisa_Li1;~Percy_Liang1;~Tatsunori_Hashimoto1", "gender": "M;F;;M", "homepage": "https://chenchenygu.github.io/;https://xiangli1999.github.io;https://cs.stanford.edu/~pliang/;https://thashim.github.io", "dblp": ";40/1491-63;04/1701;", "google_scholar": "kHeBSVYAAAAJ;nzA4P0oAAAAJ;pouyVyUAAAAJ;5ygiTwsAAAAJ", "orcid": ";;;", "linkedin": "chenchen-gu/;;;", "or_profile": "~Chenchen_Gu1;~Xiang_Lisa_Li1;~Percy_Liang1;~Tatsunori_Hashimoto1", "aff": "Stanford University;Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;stanford.edu", "position": "Undergrad student;PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\ngu2024on,\ntitle={On the Learnability of Watermarks for Language Models},\nauthor={Chenchen Gu and Xiang Lisa Li and Percy Liang and Tatsunori Hashimoto},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=9k0krNzvlV}\n}", "github": "", "project": "", "reviewers": "2p7U;GhKC;hxrQ;metd", "pdf_size": 569282, "rating": "5;6;6;6", "confidence": "3;4;4;4", "soundness": "3;3;3;4", "contribution": "3;3;3;2", "presentation": "3;2;2;4", "wc_summary": "37;81;208;74", "wc_strengths": "14;94;121;72", "wc_weaknesses": "101;177;198;224", "wc_questions": "72;1;131;68", "wc_review": "224;353;658;438", "wc_reply_reviewers": "0;0;46;12", "wc_reply_authors": "820;655;1206;1032", "reply_reviewers": "0;0;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 100.0, 64.55617708631762 ], "wc_strengths_avg": [ 75.25, 39.39146481155531 ], "wc_weaknesses_avg": [ 175.0, 45.8530260724415 ], "wc_questions_avg": [ 68.0, 46.02716589146023 ], "wc_review_avg": [ 418.25, 158.00375786670392 ], "wc_reply_reviewers_avg": [ 14.5, 18.83480820183736 ], "wc_reply_authors_avg": [ 928.25, 208.74191601113563 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 44, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4306363265805586600&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=9k0krNzvlV", "pdf": "https://openreview.net/pdf?id=9k0krNzvlV", "email": "stanford.edu;stanford.edu;stanford.edu;stanford.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "9k27IITeAZ", "title": "ChunkAttention: Efficient Attention on KV Cache with Chunking Sharing and Batching", "track": "main", "status": "Reject", 
"tldr": "", "abstract": "Self-attention is an essential component of GPT-style models and a significant cause of LLM inference latency for long sequences. In multi-tenant LLM inference servers, the compute and memory operation cost of self-attention can be amortized by making use of the probability that sequences from users may share long prompt prefixes. This paper introduces ChunkAttention, a unique self-attention kernel built on chunking, sharing the KV cache, and batching the attention computation. ChunkAttention recognizes matching prompt prefixes across several sequences and shares their KV cache in memory by chunking the KV cache and structuring it into the auxiliary prefix tree. To significantly improve the memory reuse of KV cache and consequently the speed of self-attention for long shared prompts, we design an efficient computation kernel on this new storage structure, where two-phased partitioning is implemented to reduce memory operations on shared KV cache during self-attention. Experiments show that ChunkAttention can speed up self-attention of long shared prompts 1.6-3 times, with lengths ranging from 1024 to 8192.", "keywords": "large language model;model inference;self attention", "primary_area": "infrastructure, software libraries, hardware, etc.", "supplementary_material": "/attachment/34a6aefe728c3e9a526d796dc036dc6b0d6a9571.zip", "author": "Lu Ye;Ze Tao;Yong Huang;Yang Li", "authorids": "~Lu_Ye1;~Ze_Tao2;~Yong_Huang4;~Yang_Li83", "gender": "M;M;;M", "homepage": "https://scholar.google.com/citations?user=kRjMKvEAAAAJ&hl=en;https://github.com/Toudsour;https://www.linkedin.com/in/yong-huang-818698bb/;https://leon.github.io", "dblp": ";;;", "google_scholar": "kRjMKvEAAAAJ;;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Lu_Ye1;~Ze_Tao2;~Yong_Huang4;~Yang_Li83", "aff": ";Xi'an Jiaotong University;;", "aff_domain": ";xjtu.edu.cn;;", "position": ";MS student;;", "bibtex": "@misc{\nye2024chunkattention,\ntitle={ChunkAttention: Efficient Attention on {KV} Cache with Chunking Sharing and Batching},\nauthor={Lu Ye and Ze Tao and Yong Huang and Yang Li},\nyear={2024},\nurl={https://openreview.net/forum?id=9k27IITeAZ}\n}", "github": "", "project": "", "reviewers": "oC3j;ee1U;sEJt;FRg7", "site": "https://openreview.net/forum?id=9k27IITeAZ", "pdf_size": 566347, "rating": "3;5;5;5", "confidence": "5;3;5;3", "soundness": "2;3;3;2", "contribution": "3;2;4;2", "presentation": "2;3;3;2", "wc_summary": "91;94;67;44", "wc_strengths": "40;19;63;53", "wc_weaknesses": "111;145;204;69", "wc_questions": "102;9;28;166", "wc_review": "344;267;362;332", "wc_reply_reviewers": "54;0;20;32", "wc_reply_authors": "415;85;151;162", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 1.0 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 74.0, 20.23610634484806 ], "wc_strengths_avg": [ 43.75, 16.452583383772897 ], "wc_weaknesses_avg": [ 132.25, 49.403314666123364 ], "wc_questions_avg": [ 76.25, 62.3873985673389 ], "wc_review_avg": [ 326.25, 35.83556194620087 ], "wc_reply_reviewers_avg": [ 26.5, 19.56399754651385 ], "wc_reply_authors_avg": [ 203.25, 125.75049701691043 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=7802398420830154308&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Xi'an Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.xjtu.edu.cn", "aff_unique_abbr": "XJTU", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "9k4Yvb75ED", "title": "EquiAV: Single-modal Equivariance Promotes Audio-Visual Contrastive Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Advancements in audio-visual representation learning have showcased its effectiveness in acquiring rich and comprehensive representations by leveraging both auditory and visual modalities. Recent works have attempted to improve performance using contrastive learning or masked modeling techniques. However, the effort to maximize the impact of data augmentations for learning semantically rich representation has remained relatively narrow. Without a proper strategy for utilizing data augmentation, the model can be adversely affected or fail to achieve sufficient performance gains. To address this limitation, we present EquiAV, a novel framework that integrates single-modal equivariant contrastive learning with audio-visual contrastive learning. In the proposed framework, audio-visual correspondence and rich modality-specific representations are learned in separate latent spaces. In particular, augmentation-related and modality-specific information is learned in the intra-modal latent space by making the representations equivariant to data augmentation. Extensive ablation studies verify that our framework is the most suitable architecture for maximizing the benefits of the augmentation while ensuring model robustness to strong augmentation. 
EquiAV outperforms the existing audio-visual self-supervised pre-training methods on audio-visual event classification and zero-shot audio-visual retrieval tasks.", "keywords": "Audio-Visual Contrastive Learning;Multimodal Representation Learning;Equivariant Contrastive Learning", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Jongsuk Kim;Hyeongkeun Lee;Kyeongha Rho;Junmo Kim;Joon Son Chung", "authorids": "~Jongsuk_Kim1;~Hyeongkeun_Lee1;~Kyeongha_Rho1;~Junmo_Kim1;~Joon_Son_Chung1", "gender": "M;M;M;;M", "homepage": ";https://khrho325.github.io;https://siit.kaist.ac.kr/Faculty;https://mmai.io/joon/;https://siit.kaist.ac.kr/", "dblp": "325/1370;264/9498;40/240-2.html;160/2692.html;330/3774", "google_scholar": "rFmAVN4AAAAJ;;https://scholar.google.com.tw/citations?user=GdQtWNQAAAAJ;https://scholar.google.co.uk/citations?user=JJ_LQ0YAAAAJ;C1O5NFQAAAAJ", "orcid": ";;;0000-0001-7741-7275;", "linkedin": ";;;;", "or_profile": "~Hyeongkeun_Lee1;~Kyeongha_Rho1;~Junmo_Kim1;~Joon_Son_Chung1;~Jong_Suk_Kim1", "aff": "Korea Advanced Institute of Science & Technology;KAIST;Korea Advanced Institute of Science & Technology;KAIST;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "position": "MS student;MS student;Associate Professor;Associate Professor;PhD student", "bibtex": "@misc{\nkim2024equiav,\ntitle={Equi{AV}: Single-modal Equivariance Promotes Audio-Visual Contrastive Learning},\nauthor={Jongsuk Kim and Hyeongkeun Lee and Kyeongha Rho and Junmo Kim and Joon Son Chung},\nyear={2024},\nurl={https://openreview.net/forum?id=9k4Yvb75ED}\n}", "github": "", "project": "", "reviewers": "4TFL;eQDc;3ZPL;A7KE", "site": "https://openreview.net/forum?id=9k4Yvb75ED", "pdf_size": 4747337, "rating": "3;3;3;6", "confidence": "4;5;2;3", "soundness": "3;2;3;3", "contribution": "1;2;1;3", "presentation": "3;3;3;3", "wc_summary": "137;121;58;90", "wc_strengths": "129;65;20;143", "wc_weaknesses": "387;156;295;47", "wc_questions": "29;65;5;336", "wc_review": "682;407;378;616", "wc_reply_reviewers": "140;0;0;7", "wc_reply_authors": "1687;1080;1053;981", "reply_reviewers": "2;0;0;1", "reply_authors": "3;2;2;2", "rating_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 1.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 101.5, 30.26962173533062 ], "wc_strengths_avg": [ 89.25, 49.630509769697106 ], "wc_weaknesses_avg": [ 221.25, 129.93532044829072 ], "wc_questions_avg": [ 108.75, 132.92925750187578 ], "wc_review_avg": [ 520.75, 130.7581259425203 ], "wc_reply_reviewers_avg": [ 36.75, 59.67987516742976 ], "wc_reply_authors_avg": [ 1200.25, 283.3455266984111 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.2581988897471611, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:QCXIDQNANV8J:scholar.google.com/&scioq=EquiAV:+Single-modal+Equivariance+Promotes+Audio-Visual+Contrastive+Learning&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", 
"aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Time Fairness in Online Knapsack Problems", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19285", "id": "9kG7TwgLYu", "author_site": "Adam Lechowicz, Rik Sengupta, Bo Sun, Shahin Kamali, Mohammad Hajiesmaili", "tldr": "", "abstract": "The online knapsack problem is a classic problem in the field of online algorithms. Its canonical version asks how to pack items of different values and weights arriving online into a capacity-limited knapsack so as to maximize the total value of the admitted items. Although optimal competitive algorithms are known for this problem, they may be fundamentally unfair, i.e., individual items may be treated inequitably in different ways. We formalize a practically-relevant notion of time fairness which effectively models a trade off between static and dynamic pricing in a motivating application such as cloud resource allocation, and show that existing algorithms perform poorly under this metric. We propose a parameterized deterministic algorithm where the parameter precisely captures the Pareto-optimal trade-off between fairness (static pricing) and competitiveness (dynamic pricing). We show that randomization is theoretically powerful enough to be simultaneously competitive and fair; however, it does not work well in experiments. To further improve the trade-off between fairness and competitiveness, we develop a nearly-optimal learning-augmented algorithm which is fair, consistent, and robust (competitive), showing substantial performance improvements in numerical experiments.", "keywords": "fairness;online knapsack;learning-augmented algorithm;Pareto-optimality;robustness;consistency", "primary_area": "optimization", "supplementary_material": "/attachment/6e62237334e258d8dddb48f6242a120848a5903b.pdf", "author": "Adam Lechowicz;Rik Sengupta;Bo Sun;Shahin Kamali;Mohammad Hajiesmaili", "authorids": "~Adam_Lechowicz1;~Rik_Sengupta1;~Bo_Sun8;~Shahin_Kamali1;~Mohammad_Hajiesmaili1", "gender": "Non-Binary;M;;M;M", "homepage": "https://adamlechowicz.github.io;https://people.cs.umass.edu/~rsengupta/;;https://www.eecs.yorku.ca/~kamalis/;https://groups.cs.umass.edu/hajiesmaili/", "dblp": "307/5199;151/8711;;59/577.html;49/7911", "google_scholar": "fZ2-jm0AAAAJ;hK7NUBIAAAAJ;;hQXlVLsAAAAJ;XCGuYKIAAAAJ", "orcid": "0000-0002-7774-9939;0000-0002-9238-5408;;0000-0003-1404-2212;", "linkedin": ";rik-sengupta-ab00bb224/;;shahin-kamali-4a3b376?originalSubdomain=ca;", "or_profile": "~Adam_Lechowicz1;~Rik_Sengupta1;~Bo_Sun8;~Shahin_Kamali1;~Mohammad_Hajiesmaili1", "aff": "University of Massachusetts Amherst;University of Massachusetts at Amherst;;York University;College of Information and Computer Science, University of Massachusetts, Amherst", "aff_domain": "cs.umass.edu;umass.edu;;yorku.ca;cics.umass.edu", "position": "PhD student;PhD student;;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nlechowicz2024time,\ntitle={Time Fairness in Online Knapsack Problems},\nauthor={Adam Lechowicz and Rik Sengupta and Bo Sun and Shahin Kamali and Mohammad Hajiesmaili},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=9kG7TwgLYu}\n}", "github": "", "project": "", "reviewers": "rQKw;zsDR;37F8", "pdf_size": 1607054, "rating": "6;6;8", "confidence": "3;4;2", "soundness": "3;3;3", "contribution": "3;3;3", "presentation": "3;3;4", "wc_summary": 
"60;178;153", "wc_strengths": "42;108;86", "wc_weaknesses": "40;155;79", "wc_questions": "30;4;22", "wc_review": "172;445;340", "wc_reply_reviewers": "34;55;14", "wc_reply_authors": "923;662;88", "reply_reviewers": "2;1;1", "reply_authors": "2;2;1", "rating_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 130.33333333333334, 50.769632218045025 ], "wc_strengths_avg": [ 78.66666666666667, 27.438820836342234 ], "wc_weaknesses_avg": [ 91.33333333333333, 47.75167245471327 ], "wc_questions_avg": [ 18.666666666666668, 10.873004286866726 ], "wc_review_avg": [ 319.0, 112.43664882946307 ], "wc_reply_reviewers_avg": [ 34.333333333333336, 16.73983937265296 ], "wc_reply_authors_avg": [ 557.6666666666666, 348.7791405587337 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14561090357017339709&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=9kG7TwgLYu", "pdf": "https://openreview.net/pdf?id=9kG7TwgLYu", "email": "cs.umass.edu;umass.edu;;yorku.ca;cics.umass.edu", "author_num": 5, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Massachusetts Amherst;York University", "aff_unique_dep": ";", "aff_unique_url": "https://www.umass.edu;https://www.yorku.ca", "aff_unique_abbr": "UMass Amherst;York U", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Amherst;", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;Canada" }, { "id": "9kLDrE5rsW", "title": "Temporal graph models fail to capture global temporal dynamics", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "A recently released Temporal Graph Benchmark is analyzed in the context of Dynamic Link Property Prediction. We outline our observations and propose a trivial optimization-free baseline of \"recently popular nodes\" outperforming other methods on medium and large-size datasets in the Temporal Graph Benchmark. We propose two measures based on Wasserstein distance which can quantify the strength of short-term and long-term global dynamics of datasets. By analyzing our unexpectedly strong baseline, we show how standard negative sampling evaluation can be unsuitable for datasets with strong temporal dynamics. We also show how simple negative-sampling can lead to model degeneration during training, resulting in impossible to rank, fully saturated predictions of temporal graph networks. We propose improved negative sampling schemes for both training and evaluation and prove their usefulness. We conduct a comparison with a model trained non-contrastively without negative sampling. Our results provide a challenging baseline and indicate that temporal graph network architectures need deep rethinking for usage in problems with significant global dynamics, such as social media, cryptocurrency markets or e-commerce. 
We open-source the code for baselines, measures and proposed negative sampling schemes.", "keywords": "temporal graph;dynamic graph;graph neural networks;temporal graph neural networks", "primary_area": "learning on graphs and other geometries & topologies", "supplementary_material": "", "author": "Michal Daniluk;Jacek Dabrowski", "authorids": "~Michal_Daniluk1;~Jacek_Dabrowski1", "gender": "M;M", "homepage": ";", "dblp": ";", "google_scholar": ";https://scholar.google.pl/citations?user=1rbW6yUAAAAJ", "orcid": ";", "linkedin": "https://pl.linkedin.com/in/michaldaniluk91;ponythewhite/", "or_profile": "~Michal_Daniluk1;~Jacek_Dabrowski1", "aff": ";Synerise S.A.", "aff_domain": ";synerise.com", "position": ";Principal Researcher", "bibtex": "@misc{\ndaniluk2024temporal,\ntitle={Temporal graph models fail to capture global temporal dynamics},\nauthor={Michal Daniluk and Jacek Dabrowski},\nyear={2024},\nurl={https://openreview.net/forum?id=9kLDrE5rsW}\n}", "github": "", "project": "", "reviewers": "mQmi;C9Fh;BqDA;Chai", "site": "https://openreview.net/forum?id=9kLDrE5rsW", "pdf_size": 473608, "rating": "1;3;3;6", "confidence": "2;4;4;3", "soundness": "1;1;2;3", "contribution": "2;1;2;3", "presentation": "1;1;2;3", "wc_summary": "60;81;78;82", "wc_strengths": "33;21;58;81", "wc_weaknesses": "608;230;208;22", "wc_questions": "126;2;144;100", "wc_review": "827;334;488;285", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 3.25, 1.7853571071357126 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 1.75, 0.82915619758885 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 1.75, 0.82915619758885 ], "wc_summary_avg": [ 75.25, 8.926785535678562 ], "wc_strengths_avg": [ 48.25, 23.14492384951828 ], "wc_weaknesses_avg": [ 267.0, 212.81212371479216 ], "wc_questions_avg": [ 93.0, 54.817880294662984 ], "wc_review_avg": [ 483.5, 211.99351405172754 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.2955402316445243, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15199979378178198245&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 4, "aff_unique_index": "0", "aff_unique_norm": "Synerise", "aff_unique_dep": "", "aff_unique_url": "https://www.synerise.com", "aff_unique_abbr": "Synerise", "aff_country_unique_index": "0", "aff_country_unique": "Poland" }, { "id": "9lvyCHhQix", "title": "Diversity-aware Continual Learning with Latent Knowledge Hypergraph", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Continual learning (CL) refers to the ability of models to learn from non-stationary data distribution while transferring and protecting past knowledge. Existing literature in CL has mainly focused on overcoming catastrophic forgetting. However, they often overlook a critical trade-off between parameter efficiency and capacity saturation. Almost all of the existing approaches including architecture-stable and architecture-growing methods struggle to balance parameter efficiency and capacity saturation. This makes them vulnerable to long-term task-incremental CL under storage constraints. 
In this paper, we propose a novel CL approach that addresses the trade-off between parameter efficiency and capacity saturation by dynamically expanding the model's weight space in proportion to the actual capacity increase needed by each new task. Specifically, our approach introduces a unique knowledge hypergraph structure that captures the latent knowledge across tasks and leverages it to measure task diversity and estimate the capacity increase required for each new task. Moreover, we introduce new constraints to ensure parameter efficiency during inference and a fine-grained parameter generator to create task-specific sub-networks that ensure a constant number of trainable parameters over time while accommodating the evolving complexities of tasks. Extensive experiment results show that the proposed approach achieves state-of-the-art results on several benchmark CL datasets, while maintaining low parameter counts.", "keywords": "continual learning;hypernetwork;hypergraph;diversity awareness", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "", "author": "Jiayi Chen;Kishlay Jha;Aidong Zhang", "authorids": "~Jiayi_Chen4;~Kishlay_Jha2;~Aidong_Zhang2", "gender": "F;M;F", "homepage": "https://jia-yi-chen.github.io/;https://engineering.uiowa.edu/people/kishlay-jha;https://engineering.virginia.edu/faculty/aidong-zhang", "dblp": "42/1159;177/7445;z/AidongZhang.html", "google_scholar": "f3Iz6qoAAAAJ;8GhLfu8AAAAJ;O8XxkE4AAAAJ", "orcid": "0000-0003-0217-6352;0000-0003-0826-445X;0000-0001-9723-3246", "linkedin": ";kishlayjha/;", "or_profile": "~Jiayi_Chen4;~Kishlay_Jha2;~Aidong_Zhang2", "aff": "University of Virginia;University of Iowa;University of Virginia", "aff_domain": "cs.virginia.edu;uiowa.edu;virginia.edu", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@misc{\nchen2024diversityaware,\ntitle={Diversity-aware Continual Learning with Latent Knowledge Hypergraph},\nauthor={Jiayi Chen and Kishlay Jha and Aidong Zhang},\nyear={2024},\nurl={https://openreview.net/forum?id=9lvyCHhQix}\n}", "github": "", "project": "", "reviewers": "j4Dh;8ANK;RNxE;9KLn", "site": "https://openreview.net/forum?id=9lvyCHhQix", "pdf_size": 2600665, "rating": "3;3;5;5", "confidence": "5;3;4;4", "soundness": "1;2;3;3", "contribution": "2;2;3;2", "presentation": "3;1;3;2", "wc_summary": "93;112;44;63", "wc_strengths": "103;76;37;31", "wc_weaknesses": "410;241;109;135", "wc_questions": "127;101;131;4", "wc_review": "733;530;321;233", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 78.0, 26.277366686941825 ], "wc_strengths_avg": [ 61.75, 29.422567868899545 ], "wc_weaknesses_avg": [ 223.75, 118.35407682036137 ], "wc_questions_avg": [ 90.75, 51.39248485916983 ], "wc_review_avg": [ 454.25, 193.74387087079683 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:L78khaq-8dQJ:scholar.google.com/&scioq=Diversity-aware+Continual+Learning+with+Latent+Knowledge+Hypergraph&hl=en&as_sdt=0,5", 
"gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Virginia;University of Iowa", "aff_unique_dep": ";", "aff_unique_url": "https://www.virginia.edu;https://www.uiowa.edu", "aff_unique_abbr": "UVA;UIowa", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "DataInf: Efficiently Estimating Data Influence in LoRA-tuned LLMs and Diffusion Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19284", "id": "9m02ib92Wz", "author_site": "Yongchan Kwon, Eric Wu, Kevin Wu, James Y Zou", "tldr": "", "abstract": "Quantifying the impact of training data points is crucial for understanding the outputs of machine learning models and for improving the transparency of the AI pipeline. The influence function is a principled and popular data attribution method, but its computational cost often makes it challenging to use. This issue becomes more pronounced in the setting of large language models and text-to-image models. In this work, we propose DataInf, an efficient influence approximation method that is practical for large-scale generative AI models. Leveraging an easy-to-compute closed-form expression, DataInf outperforms existing influence computation algorithms in terms of computational and memory efficiency. Our theoretical analysis shows that DataInf is particularly well-suited for parameter-efficient fine-tuning techniques such as LoRA. Through systematic empirical evaluations, we show that DataInf accurately approximates influence scores and is orders of magnitude faster than existing methods. In applications to RoBERTa-large, Llama-2-13B-chat, and stable-diffusion-v1.5 models, DataInf effectively identifies the most influential fine-tuning examples better than other approximate influence scores. 
Moreover, it can help to identify which data points are mislabeled.", "keywords": "Influence function;Data valuation", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/e078e037cea2844184989983823731a351cdeacf.zip", "author": "Yongchan Kwon;Eric Wu;Kevin Wu;James Zou", "authorids": "~Yongchan_Kwon1;~Eric_Wu3;~Kevin_Wu1;~James_Zou1", "gender": ";M;;", "homepage": ";;https://kevinwu.ai;", "dblp": ";;;", "google_scholar": ";6pIliiAAAAAJ;s4dCi5sAAAAJ;23ZXZvEAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Yongchan_Kwon1;~Eric_Wu3;~Kevin_Wu1;~James_Zou1", "aff": ";Stanford University;Stanford University;Stanford University", "aff_domain": ";stanford.edu;stanford.edu;stanford.edu", "position": ";PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nkwon2024datainf,\ntitle={DataInf: Efficiently Estimating Data Influence in Lo{RA}-tuned {LLM}s and Diffusion Models},\nauthor={Yongchan Kwon and Eric Wu and Kevin Wu and James Zou},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=9m02ib92Wz}\n}", "github": "", "project": "", "reviewers": "kkF9;oMqQ;gXtX;NdTy", "pdf_size": 3982524, "rating": "6;6;6;6", "confidence": "3;2;3;3", "soundness": "3;3;3;2", "contribution": "3;3;3;3", "presentation": "3;2;3;3", "wc_summary": "56;27;275;23", "wc_strengths": "35;30;19;17", "wc_weaknesses": "134;26;69;108", "wc_questions": "109;13;70;37", "wc_review": "334;96;433;185", "wc_reply_reviewers": "10;10;0;0", "wc_reply_authors": "725;313;416;743", "reply_reviewers": "1;1;0;0", "reply_authors": "3;3;1;3", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 95.25, 104.55710162394519 ], "wc_strengths_avg": [ 25.25, 7.495832175282475 ], "wc_weaknesses_avg": [ 84.25, 40.81896005534683 ], "wc_questions_avg": [ 57.25, 36.08583517115822 ], "wc_review_avg": [ 262.0, 130.29773597419106 ], "wc_reply_reviewers_avg": [ 5.0, 5.0 ], "wc_reply_authors_avg": [ 549.25, 188.41228065070493 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 60, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4678751591021020846&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=9m02ib92Wz", "pdf": "https://openreview.net/pdf?id=9m02ib92Wz", "email": ";stanford.edu;stanford.edu;stanford.edu", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "9mX0AZVEet", "title": "Improving Diffusion Models for Inverse Problems Using Optimal Posterior Covariance", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recent diffusion models provide a promising alternative zero-shot solution to noisy linear inverse problems without retraining for specific inverse problems. 
In this paper, we propose the first unified framework for diffusion-based zero-shot methods from the view of approximating conditional posterior mean for the reverse process. We reveal that recent diffusion-based zero-shot methods are equivalent to making isotropic Gaussian approximation to intractable posterior distributions over clean images given diffused noisy images, with only difference in handcrafted design of isotropic posterior covariances. Inspired by this finding, we develop the optimal posterior covariance of the posterior distribution via maximum likelihood estimation. We provide a general solution based on three approaches specifically designed for posterior covariance optimization, by training from scratch and using pre-trained models with and without reverse covariances. Remarkably, the proposed framework can be achieved in a plug-and-play fashion based on pre-trained unconditional diffusion models by converting reverse covariances or via Monte Carlo estimation without reverse covariances. Experimental results demonstrate that the proposed framework significantly outperforms existing zero-shot methods and enhances the robustness to hyper-parameters.", "keywords": "Generative models;Inverse problems;Diffusion models", "primary_area": "generative models", "supplementary_material": "/attachment/ebc9ae332842f7f74263d5ba0b69be7ce8767fab.zip", "author": "Xinyu Peng;Ziyang Zheng;Wenrui Dai;Nuoqian Xiao;Chenglin Li;Junni Zou;Hongkai Xiong", "authorids": "~Xinyu_Peng1;~Ziyang_Zheng2;~Wenrui_Dai1;~Nuoqian_Xiao1;~Chenglin_Li2;~Junni_Zou1;~Hongkai_Xiong1", "gender": "M;M;;;M;F;M", "homepage": "https://github.com/xypeng9903;;;;https://min.sjtu.edu.cn/En/FacultyShow/4?Vid=17;http://www.cs.sjtu.edu.cn/~zou-jn;http://min.sjtu.edu.cn", "dblp": ";;16/5135.html;;;91/4613;21/3569", "google_scholar": ";pcgDcMmDJbwC;Xg8MhyAAAAAJ;;ltW2JMcAAAAJ;https://scholar.google.com/citations?hl=zh-CN;bB16iN4AAAAJ", "orcid": ";0000-0001-9923-8016;;;;;0000-0003-4552-0029", "linkedin": "xinyu-peng-328918246/;;;;;;", "or_profile": "~Xinyu_Peng1;~Ziyang_Zheng2;~Wenrui_Dai1;~Nuoqian_Xiao1;~Chenglin_Li2;~Junni_Zou1;~Hongkai_Xiong1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;PhD student;Associate Professor;;Full Professor;Full Professor;Full Professor", "bibtex": "@misc{\npeng2024improving,\ntitle={Improving Diffusion Models for Inverse Problems Using Optimal Posterior Covariance},\nauthor={Xinyu Peng and Ziyang Zheng and Wenrui Dai and Nuoqian Xiao and Chenglin Li and Junni Zou and Hongkai Xiong},\nyear={2024},\nurl={https://openreview.net/forum?id=9mX0AZVEet}\n}", "github": "", "project": "", "reviewers": "rzGL;8Q2y;FU6y", "site": "https://openreview.net/forum?id=9mX0AZVEet", "pdf_size": 40237100, "rating": "5;5;8", "confidence": "4;4;5", "soundness": "3;2;3", "contribution": "2;2;3", "presentation": "3;3;3", "wc_summary": "45;72;109", "wc_strengths": "35;68;247", "wc_weaknesses": "95;112;245", "wc_questions": "4;529;92", "wc_review": "179;781;693", "wc_reply_reviewers": "0;290;31", "wc_reply_authors": "1179;1688;1876", "reply_reviewers": "0;1;1", "reply_authors": "2;4;3", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 
2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 75.33333333333333, 26.233989826601334 ], "wc_strengths_avg": [ 116.66666666666667, 93.13908357337905 ], "wc_weaknesses_avg": [ 150.66666666666666, 67.06381703687582 ], "wc_questions_avg": [ 208.33333333333334, 229.5740016252324 ], "wc_review_avg": [ 551.0, 265.4857183855031 ], "wc_reply_reviewers_avg": [ 107.0, 130.0179474790564 ], "wc_reply_authors_avg": [ 1581.0, 294.43618437051293 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 3.0, 0.816496580927726 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14269481151172449337&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "9n9q0R9Gyw", "title": "Retrieval-augmented Text-to-3D Generation", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Text-to-3D generation using neural networks has been confronted with a fundamental difficulty regarding the scale and quality of 3D data. Score distillation sampling based on 2D diffusion models addresses this issue effectively; however, it also introduces 3D inconsistencies that plague generated 3D scenes due to a lack of robust 3D prior knowledge and awareness. In this study, we propose a novel framework for retrieval-augmented text-to-3D generation that is capable of generating superior-quality 3D objects with decent geometry. After we employ a particle-based variational inference framework, we augment the conventional target distribution in SDS-based techniques with an empirical distribution of retrieved 3D assets. Furthermore, based on the retrieved 3D assets, we propose the two effective methods: a lightweight adaptation of a 2D prior model for reducing its inherent bias toward certain camera viewpoints, and delta distillation to regularize artifacts of generated 3D contents. 
Our experimental results show that our method not only exhibits state-of-the-art quality in text-to-3D generation but also significantly enhances the geometry compared to the baseline.", "keywords": "diffusion models;NeRF;3d synthesis", "primary_area": "generative models", "supplementary_material": "/attachment/c39a7cc106e4888a0676c8fdf37ee17c7c231196.zip", "author": "Junyoung Seo;Susung Hong;Wooseok Jang;Min-Seop Kwak;Hyeonsu Kim;Doyup Lee;Seungryong Kim", "authorids": "~Junyoung_Seo1;~Susung_Hong1;~Wooseok_Jang2;~Min-Seop_Kwak1;~Hyeonsu_Kim2;~Doyup_Lee1;~Seungryong_Kim1", "gender": "M;M;;M;F;M;M", "homepage": "https://j0seo.github.io;https://susunghong.github.io/;https://github.com/woo1726;;https://ines-hyeonsu-kim.github.io;;https://cvlab.korea.ac.kr/members/faculty", "dblp": "209/9340;330/5127;30/4458;338/9125;367/9344;205/2368;141/9955", "google_scholar": "orJRvmEAAAAJ;HigIHvUAAAAJ;;;tBcqfncAAAAJ;https://scholar.google.co.kr/citations?user=5rAj44kAAAAJ;cIK1hS8AAAAJ", "orcid": ";;;;0009-0003-3695-0243;;", "linkedin": ";;;matthewmatics96;;;", "or_profile": "~Junyoung_Seo1;~Susung_Hong1;~Wooseok_Jang2;~Min-Seop_Kwak1;~Hyeonsu_Kim2;~Doyup_Lee1;~Seungryong_Kim1", "aff": "Korea University;Korea University;Korea University;Korea Advanced Institute of Science & Technology;Korea University;Runway;Korea University", "aff_domain": "korea.ac.kr;korea.ac.kr;korea.ac.kr;kaist.ac.kr;korea.ac.kr;runwayml.com;korea.ac.kr", "position": "PhD student;Undergrad student;MS student;PhD student;Undergrad student;Researcher;Assistant Professor", "bibtex": "@misc{\nseo2024retrievalaugmented,\ntitle={Retrieval-augmented Text-to-3D Generation},\nauthor={Junyoung Seo and Susung Hong and Wooseok Jang and Min-Seop Kwak and Hyeonsu Kim and Doyup Lee and Seungryong Kim},\nyear={2024},\nurl={https://openreview.net/forum?id=9n9q0R9Gyw}\n}", "github": "", "project": "", "reviewers": "aEzM;U742;3ZvE", "site": "https://openreview.net/forum?id=9n9q0R9Gyw", "pdf_size": 18814750, "rating": "5;5;5", "confidence": "5;4;4", "soundness": "2;2;3", "contribution": "2;3;3", "presentation": "2;2;3", "wc_summary": "67;163;97", "wc_strengths": "55;104;51", "wc_weaknesses": "161;658;155", "wc_questions": "43;37;2", "wc_review": "326;962;305", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 109.0, 40.099875311526844 ], "wc_strengths_avg": [ 70.0, 24.097026095903757 ], "wc_weaknesses_avg": [ 324.6666666666667, 235.71498797394185 ], "wc_questions_avg": [ 27.333333333333332, 18.080068829760823 ], "wc_review_avg": [ 531.0, 304.8835843399903 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 3, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:7d0dYifEf7EJ:scholar.google.com/&scioq=Retrieval-augmented+Text-to-3D+Generation&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;1;0;2;0", "aff_unique_norm": "Korea University;Korea Advanced Institute of Science and Technology;Runway", "aff_unique_dep": ";;", "aff_unique_url": 
"https://www.korea.ac.kr;https://www.kaist.ac.kr;https://www.runwayml.com", "aff_unique_abbr": "KU;KAIST;Runway", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1;0", "aff_country_unique": "South Korea;United States" }, { "id": "9nT8ouPui8", "title": "On Memorization in Diffusion Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "Due to their capacity to generate novel and high-quality samples, diffusion models have attracted significant research interest in recent years. Notably, the typical training objective of diffusion models, i.e., denoising score matching, has a closed-form optimal solution that can only generate training-data replicating samples. This indicates that a memorization behavior is theoretically expected, which contradicts the common generalization ability of state-of-the-art diffusion models, and thus calls for a deeper understanding. Looking into this, we first observe that memorization behaviors tend to occur on smaller-sized datasets, which motivates our definition of effective model memorization (EMM), a metric measuring the maximum size of training data at which a model approximates its theoretical optimum. Then, we quantify the impact of the influential factors on these memorization behaviors in terms of EMM, focusing primarily on data distribution, model configuration, and training procedure. Besides comprehensive empirical results identifying the influential factors, we surprisingly find that conditioning training data on uninformative random labels can significantly trigger the memorization in diffusion models. Our study holds practical significance for diffusion model users and offers clues to theoretical research in deep generative models.", "keywords": "Diffusion Models;Memorization", "primary_area": "generative models", "supplementary_material": "/attachment/036527d261d51e762ec07f0af264526133fba1c5.zip", "author": "Xiangming Gu;Chao Du;Tianyu Pang;Chongxuan Li;Min Lin;Ye Wang", "authorids": "~Xiangming_Gu1;~Chao_Du1;~Tianyu_Pang1;~Chongxuan_Li1;~Min_Lin1;~Ye_Wang3", "gender": "M;M;M;M;M;M", "homepage": "https://guxm2021.github.io;https://duchao0726.github.io/;https://p2333.github.io/;http://ml.cs.tsinghua.edu.cn/~chongxuan;https://linmin.me;https://smcnus.comp.nus.edu.sg/", "dblp": "276/5844;75/7523;202/2550;161/9965;;44/6292-7", "google_scholar": "BkxEuIoAAAAJ;QOp7xW0AAAAJ;wYDbtFsAAAAJ;UKMcQn4AAAAJ;BGONmkIAAAAJ;https://scholar.google.com.sg/citations?user=CdgLLL8AAAAJ", "orcid": ";0000-0003-1244-6336;0000-0003-0639-6176;0000-0002-0912-9076;;0000-0002-0123-1260", "linkedin": "xiangming-gu/;duchao/;%E5%A4%A9%E5%AE%87-%E5%BA%9E-b3999017a/;;min-lin-08a3a422/;", "or_profile": "~Xiangming_Gu1;~Chao_Du1;~Tianyu_Pang1;~Chongxuan_Li1;~Min_Lin1;~Ye_Wang3", "aff": "National University of Singapore;Sea AI Lab;Sea AI Lab;Renmin University of China;Sea AI Lab;National University of Singapore", "aff_domain": "nus.edu.sg;sea.com;sea.com;ruc.edu.cn;sea.com;nus.edu.sg", "position": "PhD student;Senior Research Scientist;Senior Research Scientist;Associate Professor;Principal Researcher;Associate Professor", "bibtex": "@misc{\ngu2024on,\ntitle={On Memorization in Diffusion Models},\nauthor={Xiangming Gu and Chao Du and Tianyu Pang and Chongxuan Li and Min Lin and Ye Wang},\nyear={2024},\nurl={https://openreview.net/forum?id=9nT8ouPui8}\n}", "github": "", "project": "", "reviewers": "gEHa;MQLD;sac3;p3eW;QP3y", "site": "https://openreview.net/forum?id=9nT8ouPui8", "pdf_size": 1226281, "rating": "3;5;5;5;6", 
"confidence": "4;4;2;4;3", "soundness": "3;3;2;3;3", "contribution": "1;3;2;2;2", "presentation": "3;4;2;3;3", "wc_summary": "24;74;79;93;100", "wc_strengths": "74;37;51;170;126", "wc_weaknesses": "236;11;101;334;268", "wc_questions": "56;19;98;4;204", "wc_review": "390;141;329;601;698", "wc_reply_reviewers": "0;36;111;0;0", "wc_reply_authors": "856;534;1500;770;1297", "reply_reviewers": "0;1;1;0;0", "reply_authors": "2;2;3;2;2", "rating_avg": [ 4.8, 0.9797958971132712 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "contribution_avg": [ 2.0, 0.6324555320336759 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 74.0, 26.69082239272518 ], "wc_strengths_avg": [ 91.6, 49.53624935337758 ], "wc_weaknesses_avg": [ 190.0, 117.38654096616017 ], "wc_questions_avg": [ 76.2, 71.69490916376141 ], "wc_review_avg": [ 431.8, 198.17709252080573 ], "wc_reply_reviewers_avg": [ 29.4, 43.116586135732035 ], "wc_reply_authors_avg": [ 991.4, 354.5789615868375 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.4082482904638631, "gs_citation": 48, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2503430110501581051&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;1;2;1;0", "aff_unique_norm": "National University of Singapore;Sea AI Lab;Renmin University of China", "aff_unique_dep": ";;", "aff_unique_url": "https://www.nus.edu.sg;;http://www.ruc.edu.cn", "aff_unique_abbr": "NUS;;RUC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;2;0", "aff_country_unique": "Singapore;;China" }, { "id": "9nXgWT12tb", "title": "Correlated Attention in Transformers for Multivariate Time Series", "track": "main", "status": "Reject", "tldr": "", "abstract": "Multivariate time series (MTS) analysis prevail in real-world applications such as finance, climate science and healthcare. The various self-attention mechanisms, the backbone of the state-of-the-art Transformer-based models, efficiently discover the temporal dependencies, yet cannot well capture the intricate cross-correlation between different features of MTS data, which inherently stems from complex dynamical systems in practice. To this end, we propose a novel correlated attention mechanism, which not only efficiently captures feature-wise dependencies, but can also be seamlessly integrated within the encoder blocks of existing well-known Transformers to gain efficiency improvement. In particular, correlated attention operates across feature channels to compute cross-covariance matrices between queries and keys with different lag values, and selectively aggregate representations at the sub-series level. This architecture facilitates automated discovery and representation learning of not only instantaneous but also lagged cross-correlations, while inherently capturing time series auto-correlation. When combined with prevalent Transformer baselines, correlated attention mechanism constitutes a better alternative for encoder-only architectures, which are suitable for a wide range of tasks including imputation, anomaly detection and classification. 
Extensive experiments on the aforementioned tasks consistently underscore the advantages of correlated attention mechanism in enhancing base Transformer models, and demonstrate our state-of-the-art results in imputation, anomaly detection and classification.", "keywords": "Transformer;multivariate time series;deep learning", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/f2e5b20ec68fc0d5845f952195abc8b9ee7348d2.pdf", "author": "Quang Minh Nguyen;Lam M. Nguyen;Subhro Das", "authorids": "~Quang_Minh_Nguyen1;~Lam_M._Nguyen1;~Subhro_Das1", "gender": "M;;", "homepage": ";;", "dblp": ";;", "google_scholar": ";;", "orcid": ";;", "linkedin": "quang-m-nguyen-191122b3/;;", "or_profile": "~Quang_Minh_Nguyen1;~Lam_M._Nguyen1;~Subhro_Das1", "aff": "Massachusetts Institute of Technology;;", "aff_domain": "mit.edu;;", "position": "PhD student;;", "bibtex": "@misc{\nnguyen2024correlated,\ntitle={Correlated Attention in Transformers for Multivariate Time Series},\nauthor={Quang Minh Nguyen and Lam M. Nguyen and Subhro Das},\nyear={2024},\nurl={https://openreview.net/forum?id=9nXgWT12tb}\n}", "github": "", "project": "", "reviewers": "KQRi;rWhs;mjBo;M5Eq", "site": "https://openreview.net/forum?id=9nXgWT12tb", "pdf_size": 749721, "rating": "5;6;6;6", "confidence": "3;2;3;3", "soundness": "2;3;2;3", "contribution": "2;3;1;2", "presentation": "3;3;3;3", "wc_summary": "73;170;16;132", "wc_strengths": "77;270;33;104", "wc_weaknesses": "69;210;111;318", "wc_questions": "427;197;44;82", "wc_review": "646;847;204;636", "wc_reply_reviewers": "89;0;137;20", "wc_reply_authors": "2240;1155;1584;1406", "reply_reviewers": "1;0;1;1", "reply_authors": "7;5;5;6", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 97.75, 58.49946580952684 ], "wc_strengths_avg": [ 121.0, 89.6799866190891 ], "wc_weaknesses_avg": [ 177.0, 96.16392254894764 ], "wc_questions_avg": [ 187.5, 149.30924284852563 ], "wc_review_avg": [ 583.25, 234.58194197337525 ], "wc_reply_reviewers_avg": [ 61.5, 54.68317840067456 ], "wc_reply_authors_avg": [ 1596.25, 401.7028597110058 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 5.75, 0.82915619758885 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17876364347666406101&as_sdt=8005&sciodt=0,7&hl=en", "gs_version_total": 3, "aff_unique_index": "0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "9nddtu94uX", "title": "PlatoLM: Teaching LLMs via a Socratic Questioning User Simulator", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "The unparalleled performance of closed-sourced ChatGPT has sparked efforts towards its democratization, with notable strides made by leveraging real user and ChatGPT conversations, as evidenced by Vicuna. However, due to challenges in gathering conversations involving human participation, current endeavors like Baize and UltraChat aim to automatically generate conversational data. 
They primarily rely on ChatGPT conducting roleplay to simulate human behaviors based on instructions rather than genuine learning from humans, resulting in limited scope, diminished diversity, and an absence of genuine multi-round conversational dynamics. To address the above issues, we target human questions extracted from genuine human-machine conversations as a learning goal and train a user simulator called Socratic to produce a high-quality human-centric synthetic conversation dataset. Subsequently, this dataset was used to train our assistant model, named PlatoLM. PlatoLM achieves the SOTA performance among 7B models (including LLaMA-2-7B-chat and Vicuna-7B) in both Vicuna-Bench and pairwise comparison in MT-Bench; the effectiveness of PlatoLM is also evidenced by manual evaluation.", "keywords": "Large Language Model;User Simulation;Human Computer Interaction", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/65555c22ee2606bad7996d1fe101f345329957b8.zip", "author": "Chuyi Kong;Yaxin FAN;Xiang Wan;Feng Jiang;Benyou Wang", "authorids": "~Chuyi_Kong1;~Yaxin_FAN2;~Xiang_Wan1;~Feng_Jiang4;~Benyou_Wang2", "gender": "Not Specified;M;M;M;M", "homepage": ";https://fanyaxin.top/;http://www.sribd.cn/teacher/28;;https://wabyking.github.io/old.html", "dblp": ";234/9447;;75/1693-7;169/1793", "google_scholar": "w5vcgWYAAAAJ;N0oiLQwAAAAJ;;zrxpiWYAAAAJ;Jk4vJU8AAAAJ", "orcid": ";;;0000-0002-3465-311X;0000-0002-1501-9914", "linkedin": ";;;;", "or_profile": "~Chuyi_Kong1;~Yaxin_FAN2;~Xiang_Wan1;~Feng_Jiang4;~Benyou_Wang2", "aff": "Hong Kong Baptist University;Soochow University;Shenzhen Research Institute of Big Data;The Chinese University of Hong Kong, Shenzhen;The Chinese University of Hong Kong, Shenzhen", "aff_domain": "hkbu.edu.hk;suda.edu.cn;sribd.cn;cuhk.edu.cn;cuhk.edu.cn", "position": "PhD student;PhD student;Principal Researcher;Postdoc;Assistant Professor", "bibtex": "@misc{\nanonymous2024platolm,\ntitle={Plato{LM}: Teaching {LLM}s via a Socratic Questioning User Simulator},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=9nddtu94uX}\n}", "github": "", "project": "", "reviewers": "GXfd;FWRq;Z8HQ;6WiD", "site": "https://openreview.net/forum?id=9nddtu94uX", "pdf_size": 2136631, "rating": "6;6;6;8", "confidence": "5;3;4;3", "soundness": "2;3;2;3", "contribution": "1;3;2;3", "presentation": "3;3;3;3", "wc_summary": "96;81;44;69", "wc_strengths": "14;129;77;104", "wc_weaknesses": "230;113;61;59", "wc_questions": "17;69;244;31", "wc_review": "357;392;426;263", "wc_reply_reviewers": "0;0;27;109", "wc_reply_authors": "822;932;1300;1181", "reply_reviewers": "0;0;1;2", "reply_authors": "2;3;3;4", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 72.5, 19.03286631067428 ], "wc_strengths_avg": [ 81.0, 42.83106349368411 ], "wc_weaknesses_avg": [ 115.75, 69.42396920372674 ], "wc_questions_avg": [ 90.25, 90.78374028426015 ], "wc_review_avg": [ 359.5, 60.82146002851296 ], "wc_reply_reviewers_avg": [ 34.0, 44.68221122549778 ], "wc_reply_authors_avg": [ 1058.75, 190.56675339628367 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 4, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=7100133306028201699&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;3", "aff_unique_norm": "Hong Kong Baptist University;Soochow University;Shenzhen Research Institute of Big Data;Chinese University of Hong Kong", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.hkbu.edu.hk;https://www.soochow.edu.cn;http://www.sribd.cn;https://www.cuhk.edu.cn", "aff_unique_abbr": "HKBU;Soochow U;;CUHK", "aff_campus_unique_index": "0;2;2", "aff_campus_unique": "Hong Kong SAR;;Shenzhen", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Mastering Symbolic Operations: Augmenting Language Models with Compiled Neural Networks", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19283", "id": "9nsNyN0vox", "author_site": "Yixuan Weng, Minjun Zhu, Fei Xia, Bin Li, Shizhu He, Kang Liu, Jun Zhao", "tldr": "", "abstract": "Language models' (LMs) proficiency in handling deterministic symbolic reasoning and rule-based tasks remains limited due to their dependency implicit learning on textual data. To endow LMs with genuine rule comprehension abilities, we propose \"Neural Comprehension\" - a framework that synergistically integrates compiled neural networks (CoNNs) into the standard transformer architecture. CoNNs are neural modules designed to explicitly encode rules through artificially generated attention weights. By incorporating CoNN modules, the Neural Comprehension framework enables LMs to accurately and robustly execute rule-intensive symbolic tasks. Extensive experiments demonstrate the superiority of our approach over existing techniques in terms of length generalization, efficiency, and interpretability for symbolic operations. Furthermore, it can be applied to LMs across different model scales, outperforming tool-calling methods in arithmetic reasoning tasks while maintaining superior inference efficiency. Our work highlights the potential of seamlessly unifying explicit rule learning via CoNNs and implicit pattern learning in LMs, paving the way for true symbolic comprehension capabilities. 
The code is released at: \\url{https://github.com/wengsyx/Neural-Comprehension}.", "keywords": "Language Models;Compiled Neural Networks;Neural Comprehension;Symbolic Operations;Length Generalization", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/973363d708001b999bd7968b8230071cc264af34.zip", "author": "Yixuan Weng;Minjun Zhu;Fei Xia;Bin Li;Shizhu He;Kang Liu;Jun Zhao", "authorids": "~Yixuan_Weng1;~Minjun_Zhu2;~Fei_Xia4;~Bin_Li14;~Shizhu_He2;~Kang_Liu1;~Jun_Zhao4", "gender": "M;F;M;M;M;M;M", "homepage": "https://wengsyx.github.io/;;https://github.com/Alex0xf;https://libincn.top;https://heshizhu.github.io/;http://www.nlpr.ia.ac.cn/cip/~liukang/index.html;http://nlpr-web.ia.ac.cn/cip/english/~junzhao/index.html", "dblp": "298/8205;271/6029;79/1081;89/6764-83;136/8650;42/4903.html;https://dblp.uni-trier.de/pid/47/2026-1.html", "google_scholar": "O1XsDEMAAAAJ;cm2ub2kAAAAJ;;2ZIBEWgAAAAJ;zBPIt3QAAAAJ;DtZCfl0AAAAJ;https://scholar.google.com.hk/citations?user=HljRttwAAAAJ", "orcid": "0000-0002-9720-8689;;0009-0002-4609-9950;0000-0002-6508-5071;;;", "linkedin": ";;;;;;", "or_profile": "~Yixuan_Weng1;~Minjun_Zhu2;~Fei_Xia4;~Bin_Li14;~Shizhu_He2;~Kang_Liu1;~Jun_Zhao4", "aff": "Institute of Automation, Chinese Academy of Sciences;Westlake University;;Hunan University;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Institute of automation, Chinese academy of science", "aff_domain": "ia.ac.cn;westlake.edu;;hnu.edu.cn;ia.ac.cn;ia.ac.cn;nlpr.ia.ac.cn", "position": "MS student;PhD student;;PhD student;Associate Researcher;Professor;Full Professor", "bibtex": "@inproceedings{\nweng2024mastering,\ntitle={Mastering Symbolic Operations: Augmenting Language Models with Compiled Neural Networks},\nauthor={Yixuan Weng and Minjun Zhu and Fei Xia and Bin Li and Shizhu He and Kang Liu and Jun Zhao},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=9nsNyN0vox}\n}", "github": "", "project": "", "reviewers": "68he;uMuv;M9Rq;NkHa", "pdf_size": 2344693, "rating": "5;6;6;8", "confidence": "3;4;3;4", "soundness": "3;3;3;3", "contribution": "3;3;2;3", "presentation": "2;2;2;3", "wc_summary": "77;140;105;72", "wc_strengths": "97;80;42;81", "wc_weaknesses": "75;166;161;40", "wc_questions": "153;2;2;106", "wc_review": "402;388;310;299", "wc_reply_reviewers": "0;0;47;33", "wc_reply_authors": "1336;1673;1819;1165", "reply_reviewers": "0;0;1;1", "reply_authors": "3;3;4;3", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 98.5, 27.060118255469618 ], "wc_strengths_avg": [ 75.0, 20.211382931407737 ], "wc_weaknesses_avg": [ 110.5, 54.45410911951457 ], "wc_questions_avg": [ 65.75, 65.88009942311866 ], "wc_review_avg": [ 349.75, 45.68574723039998 ], "wc_reply_reviewers_avg": [ 20.0, 20.603397778036516 ], "wc_reply_authors_avg": [ 1498.25, 260.19163610692794 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.25, 0.4330127018922193 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.6882472016116854, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11908960632930766199&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "openreview": 
"https://openreview.net/forum?id=9nsNyN0vox", "pdf": "https://openreview.net/pdf?id=9nsNyN0vox", "email": "ia.ac.cn;westlake.edu;;hnu.edu.cn;ia.ac.cn;ia.ac.cn;nlpr.ia.ac.cn", "author_num": 7, "aff_unique_index": "0;1;2;0;0;0", "aff_unique_norm": "Chinese Academy of Sciences;Westlake University;Hunan University", "aff_unique_dep": "Institute of Automation;;", "aff_unique_url": "http://www.ia.cas.cn;https://www.westlake.edu.cn;http://www.hunu.edu.cn/", "aff_unique_abbr": "CAS;WU;HNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "9o7KuFcsps", "title": "Unified Anomaly Detection via Multi-Scale Contrasted Memory", "track": "main", "status": "Reject", "tldr": "", "abstract": "Deep anomaly detection (AD) aims to provide robust and efficient classifiers for one-class (OC) and outlier-exposure (OE) settings. However current models still struggle on edge-case normal samples and are often unable to keep high performance over different scales of anomalies. Additionally, there is a lack of a unified framework that efficiently addresses both OC and OE settings. To address these limitations, we present a novel two-stage method which leverages multi-scale normal prototypes during training to compute an anomaly deviation score. First, we employ a novel memory-augmented contrastive learning (CL) to jointly learn representations and memory modules across multiple scales. This allows us to effectively capture subtle features of normal data while adapting to varying levels of anomaly complexity.\nThen, we train an efficient anomaly distance-based detector that computes spatial deviation maps between the learned prototypes and incoming observations.\nOur model outperforms the state-of-the-art on a wide range of anomalies, including object, style, and local anomalies, as well as face presentation attacks. 
Notably, it stands as the first model capable of maintaining exceptional performance across both OC and OE settings.", "keywords": "anomaly detection;self-supervised learning;unbalanced outlier-exposure;hopfield memory", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/36e1233ea52b1e5835a2a33d2cc49fa24bbc71d5.pdf", "author": "Lo\u00efc J\u00e9z\u00e9quel;Ngoc-Son Vu;Jean Beaudet;Aymeric Histace", "authorids": "~Lo\u00efc_J\u00e9z\u00e9quel1;~Ngoc-Son_Vu2;~Jean_Beaudet1;~Aymeric_Histace1", "gender": "M;M;;M", "homepage": ";;;https://aymeric.histace.free.fr", "dblp": "290/7143;11/8109;;72/6341", "google_scholar": "sq4F5egAAAAJ;Fw14qXwAAAAJ;;https://scholar.google.fr/citations?user=y0MU8CAAAAAJ", "orcid": "0000-0002-4896-0081;;;", "linkedin": ";;;", "or_profile": "~Lo\u00efc_J\u00e9z\u00e9quel1;~Ngoc-Son_Vu2;~Jean_Beaudet1;~Aymeric_Histace1", "aff": "Ecole Nationale Sup\u00e9rieure de l'Electronique et de ses Applications;Ecole Nationale Sup\u00e9rieure de l'Electronique et de ses Applications;;ETIS", "aff_domain": "ensea.fr;ensea.fr;;ensea.fr", "position": "PhD student;Associate Professor;;Full Professor", "bibtex": "@misc{\nj{\\'e}z{\\'e}quel2024unified,\ntitle={Unified Anomaly Detection via Multi-Scale Contrasted Memory},\nauthor={Lo{\\\"\\i}c J{\\'e}z{\\'e}quel and Ngoc-Son Vu and Jean Beaudet and Aymeric Histace},\nyear={2024},\nurl={https://openreview.net/forum?id=9o7KuFcsps}\n}", "github": "", "project": "", "reviewers": "t6Z1;9JQe;xRL9", "site": "https://openreview.net/forum?id=9o7KuFcsps", "pdf_size": 1108844, "rating": "5;5;6", "confidence": "5;4;3", "soundness": "2;2;3", "contribution": "2;2;2", "presentation": "3;2;3", "wc_summary": "89;88;100", "wc_strengths": "35;103;69", "wc_weaknesses": "315;501;72", "wc_questions": "116;8;72", "wc_review": "555;700;313", "wc_reply_reviewers": "29;0;14", "wc_reply_authors": "1167;1411;677", "reply_reviewers": "1;0;1", "reply_authors": "2;3;1", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 92.33333333333333, 5.436502143433363 ], "wc_strengths_avg": [ 69.0, 27.760883751542686 ], "wc_weaknesses_avg": [ 296.0, 175.65306715227035 ], "wc_questions_avg": [ 65.33333333333333, 44.34210439550904 ], "wc_review_avg": [ 522.6666666666666, 159.63778444410403 ], "wc_reply_reviewers_avg": [ 14.333333333333334, 11.841546445554407 ], "wc_reply_authors_avg": [ 1085.0, 305.21249428335443 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:yUIhXttEPQcJ:scholar.google.com/&scioq=Unified+Anomaly+Detection+via+Multi-Scale+Contrasted+Memory&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;0;1", "aff_unique_norm": "Ecole Nationale Sup\u00e9rieure de l'Electronique et de ses Applications;ETIS", "aff_unique_dep": ";", "aff_unique_url": "https://www.enssea.fr;", "aff_unique_abbr": "ENSEA;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "France;" }, { "title": "Video Language Planning", "status": "Poster", "track": 
"main", "site": "https://iclr.cc/virtual/2024/poster/19282", "id": "9pKtcJcMP3", "author_site": "Yilun Du, Sherry Yang, Pete Florence, Fei Xia, Ayzaan Wahid, brian ichter, Pierre Sermanet, Tianhe Yu, Pieter Abbeel, Joshua B Tenenbaum, Leslie Kaelbling, Andy Zeng, Jonathan Tompson", "tldr": "", "abstract": "We are interested in enabling visual planning for complex long-horizon tasks in the space of generated videos and language, leveraging recent advances in large generative models pretrained on Internet-scale data. To this end, we present video language planning (VLP), an algorithm that consists of a tree search procedure, where we train (i) vision-language models to serve as both policies and value functions, and (ii) text-to-video models as dynamics models. VLP takes as input a long-horizon task instruction and current image observation, and outputs a long video plan that provides detailed multimodal (video and language) specifications that describe how to complete the final task. VLP scales with increasing computation budget where more computation time results in improved video plans, and is able to synthesize long-horizon video plans across different robotics domains -- from multi-object rearrangement, to multi-camera bi-arm dexterous manipulation. Generated video plans can be translated into real robot actions via goal-conditioned policies, conditioned on each intermediate frame of the generated video. Experiments show that VLP substantially improves long-horizon task success rates compared to prior methods on both simulated and real robots (across 3 hardware platforms).", "keywords": "Planning;Hierarchical Planning;Language Models;Video Models;Long-Horizon Planning", "primary_area": "applications to robotics, autonomy, planning", "supplementary_material": "", "author": "Yilun Du;Sherry Yang;Pete Florence;Fei Xia;Ayzaan Wahid;brian ichter;Pierre Sermanet;Tianhe Yu;Pieter Abbeel;Joshua B. 
Tenenbaum;Leslie Pack Kaelbling;Andy Zeng;Jonathan Tompson", "authorids": "~Yilun_Du1;~Sherry_Yang1;~Pete_Florence1;~Fei_Xia1;~Ayzaan_Wahid1;~brian_ichter1;~Pierre_Sermanet1;~Tianhe_Yu1;~Pieter_Abbeel2;~Joshua_B._Tenenbaum1;~Leslie_Pack_Kaelbling1;~Andy_Zeng3;~Jonathan_Tompson1", "gender": ";F;;M;M;;;M;M;;F;M;M", "homepage": "https://yilundu.github.io;https://sherryy.github.io;http://www.peteflorence.com/;;https://ayzaan.com;;https://sermanet.github.io/;https://cs.stanford.edu/~tianheyu/;https://people.eecs.berkeley.edu/~pabbeel/;;http://people.csail.mit.edu/lpk/;http://jonathantompson.com;http://andyzeng.github.io/", "dblp": "204/4379;;;;;;28/6457;192/1797;;t/JoshuaBTenenbaum;k/LesliePackKaelbling;139/0769;http://dblp.uni-trier.de/pers/hd/z/Zeng:Andy", "google_scholar": ";7c1B_fIAAAAJ;;pqP5_PgAAAAJ;;-w5DuHgAAAAJ;0nPi5YYAAAAJ;;https://scholar.google.com.tw/citations?user=vtwH6GkAAAAJ;;IcasIiwAAAAJ;U_Jw8DUAAAAJ;q7nFtUcAAAAJ", "orcid": ";;;0000-0003-4343-1444;;;;;;;0000-0001-6054-7145;;", "linkedin": ";;;;;;sermanet/;;;;;;", "or_profile": "~Yilun_Du1;~Sherry_Yang1;~Pete_Florence1;~Fei_Xia1;~Ayzaan_Wahid1;~brian_ichter1;~Pierre_Sermanet1;~Tianhe_Yu1;~Pieter_Abbeel2;~Joshua_B._Tenenbaum1;~Leslie_Pack_Kaelbling1;~Jonathan_Tompson1;~Andy_Zeng1", "aff": "Massachusetts Institute of Technology;University of California, Berkeley;Google;Google;Robotics at Google;Google;Google;Google Brain;Covariant;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Google DeepMind;Google", "aff_domain": "mit.edu;berkeley.edu;google.com;google.com;google.com;google.com;google.com;google.com;covariant.ai;mit.edu;mit.edu;google.com;google.com", "position": "PhD student;Student;Research Scientist;Researcher;Software Engineer;Research Scientist;Research Scientist;Research Scientist;Founder;Professor;Full Professor;Researcher;Research Scientist", "bibtex": "@inproceedings{\ndu2024video,\ntitle={Video Language Planning},\nauthor={Yilun Du and Sherry Yang and Pete Florence and Fei Xia and Ayzaan Wahid and brian ichter and Pierre Sermanet and Tianhe Yu and Pieter Abbeel and Joshua B. 
Tenenbaum and Leslie Pack Kaelbling and Andy Zeng and Jonathan Tompson},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=9pKtcJcMP3}\n}", "github": "", "project": "", "reviewers": "AYpb;MAiq;4qts;uBLi", "pdf_size": 5468375, "rating": "6;6;8;8", "confidence": "4;4;3;3", "soundness": "2;2;3;3", "contribution": "2;3;4;3", "presentation": "3;4;3;4", "wc_summary": "73;93;83;135", "wc_strengths": "20;66;126;119", "wc_weaknesses": "353;91;88;147", "wc_questions": "70;22;280;137", "wc_review": "516;272;577;538", "wc_reply_reviewers": "274;0;34;0", "wc_reply_authors": "850;367;407;403", "reply_reviewers": "1;0;1;0", "reply_authors": "2;1;1;1", "rating_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 96.0, 23.600847442411894 ], "wc_strengths_avg": [ 82.75, 43.01961761801237 ], "wc_weaknesses_avg": [ 169.75, 108.37752303868179 ], "wc_questions_avg": [ 127.25, 97.18892683840068 ], "wc_review_avg": [ 475.75, 119.64609270678253 ], "wc_reply_reviewers_avg": [ 77.0, 114.58184847522752 ], "wc_reply_authors_avg": [ 506.75, 198.78678904796465 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": -1.0, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "openreview": "https://openreview.net/forum?id=9pKtcJcMP3", "pdf": "https://openreview.net/pdf?id=9pKtcJcMP3", "email": "mit.edu;berkeley.edu;google.com;google.com;google.com;google.com;google.com;google.com;covariant.ai;mit.edu;mit.edu;google.com;google.com", "author_num": 13, "aff_unique_index": "0;1;2;2;2;2;2;2;3;0;0;2;2", "aff_unique_norm": "Massachusetts Institute of Technology;University of California, Berkeley;Google;Covariant", "aff_unique_dep": ";;Google;", "aff_unique_url": "https://web.mit.edu;https://www.berkeley.edu;https://www.google.com;", "aff_unique_abbr": "MIT;UC Berkeley;Google;", "aff_campus_unique_index": "1;2;2;2;2;2;2;2", "aff_campus_unique": ";Berkeley;Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;2;0", "aff_country_unique": "United States;;United Kingdom" }, { "id": "9pe38WpsbX", "title": "MuDreamer: Learning Predictive World Models without Reconstruction", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "The DreamerV3 agent recently demonstrated state-of-the-art performance in diverse domains, learning powerful world models in latent space using a pixel reconstruction loss. However, while the reconstruction loss is essential to Dreamer's performance, it also necessitates modeling unnecessary information. Consequently, Dreamer sometimes fails to perceive crucial elements which are necessary for task-solving, significantly limiting its potential. In this paper, we present MuDreamer, a reinforcement learning agent that builds upon the DreamerV3 algorithm by learning a predictive world model without the need for reconstructing input signals. Rather than relying on pixel reconstruction, hidden representations are instead learned by predicting the environment value function and previously selected actions. Similar to predictive self-supervised methods for images, we find that the use of batch normalization is crucial to prevent learning collapse. 
We also study the effect of KL balancing between model posterior and prior losses on convergence speed and learning stability. We evaluate MuDreamer on the widely used DeepMind Visual Control Suite and achieves performance comparable to DreamerV3. MuDreamer also demonstrates promising results on the Atari100k benchmark. Research code will be made available publicly.", "keywords": "Model-Based Reinforcement Learning;Dreamer;Reconstruction-Free", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/c218d7d4916d4f567814e4bbc29379c6061b3190.pdf", "author": "Maxime Burchi;Radu Timofte", "authorids": "~Maxime_Burchi1;~Radu_Timofte1", "gender": ";M", "homepage": "https://burchim.github.io/;https://www.informatik.uni-wuerzburg.de/computervision/", "dblp": "302/0270;24/8616", "google_scholar": "7S_l2eAAAAAJ;https://scholar.google.ch/citations?user=u3MwH5kAAAAJ", "orcid": ";0000-0002-1478-0402", "linkedin": ";https://ch.linkedin.com/in/radutimofte", "or_profile": "~Maxime_Burchi1;~Radu_Timofte1", "aff": "Bayerische Julius-Maximilians-Universit\u00e4t W\u00fcrzburg;Bayerische Julius-Maximilians-Universit\u00e4t W\u00fcrzburg", "aff_domain": "uni-wuerzburg.de;uni-wuerzburg.de", "position": "PhD student;Full Professor", "bibtex": "@misc{\nburchi2024mudreamer,\ntitle={MuDreamer: Learning Predictive World Models without Reconstruction},\nauthor={Maxime Burchi and Radu Timofte},\nyear={2024},\nurl={https://openreview.net/forum?id=9pe38WpsbX}\n}", "github": "", "project": "", "reviewers": "QumB;5xED;8AeP", "site": "https://openreview.net/forum?id=9pe38WpsbX", "pdf_size": 3319017, "rating": "3;5;5", "confidence": "5;3;4", "soundness": "2;3;3", "contribution": "2;2;1", "presentation": "2;3;4", "wc_summary": "65;60;85", "wc_strengths": "58;34;76", "wc_weaknesses": "168;224;217", "wc_questions": "17;5;159", "wc_review": "308;323;537", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "282;615;593", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 1.6666666666666667, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 70.0, 10.801234497346433 ], "wc_strengths_avg": [ 56.0, 17.204650534085253 ], "wc_weaknesses_avg": [ 203.0, 24.91318258807306 ], "wc_questions_avg": [ 60.333333333333336, 69.93965653015144 ], "wc_review_avg": [ 389.3333333333333, 104.59551721857979 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 496.6666666666667, 152.05773757213265 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7866069089508150540&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0", "aff_unique_norm": "University of W\u00fcrzburg", "aff_unique_dep": "", "aff_unique_url": "https://www.uni-wuerzburg.de", "aff_unique_abbr": "JMU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "W\u00fcrzburg", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "id": "9qtswuW5ux", "title": "Unsupervised graph neural networks with recurrent features for solving combinatorial optimization problems", "track": "main", "status": "Reject", "tldr": "", "abstract": "In recent years, graph neural networks (GNNs) have gained 
considerable attention as a promising approach to tackle combinatorial optimization problems.\nWe introduce a novel algorithm, dubbed QRF-GNN in the following, that leverages the power of GNNs to efficiently solve combinatorial problems which have quadratic unconstrained binary optimization (QUBO) formulation.\nIt relies on unsupervised learning and minimizes the loss function derived from QUBO relaxation.\nThe key components of the architecture are the recurrent use of intermediate GNN predictions, parallel convolutional layers and combination of artificial node features as input.\nThe performance of the algorithm was evaluated on benchmark datasets for maximum cut and graph coloring problems.\nResults of experiments show that QRF-GNN surpasses existing graph neural network based approaches and is comparable to the state-of-the-art conventional heuristics.", "keywords": "graph neural networks;combinatorial optimization;recurrent neural networks;maximum cut problem;graph coloring problem", "primary_area": "learning on graphs and other geometries & topologies", "supplementary_material": "", "author": "Daria Pugacheva;Yuriy Zotov;Andrei Ermakov;Igor Lyskov", "authorids": "~Daria_Pugacheva1;~Yuriy_Zotov1;~Andrei_Ermakov1;~Igor_Lyskov1", "gender": "F;M;M;M", "homepage": ";;;", "dblp": "383/6092;;;", "google_scholar": "https://scholar.google.ru/citations?hl=ru;;;", "orcid": "0000-0002-4285-1001;;;0000-0002-6111-2060", "linkedin": ";https://linkedin.com/in/yura-zotov-75a84193/;andrey-ermakov-8a2848202;", "or_profile": "~Daria_Pugacheva1;~Yuriy_Zotov1;~Andrei_Ermakov1;~Igor_Lyskov1", "aff": "AIRI;;National Research University Higher School of Economics;", "aff_domain": "airi.net;;edu.hse;", "position": "Researcher;;MS student;", "bibtex": "@misc{\npugacheva2024unsupervised,\ntitle={Unsupervised graph neural networks with recurrent features for solving combinatorial optimization problems},\nauthor={Daria Pugacheva and Yuriy Zotov and Andrei Ermakov and Igor Lyskov},\nyear={2024},\nurl={https://openreview.net/forum?id=9qtswuW5ux}\n}", "github": "", "project": "", "reviewers": "VZaD;oXir;vpKg;Nnir", "site": "https://openreview.net/forum?id=9qtswuW5ux", "pdf_size": 689549, "rating": "3;3;5;6", "confidence": "4;4;2;2", "soundness": "2;2;3;2", "contribution": "2;2;2;2", "presentation": "2;3;3;2", "wc_summary": "101;31;73;54", "wc_strengths": "40;37;42;78", "wc_weaknesses": "219;94;4;21", "wc_questions": "230;5;192;13", "wc_review": "590;167;311;166", "wc_reply_reviewers": "111;107;192;18", "wc_reply_authors": "2789;887;1153;204", "reply_reviewers": "1;1;2;1", "reply_authors": "6;3;2;2", "rating_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.0, 1.0 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 64.75, 25.674647027758727 ], "wc_strengths_avg": [ 49.25, 16.69393602479655 ], "wc_weaknesses_avg": [ 84.5, 84.69504117715512 ], "wc_questions_avg": [ 110.0, 101.92889678594584 ], "wc_review_avg": [ 308.5, 172.89953730418137 ], "wc_reply_reviewers_avg": [ 107.0, 61.567036634874675 ], "wc_reply_authors_avg": [ 1258.25, 949.1499815624504 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 1.6393596310755 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9622504486493761, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:OND0PEK0xVIJ:scholar.google.com/&scioq=Unsupervised+graph+neural+networks+with+recurrent+features+for+solving+combinatorial+optimization+problems&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Artificial Intelligence Research Institute;National Research University Higher School of Economics", "aff_unique_dep": ";", "aff_unique_url": "https://www.airi.jp;https://hse.ru", "aff_unique_abbr": "AIRI;HSE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Japan;Russian Federation" }, { "title": "Domain-Agnostic Molecular Generation with Chemical Feedback", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19281", "id": "9rPyHyjfwP", "author_site": "Yin Fang, Ningyu Zhang, Zhuo Chen, Lingbing Guo, Xiaohui Fan, Huajun Chen", "tldr": "", "abstract": "The generation of molecules with desired properties has become increasingly popular, revolutionizing the way scientists design molecular structures and providing valuable support for chemical and drug design. However, despite the potential of language models in molecule generation, they face challenges such as generating syntactically or chemically flawed molecules, having narrow domain focus, and struggling to create diverse and feasible molecules due to limited annotated data or external molecular databases.\nTo tackle these challenges, we introduce MolGen, a pre-trained molecular language model tailored specifically for molecule generation. Through the reconstruction of over 100 million molecular SELFIES, MolGen internalizes structural and grammatical insights. This is further enhanced by domain-agnostic molecular prefix tuning, fostering robust knowledge transfer across diverse domains. Importantly, our chemical feedback paradigm steers the model away from \"molecular hallucinations\", ensuring alignment between the model's estimated probabilities and real-world chemical preferences. Extensive experiments on well-known benchmarks underscore MolGen's optimization capabilities in properties such as penalized logP, QED, and molecular docking. 
Additional analyses confirm its proficiency in accurately capturing molecule distributions, discerning intricate structural patterns, and efficiently exploring the chemical space (https://github.com/zjunlp/MolGen).", "keywords": "molecule generation;pre-trained language models;SELFIES;natural products;self-feedback", "primary_area": "applications to physical sciences (physics, chemistry, biology, etc.)", "supplementary_material": "/attachment/3227006229cb78a850fdd90e59f6767a967b5a45.zip", "author": "Yin Fang;Ningyu Zhang;Zhuo Chen;Lingbing Guo;Xiaohui Fan;Huajun Chen", "authorids": "~Yin_Fang1;~Ningyu_Zhang1;~Zhuo_Chen3;~Lingbing_Guo1;~Xiaohui_Fan1;~Huajun_Chen1", "gender": "F;M;;M;;M", "homepage": "https://github.com/Fangyinfff;https://person.zju.edu.cn/en/ningyu;;https://guolingbing.github.io/;https://person.zju.edu.cn/en/fanxh;", "dblp": "231/7716;139/4181-1.html;;228/2586;133/8797;94/5089", "google_scholar": "4rWspjsAAAAJ;xQDOPvsAAAAJ;;og4v8cMAAAAJ;;", "orcid": "0000-0001-9538-848X;0000-0002-1970-0678;;;0000-0002-6336-3007;", "linkedin": ";ningyuzhang/;;;;", "or_profile": "~Yin_Fang1;~Ningyu_Zhang1;~Zhuo_Chen3;~Lingbing_Guo1;~Xiaohui_Fan1;~Huajun_Chen1", "aff": "Zhejiang University;Zhejiang University;;Zhejiang University;Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;;zju.edu.cn;zju.edu.cn;zju.edu.cn", "position": "PhD student;Associate Professor;;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nfang2024domainagnostic,\ntitle={Domain-Agnostic Molecular Generation with Chemical Feedback},\nauthor={Yin Fang and Ningyu Zhang and Zhuo Chen and Lingbing Guo and Xiaohui Fan and Huajun Chen},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=9rPyHyjfwP}\n}", "github": "", "project": "", "reviewers": "Hnvb;Vj81;13gf;A1g6", "pdf_size": 9422963, "rating": "6;6;8;8", "confidence": "4;4;4;4", "soundness": "2;2;3;3", "contribution": "2;2;3;3", "presentation": "1;3;3;3", "wc_summary": "80;63;91;116", "wc_strengths": "28;67;35;89", "wc_weaknesses": "578;143;256;11", "wc_questions": "23;145;107;2", "wc_review": "709;418;489;218", "wc_reply_reviewers": "157;14;20;0", "wc_reply_authors": "1266;838;932;113", "reply_reviewers": "2;1;1;0", "reply_authors": "4;3;3;1", "rating_avg": [ 7.0, 1.0 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 87.5, 19.241881404893856 ], "wc_strengths_avg": [ 54.75, 24.641174890820444 ], "wc_weaknesses_avg": [ 247.0, 209.8535203421663 ], "wc_questions_avg": [ 69.25, 58.789348525051714 ], "wc_review_avg": [ 458.5, 175.47150765865095 ], "wc_reply_reviewers_avg": [ 47.75, 63.49163330707441 ], "wc_reply_authors_avg": [ 787.25, 420.51835572302906 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.75, 1.0897247358851685 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4693474697199891995&as_sdt=4005&sciodt=0,6&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=9rPyHyjfwP", "pdf": "https://openreview.net/pdf?id=9rPyHyjfwP", "email": "zju.edu.cn;zju.edu.cn;;zju.edu.cn;zju.edu.cn;zju.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", 
"aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "9rV9cp7KRH", "title": "Incentivized Collaborative Learning: Architectural Design and Insights", "track": "main", "status": "Reject", "tldr": "", "abstract": "Collaborations among various entities, such as companies, research labs, AI agents, and edge devices, have become increasingly crucial for achieving machine learning tasks that cannot be accomplished by a single entity alone. This is likely due to factors such as security constraints, privacy concerns, and limitations in computation resources. As a result, collaborative learning (CL) research has been gaining momentum. However, a significant challenge in practical applications of CL is how to effectively incentivize multiple entities to collaborate before any collaboration occurs. In this study, we propose ICL, an architectural framework for incentivized collaborative learning, and provide insights into the critical issue of when and why incentives can improve collaboration performance. Then, we apply the concepts of ICL to specific use cases in federated learning, assisted learning, and multi-armed bandit, corroborated with both theoretical and experimental results.", "keywords": "collaborative learning;incentive;modeling", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/7704433bb6b4da5ea843eae61b49570b845eac49.zip", "author": "Xinran Wang;Qi Le;Ahmad Faraz Khan;Jie Ding;Ali Anwar", "authorids": "~Xinran_Wang3;~Qi_Le1;~Ahmad_Faraz_Khan1;~Jie_Ding2;~Ali_Anwar1", "gender": "F;M;M;M;M", "homepage": "https://wang8740.github.io;https://www.linkedin.com/in/qi-le-60a8811a0/;https://afkd98.github.io/;http://jding.org;https://chalianwar.github.io/", "dblp": ";;;94/1825-2;69/9027-1", "google_scholar": "u8gID6EAAAAJ;;VjGylKsAAAAJ;ZyqvoqcAAAAJ;o3eOVbgAAAAJ", "orcid": ";;0009-0009-3867-5656;;", "linkedin": "wang-xinran;;ahmadfarazkhandurrani/;;", "or_profile": "~Xinran_Wang3;~Qi_Le1;~Ahmad_Faraz_Khan1;~Jie_Ding2;~Ali_Anwar1", "aff": "University of Minnesota - Twin Cities;University of Minnesota - Twin Cities;Virginia Polytechnic Institute and State University;University of Minnesota - Twin Cities;University of Minnesota", "aff_domain": "umn.edu;umn.edu;vt.edu;umn.edu;umn.edu", "position": "PhD student;PhD student;PhD student;Associate Professor;Assistant Professor", "bibtex": "@misc{\nwang2024incentivized,\ntitle={Incentivized Collaborative Learning: Architectural Design and Insights},\nauthor={Xinran Wang and Qi Le and Ahmad Faraz Khan and Jie Ding and Ali Anwar},\nyear={2024},\nurl={https://openreview.net/forum?id=9rV9cp7KRH}\n}", "github": "", "project": "", "reviewers": "npuS;R699;cras", "site": "https://openreview.net/forum?id=9rV9cp7KRH", "pdf_size": 9443415, "rating": "3;5;5", "confidence": "4;4;4", "soundness": "2;3;2", "contribution": "2;2;2", "presentation": "2;2;1", "wc_summary": "96;99;76", "wc_strengths": "77;67;59", "wc_weaknesses": "355;52;153", "wc_questions": "3;562;39", "wc_review": "531;780;327", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "917;1837;537", "reply_reviewers": "0;0;0", "reply_authors": "2;3;1", "rating_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 1.6666666666666667, 0.4714045207910317 ], "wc_summary_avg": [ 90.33333333333333, 
10.208928554075703 ], "wc_strengths_avg": [ 67.66666666666667, 7.363574011458175 ], "wc_weaknesses_avg": [ 186.66666666666666, 125.96913202138936 ], "wc_questions_avg": [ 201.33333333333334, 255.4529745808849 ], "wc_review_avg": [ 546.0, 185.24038436582882 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1097.0, 545.7716250105594 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:z3pPZleR0L8J:scholar.google.com/&scioq=Incentivized+Collaborative+Learning:+Architectural+Design+and+Insights&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "University of Minnesota;Virginia Tech", "aff_unique_dep": ";", "aff_unique_url": "https://www.minnesota.edu;https://www.vt.edu", "aff_unique_abbr": "UMN;VT", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Twin Cities;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "9rXBGpLMxV", "title": "xMLP: Revolutionizing Private Inference with Exclusive Square Activation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Private Inference (PI) enables deep neural networks (DNNs) to work on private data without leaking sensitive information by exploiting cryptographic primitives such as multi-party computation (MPC) and homomorphic encryption (HE).\nHowever, the use of non-linear activations such as ReLU in DNNs can lead to impractically high PI latency in existing PI systems, as ReLU requires the use of costly MPC computations, such as Garbled Circuits.\nSince square activations can be processed by Beaver's triples hundreds of times faster compared to ReLU, they are more friendly to PI tasks, but using them leads to a notable drop in model accuracy.\nThis paper starts by exploring the reason for such an accuracy drop after using square activations, and concludes that this is due to an ``information compounding\u2019\u2019 effect. Leveraging this insight, we propose xMLP, a novel DNN architecture that uses square activations exclusively while maintaining parity in both accuracy and efficiency with ReLU-based DNNs. \nOur experiments on CIFAR-100 and ImageNet show that xMLP models consistently achieve better performance than ResNet models with fewer activation layers and parameters while maintaining consistent performance with its ReLU-based variants.\nRemarkably, when compared to state-of-the-art PI Models, xMLP demonstrates superior performance, achieving a 0.58\\% increase in accuracy with 7$\\times$ faster PI speed. 
Moreover, it delivers a significant accuracy improvement of 4.96\\% while maintaining the same PI latency.\nWhen offloading PI to the GPU, xMLP is up to 700$\\times$ faster than the previous state-of-the-art PI model with comparable accuracy.", "keywords": "Privacy Preserving Machine Learning;Private Inference;Multi-Party Computation;Deep Learning", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "", "author": "Jiajie Li;Jinjun Xiong", "authorids": "~Jiajie_Li2;~Jinjun_Xiong1", "gender": "M;", "homepage": "https://jiajie.li;https://www.xlab-ub.com", "dblp": ";81/1130", "google_scholar": "oMCzOmoAAAAJ;tRt1xPYAAAAJ", "orcid": ";0000-0002-2620-4859", "linkedin": "li-jia-jie/;jinjun-xiong-314774/", "or_profile": "~Jiajie_Li2;~Jinjun_Xiong1", "aff": "State University of New York at Buffalo;State University of New York at Buffalo", "aff_domain": "buffalo.edu;buffalo.edu", "position": "PhD student;Professor", "bibtex": "@misc{\nli2024xmlp,\ntitle={x{MLP}: Revolutionizing Private Inference with Exclusive Square Activation},\nauthor={Jiajie Li and Jinjun Xiong},\nyear={2024},\nurl={https://openreview.net/forum?id=9rXBGpLMxV}\n}", "github": "", "project": "", "reviewers": "Bz3S;9z1L;gNjC;TxCL", "site": "https://openreview.net/forum?id=9rXBGpLMxV", "pdf_size": 758534, "rating": "3;3;5;5", "confidence": "5;5;3;4", "soundness": "2;3;2;2", "contribution": "2;2;2;1", "presentation": "3;3;3;3", "wc_summary": "52;71;100;38", "wc_strengths": "11;111;89;25", "wc_weaknesses": "260;600;70;119", "wc_questions": "5;42;3;46", "wc_review": "328;824;262;228", "wc_reply_reviewers": "93;170;114;0", "wc_reply_authors": "314;722;356;372", "reply_reviewers": "1;1;2;0", "reply_authors": "2;2;3;1", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 65.25, 23.23117517475171 ], "wc_strengths_avg": [ 59.0, 42.02380277890139 ], "wc_weaknesses_avg": [ 262.25, 207.0994628191971 ], "wc_questions_avg": [ 24.0, 20.062402647738878 ], "wc_review_avg": [ 410.5, 241.42648984732392 ], "wc_reply_reviewers_avg": [ 94.25, 61.26326386995717 ], "wc_reply_authors_avg": [ 441.0, 163.61234672236688 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:hBmZs37tiWMJ:scholar.google.com/&scioq=xMLP:+Revolutionizing+Private+Inference+with+Exclusive+Square+Activation&hl=en&as_sdt=0,5", "gs_version_total": 3, "aff_unique_index": "0;0", "aff_unique_norm": "State University of New York at Buffalo", "aff_unique_dep": "", "aff_unique_url": "https://www.buffalo.edu", "aff_unique_abbr": "SUNY Buffalo", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Buffalo", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "9rzEPbs4Wg", "title": "Improving Generalization and Safety of Deep Neural Networks with Masked Anchoring", "track": "main", "status": "Reject", "tldr": "", "abstract": "Anchoring is a recent architecture and task-agnostic technique that can produce state-of-the-art epistemic uncertainty estimates, and improve extrapolation capabilities. 
However, the differences between anchored models and non-anchored variants is not well studied -- as there is little insight into the kinds of functions anchoring induces and how they behave under distribution shifts. In this paper, we analyze and improve anchoring as a training protocol for deep neural networks, evaluating them on important tasks of out of distribution generalization, task adaptation, anomaly detection and calibration. We pinpoint the impact of anchoring on generalization as being inversely related to the sensitivity of the model to the distribution of residuals. We further improve this sensitivity using a new technique called Random Anchor Masking (RAM) that significantly improves the quality of anchored models. We build evidence for the superiority of RAM-training using a range of benchmarks of varying size, using neural networks of varying complexity and scale.", "keywords": "Anomaly Detection;OOD Generalization;ML Safety;Anchoring;Deep Neural Networks", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/ba233f79a4e1de1dd6a5cc117e4384bbbffd0376.pdf", "author": "Vivek Narayanaswamy;Kowshik Thopalli;Rushil Anirudh;Jayaraman J. Thiagarajan", "authorids": "~Vivek_Narayanaswamy1;~Kowshik_Thopalli1;~Rushil_Anirudh1;~Jayaraman_J._Thiagarajan3", "gender": "M;M;M;M", "homepage": ";https://kowshikthopalli.github.io/;https://rushila.com/;https://jjthiagarajan.com", "dblp": "230/4531;224/0052;136/5391;16/7803", "google_scholar": "7h2Ui6YAAAAJ;https://scholar.google.com/citations?hl=en;WkoIlpQAAAAJ;cMz65_oAAAAJ", "orcid": ";;0000-0002-4186-3502;", "linkedin": ";;rushilanirudh/;", "or_profile": "~Vivek_Narayanaswamy1;~Kowshik_Thopalli1;~Rushil_Anirudh1;~Jayaraman_J._Thiagarajan2", "aff": "Lawrence Livermore National Labs;Lawrence Livermore National Labs;Amazon;Lawrence Livermore National Labs", "aff_domain": "llnl.gov;llnl.gov;amazon.com;llnl.gov", "position": "Researcher;Postdoc;Applied Scientist;Computer Scientist", "bibtex": "@misc{\nnarayanaswamy2024improving,\ntitle={Improving Generalization and Safety of Deep Neural Networks with Masked Anchoring},\nauthor={Vivek Narayanaswamy and Kowshik Thopalli and Rushil Anirudh and Jayaraman J. 
Thiagarajan},\nyear={2024},\nurl={https://openreview.net/forum?id=9rzEPbs4Wg}\n}", "github": "", "project": "", "reviewers": "auhR;8fzY;GTiz;rVkq;BVnY;LRUp", "site": "https://openreview.net/forum?id=9rzEPbs4Wg", "pdf_size": 794700, "rating": "5;6;6;6;6;8", "confidence": "4;3;4;3;3;3", "soundness": "2;3;3;3;3;4", "contribution": "2;3;3;3;3;4", "presentation": "2;3;3;3;3;3", "wc_summary": "73;33;94;102;59;355", "wc_strengths": "81;31;94;71;37;239", "wc_weaknesses": "323;36;92;207;81;367", "wc_questions": "44;8;5;5;66;238", "wc_review": "521;108;285;385;243;1199", "wc_reply_reviewers": "404;0;23;120;75;58", "wc_reply_authors": "792;575;740;1285;783;714", "reply_reviewers": "1;0;1;1;2;1", "reply_authors": "2;2;2;3;2;2", "rating_avg": [ 6.166666666666667, 0.8975274678557507 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.5773502691896257 ], "contribution_avg": [ 3.0, 0.5773502691896257 ], "presentation_avg": [ 2.8333333333333335, 0.3726779962499649 ], "wc_summary_avg": [ 119.33333333333333, 107.79713457333744 ], "wc_strengths_avg": [ 92.16666666666667, 69.42722008613687 ], "wc_weaknesses_avg": [ 184.33333333333334, 125.3763224146498 ], "wc_questions_avg": [ 61.0, 82.38527376499597 ], "wc_review_avg": [ 456.8333333333333, 355.2053005735635 ], "wc_reply_reviewers_avg": [ 113.33333333333333, 135.44207453947075 ], "wc_reply_authors_avg": [ 814.8333333333334, 222.06561843042903 ], "reply_reviewers_avg": [ 1.0, 0.5773502691896257 ], "reply_authors_avg": [ 2.1666666666666665, 0.3726779962499649 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5252257314388904, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Umc_zKTaF2kJ:scholar.google.com/&scioq=Improving+Generalization+and+Safety+of+Deep+Neural+Networks+with+Masked+Anchoring&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Lawrence Livermore National Laboratory;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.llnl.gov;https://www.amazon.com", "aff_unique_abbr": "LLNL;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "9tNhV5kTSc", "title": "How do agents invest strategically under persistent improvement?", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "This paper studies algorithmic decision-making under human's strategic behavior, where a decision-maker uses an algorithm to make decisions about human agents, and the latter with information about the algorithm may exert effort strategically and improve to receive favorable decisions. Unlike prior works that assume agents benefit from their efforts immediately, we consider realistic scenarios where the impacts of these efforts are persistent and agents benefit from efforts by making improvements gradually. However, the agent's utility also diminishes as time goes on. We first develop a dynamic model to characterize persistent improvements and based on this construct a Stackelberg game to model the interplay between agents and the decision-maker. We analytically characterize the equilibrium strategies and identify conditions under which agents have incentives to improve. With the dynamics, we then study how the decision-maker can design an optimal policy to incentivize the largest improvements inside the agent population. 
We also extend the model to settings where (1) agents may be dishonest and game the algorithm into making favorable but erroneous decisions; (2) honest efforts are forgettable and not sufficient to guarantee persistent improvements. With the extended models, we further examine conditions under which agents prefer honest efforts over dishonest behavior and the impacts of forgettable efforts.", "keywords": "Strategic Classification;Stackelberg Game;Population Dynamics", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/ee27e133776715e834b3a5e5aab05b904ae56f69.zip", "author": "Tian Xie;Xuwei Tan;Xueru Zhang", "authorids": "~Tian_Xie4;~Xuwei_Tan1;~Xueru_Zhang2", "gender": "M;M;F", "homepage": "https://www.linkedin.com/in/tianxie1999/;https://engineering.osu.edu/people/tan.1206;https://xueruzhang.github.io/", "dblp": ";308/6151;", "google_scholar": ";IpTmmIYAAAAJ;PNBO_a4AAAAJ", "orcid": ";;", "linkedin": "tianxie1999/;;", "or_profile": "~Tian_Xie4;~Xuwei_Tan1;~Xueru_Zhang2", "aff": "Ohio State University, Columbus;Ohio State University, Columbus;Ohio State University", "aff_domain": "osu.edu;osu.edu;osu.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@misc{\nxie2024how,\ntitle={How do agents invest strategically under persistent improvement?},\nauthor={Tian Xie and Xuwei Tan and Xueru Zhang},\nyear={2024},\nurl={https://openreview.net/forum?id=9tNhV5kTSc}\n}", "github": "", "project": "", "reviewers": "6GuR;m2xs;gzfu", "site": "https://openreview.net/forum?id=9tNhV5kTSc", "pdf_size": 386779, "rating": "1;5;5", "confidence": "3;5;2", "soundness": "1;3;2", "contribution": "1;2;2", "presentation": "2;3;3", "wc_summary": "208;79;52", "wc_strengths": "50;73;47", "wc_weaknesses": "341;206;94", "wc_questions": "52;84;25", "wc_review": "651;442;218", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 3.6666666666666665, 1.8856180831641267 ], "confidence_avg": [ 3.3333333333333335, 1.247219128924647 ], "soundness_avg": [ 2.0, 0.816496580927726 ], "contribution_avg": [ 1.6666666666666667, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 113.0, 68.07348970047003 ], "wc_strengths_avg": [ 56.666666666666664, 11.61416759345623 ], "wc_weaknesses_avg": [ 213.66666666666666, 100.98294685517727 ], "wc_questions_avg": [ 53.666666666666664, 24.115462996914562 ], "wc_review_avg": [ 437.0, 176.80686261190957 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 3, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.18898223650461363, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:u8ubvAys_KkJ:scholar.google.com/&scioq=How+do+agents+invest+strategically+under+persistent+improvement%3F&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Ohio State University", "aff_unique_dep": "", "aff_unique_url": "https://www.osu.edu", "aff_unique_abbr": "OSU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Columbus;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "9tQfBNxX16", "title": "Towards efficient deep spiking neural networks construction with spiking activity based pruning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Spiking neural networks (SNNs) 
drawing inspiration from the biological nervous system possess the distinctive advantage of being biologically interpretable and energy-efficient. In recent years, there has been a rise in deep and large-scale SNNs structures that exhibit high performance across various complex datasets. However, within these structures, a significant number of redundant structural units are often present, compelling the need to compress the network models of SNNs to more effectively harness their low-power advantage. Currently, most model compression techniques for SNNs are based on unstructured pruning of individual connections, which requires specific hardware support. Receptive field cells in the biological visual system have influenced a crucial concept in deep learning: convolutional kernels. Hence, we propose a structured pruning approach based on the activity levels of convolutional kernels named Spiking Channel Activity-based (SCA) network pruning framework. Inspired by synaptic plasticity mechanisms, our method dynamically adjusts the network's structure by pruning and regenerating convolutional kernels during training, enhancing the model's adaptation to the current target task. While maintaining model performance, this approach refines the network architecture, ultimately reducing computational load and accelerating the inference process. We conducted experiments on static datasets including CIFAR10, CIFAR100 and DVS-CIFAR10. Experimental results demonstrate that this method incurs only about 2% accuracy loss while retaining 20% of the channels. This indicates that structured dynamic sparse learning methods can better facilitate the application of deep SNNs in low-power and high-efficiency scenarios.", "keywords": "spiking neural networks;pruning;network structure;power consumption", "primary_area": "applications to neuroscience & cognitive science", "supplementary_material": "/attachment/d51c7b629ced13c4a3eaf3b2cf04a3420803818d.zip", "author": "Yaxin Li;Jiangrong Shen;Hongming Xu;Long Chen;Gang Pan;Qiang Zhang;Qi Xu", "authorids": "~Yaxin_Li4;~Jiangrong_Shen1;~Hongming_Xu3;~Long_Chen18;~Gang_Pan1;~Qiang_Zhang13;~Qi_Xu1", "gender": ";F;M;M;;M;M", "homepage": ";;https://xhm1014.github.io/index.html;https://iris.ucl.ac.uk/iris/browse/profile?upi=LCHEI54;;https://faculty.dlut.edu.cn/2017022144/en/index.htm;https://www.researchgate.net/profile/Qi_Xu43", "dblp": "143/0251-3;208/3564;150/7585-2;64/5725-19.html;;72/3527-8;", "google_scholar": ";3XK6COkAAAAJ;nErn9W8AAAAJ;J_v0xb8AAAAJ;;https://scholar.google.de/citations?hl=de;dGEcAuYAAAAJ", "orcid": "0000-0003-0160-8950;;0000-0002-1305-0010;0000-0001-8552-859X;;0000-0003-0609-0337;0000-0001-9245-5544", "linkedin": ";;;;;;", "or_profile": "~Yaxin_Li4;~Jiangrong_Shen1;~Hongming_Xu3;~Long_Chen18;~Gang_Pan1;~Qiang_Zhang13;~Qi_Xu1", "aff": "Dalian University of Technology;Zhejiang University;Dalian University of Technology;Imperial College London;;Dalian University of Technology;School of Computer Science and Technology", "aff_domain": "dlut.edu.cn;zju.edu.cn;dlut.edu.cn;ic.ac.uk;;dlut.edu.cn;dlut.edu.cn", "position": "MS student;Postdoc;Associate Professor;Postdoc;;Full Professor;Associate Professor", "bibtex": "@misc{\nli2024towards,\ntitle={Towards efficient deep spiking neural networks construction with spiking activity based pruning},\nauthor={Yaxin Li and Jiangrong Shen and Hongming Xu and Long Chen and Gang Pan and Qiang Zhang and Qi Xu},\nyear={2024},\nurl={https://openreview.net/forum?id=9tQfBNxX16}\n}", "github": "", "project": "", "reviewers": 
"AYqQ;JMPU;vHij;zKkP", "site": "https://openreview.net/forum?id=9tQfBNxX16", "pdf_size": 886686, "rating": "3;3;5;5", "confidence": "4;4;5;4", "soundness": "2;3;3;2", "contribution": "2;2;2;2", "presentation": "3;3;4;2", "wc_summary": "75;40;83;70", "wc_strengths": "40;17;53;44", "wc_weaknesses": "196;137;183;115", "wc_questions": "37;289;123;86", "wc_review": "348;483;442;315", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 67.0, 16.263455967290593 ], "wc_strengths_avg": [ 38.5, 13.275918047351754 ], "wc_weaknesses_avg": [ 157.75, 33.0104150231408 ], "wc_questions_avg": [ 133.75, 94.68203367059667 ], "wc_review_avg": [ 397.0, 68.09184973254875 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8902417123251783381&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;0;2;0;3", "aff_unique_norm": "Dalian University of Technology;Zhejiang University;Imperial College London;School of Computer Science and Technology", "aff_unique_dep": ";;;Computer Science and Technology", "aff_unique_url": "http://www.dlut.edu.cn/;https://www.zju.edu.cn;https://www.imperial.ac.uk;", "aff_unique_abbr": "DUT;ZJU;ICL;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "China;United Kingdom;" }, { "id": "9ux2cgxw6O", "title": "LOVECon: Text-driven Training-free Long Video Editing with ControlNet", "track": "main", "status": "Reject", "tldr": "", "abstract": "Leveraging pre-trained conditional diffusion models for video editing without further tuning has gained increasing attention due to its promise in film production, advertising, etc. Yet, seminal works in this line fall short in generation length, temporal coherence, or fidelity to the source video. This paper aims to bridge the gap, establishing a simple and effective baseline for training-free diffusion model-based long video editing. As suggested by prior arts, we build the pipeline upon ControlNet, which excels at various image editing tasks based on text prompts. To break down the length constraints caused by limited computational memory, we split the long video into consecutive windows and develop a novel cross-window attention mechanism to ensure the consistency of global style and maximize the smoothness among windows. To achieve more accurate control, we extract the information from the source video via DDIM inversion and integrate the outcomes into the latent feature maps of the generations. We also incorporate a video frame interpolation model to mitigate frame-level flickering issues further. Extensive empirical studies verify the superior efficacy of our method over competing baselines across scenarios, including replacing attributes of foreground objects, style transfer, and background replacement. 
In particular, our method manages to edit videos with up to 128 frames according to user requirements.", "keywords": "Video editing;Diffusion models;Training-free", "primary_area": "generative models", "supplementary_material": "/attachment/dc950b2a21404113366bc65150acc9194a6c097c.zip", "author": "Zhenyi Liao;Zhijie Deng", "authorids": "~Zhenyi_Liao1;~Zhijie_Deng1", "gender": "M;M", "homepage": "https://github.com/L-Justice1998;https://thudzj.github.io/", "dblp": "359/1044;209/4959", "google_scholar": ";J3dR0sUAAAAJ", "orcid": ";0000-0002-0932-1631", "linkedin": ";", "or_profile": "~Zhenyi_Liao1;~Zhijie_Deng1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn", "position": "MS student;Assistant Professor", "bibtex": "@misc{\nliao2024lovecon,\ntitle={{LOVEC}on: Text-driven Training-free Long Video Editing with ControlNet},\nauthor={Zhenyi Liao and Zhijie Deng},\nyear={2024},\nurl={https://openreview.net/forum?id=9ux2cgxw6O}\n}", "github": "", "project": "", "reviewers": "WhZ8;59Nz;EDK1", "site": "https://openreview.net/forum?id=9ux2cgxw6O", "pdf_size": 20544191, "rating": "5;5;5", "confidence": "5;4;4", "soundness": "3;2;2", "contribution": "2;2;2", "presentation": "3;3;3", "wc_summary": "64;80;92", "wc_strengths": "55;2;67", "wc_weaknesses": "144;89;241", "wc_questions": "69;2;1", "wc_review": "332;173;401", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "533;340;433", "reply_reviewers": "0;0;0", "reply_authors": "2;2;2", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 78.66666666666667, 11.469767022723502 ], "wc_strengths_avg": [ 41.333333333333336, 28.241026106633512 ], "wc_weaknesses_avg": [ 158.0, 62.838417124134075 ], "wc_questions_avg": [ 24.0, 31.822423959633664 ], "wc_review_avg": [ 302.0, 95.46727187890099 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 435.3333333333333, 78.80919292118712 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9923968201600933195&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "9v5uZPWZoV", "title": "Not Just Pretty Pictures: Toward Interventional Data Augmentation Using Text-to-Image Generators", "track": "main", "status": "Reject", "tldr": "", "abstract": "Neural image classifiers are known to undergo severe performance degradation when exposed to inputs that exhibit covariate shift with respect to the training distribution. A general interventional data augmentation (IDA) mechanism that simulates arbitrary interventions over spurious variables has often been conjectured as a theoretical solution to this problem and approximated to varying degrees of success. In this work, we study how well modern Text-to-Image (T2I) generators and associated image editing techniques can solve the problem of IDA. 
We experiment across a diverse collection of benchmarks in domain generalization, ablating across key dimensions of T2I generation, including interventional prompts, conditioning mechanisms, and post-hoc filtering, showing that it substantially outperforms previously state-of-the-art image augmentation techniques independently of how each dimension is configured. We discuss the comparative advantages of using T2I for image editing versus synthesis, also finding that a simple retrieval baseline presents a surprisingly effective alternative, which raises interesting questions about how generative models should be evaluated in the context of domain generalization.", "keywords": "Text-to-Image Generators;Diffusion Models;Synthetic Data;Distribution Shift;Domain Generalization", "primary_area": "generative models", "supplementary_material": "", "author": "Jianhao Yuan;Francesco Pinto;Adam Davies;Philip Torr", "authorids": "~Jianhao_Yuan2;~Francesco_Pinto1;~Adam_Davies2;~Philip_Torr1", "gender": "M;Non-Binary;;M", "homepage": ";https://ahdavies6.github.io/;http://www.robots.ox.ac.uk/~tvg/;https://yuanjianhao508.github.io/", "dblp": "281/7477;;;", "google_scholar": "rqAdo2MAAAAJ;vqkOH7gAAAAJ;;BUJPCegAAAAJ", "orcid": ";0000-0002-0610-2732;;", "linkedin": "francesco-pinto-42a389b1?lipi=urn%3Ali%3Apage%3Ad_flagship3_profile_view_base_contact_details%3BishkY8oUQ8OTPPeV0SSCdw%3D%3D;adamhdavies/;;", "or_profile": "~Francesco_Pinto1;~Adam_Davies2;~Philip_Torr1;~JIANHAO_YUAN1", "aff": "University of Oxford;University of Illinois, Urbana Champaign;University of Oxford;University of Oxford", "aff_domain": "ox.ac.uk;illinois.edu;ox.ac.uk;robots.ox.ac.uk", "position": "PhD student;PhD student;Full Professor;PhD student", "bibtex": "@misc{\nyuan2024not,\ntitle={Not Just Pretty Pictures: Toward Interventional Data Augmentation Using Text-to-Image Generators},\nauthor={Jianhao Yuan and Francesco Pinto and Adam Davies and Philip Torr},\nyear={2024},\nurl={https://openreview.net/forum?id=9v5uZPWZoV}\n}", "github": "", "project": "", "reviewers": "3cd6;MWyF;nrcC;bsQg", "site": "https://openreview.net/forum?id=9v5uZPWZoV", "pdf_size": 49054750, "rating": "5;5;6;6", "confidence": "4;4;5;4", "soundness": "3;3;2;2", "contribution": "2;2;3;2", "presentation": "3;3;3;3", "wc_summary": "218;39;92;118", "wc_strengths": "59;17;94;37", "wc_weaknesses": "146;309;13;11", "wc_questions": "10;82;12;344", "wc_review": "433;447;211;510", "wc_reply_reviewers": "66;71;0;400", "wc_reply_authors": "1186;1173;446;1490", "reply_reviewers": "1;1;0;1", "reply_authors": "2;4;2;4", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 116.75, 65.0206697904597 ], "wc_strengths_avg": [ 51.75, 28.560243346302215 ], "wc_weaknesses_avg": [ 119.75, 122.19528427889514 ], "wc_questions_avg": [ 112.0, 137.04743704279917 ], "wc_review_avg": [ 400.25, 113.04727993189398 ], "wc_reply_reviewers_avg": [ 134.25, 155.96854650858293 ], "wc_reply_authors_avg": [ 1073.75, 383.9872230947275 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 1.0 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4464747621104770790&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of 
Oxford;University of Illinois Urbana-Champaign", "aff_unique_dep": ";", "aff_unique_url": "https://www.ox.ac.uk;https://illinois.edu", "aff_unique_abbr": "Oxford;UIUC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United Kingdom;United States" }, { "id": "9vZ8UjP2Mz", "title": "Exploring the Generalization Capabilities of AID-based Bi-level Optimization", "track": "main", "status": "Reject", "tldr": "", "abstract": "Bi-level optimization has achieved considerable success in contemporary machine learning applications, especially for given proper hyperparameters. However, due to the two-level optimization structure, commonly, researchers focus on two types of bi-level optimization methods: approximate implicit differentiation (AID)-based and iterative differentiation (ITD)-based approaches. ITD-based methods can be readily transformed into single-level optimization problems, facilitating the study of their generalization capabilities. In contrast, AID-based methods cannot be easily transformed similarly but must stay in the two-level structure, leaving their generalization properties enigmatic. In this paper, although the outer-level function is nonconvex, we ascertain the uniform stability of AID-based methods, which achieves similar results to a single-level nonconvex problem. We conduct a convergence analysis for a carefully chosen step size to maintain stability. Combining the convergence and stability results, we give the generalization ability of AID-based bi-level optimization methods. Furthermore, we carry out an ablation study of the parameters and assess the performance of these methods on real-world tasks. Our experimental results corroborate the theoretical findings, demonstrating the effectiveness and potential applications of these methods.", "keywords": "Generalization; Bi-level Optimization", "primary_area": "learning theory", "supplementary_material": "/attachment/6fdfb91e52c0d916612146d0aeb4faa8622af831.pdf", "author": "Congliang Chen;Li Shen;zhiqiang xu;Wei Liu;Zhi-Quan Luo;Peilin Zhao", "authorids": "~Congliang_Chen1;~Li_Shen1;~zhiqiang_xu1;~Wei_Liu3;~Zhi-Quan_Luo1;~Peilin_Zhao2", "gender": "M;M;M;M;M;", "homepage": ";https://sites.google.com/site/mathshenli/home;https://scholar.google.com/citations?user=0R20iBMAAAAJ&hl=en;https://sites.google.com/view/cuweiliu;;", "dblp": "205/7138;91/3680-8;72/51-3.html;49/3283-5;;84/8411", "google_scholar": "O1P1-EAAAAAJ;yVhgENIAAAAJ;;AjxoEpIAAAAJ;dW3gcXoAAAAJ;https://scholar.google.com.hk/citations?user=HPeX_YcAAAAJ", "orcid": ";;0000-0002-5693-8933;0000-0002-3865-8145;;0000-0001-8543-3953", "linkedin": ";;;;;", "or_profile": "~Congliang_Chen1;~Li_Shen1;~zhiqiang_xu1;~Wei_Liu3;~Zhi-Quan_Luo1;~Peilin_Zhao2", "aff": "The Chinese University of Hong Kong(Shenzhen);JD Explore Academy;Mohamed bin Zayed University of Artificial Intelligence;Tencent;The Chinese University of Hong Kong, Shenzhen;Tencent", "aff_domain": "cuhk.edu.cn;jd.com;mbzuai.ac.ae;tencent.com;cuhk.edu.cn;tencent.com", "position": "PhD student;Researcher;Assistant Professor;Distinguished Scientist;Full Professor;Researcher", "bibtex": "@misc{\nchen2024exploring,\ntitle={Exploring the Generalization Capabilities of {AID}-based Bi-level Optimization},\nauthor={Congliang Chen and Li Shen and zhiqiang xu and Wei Liu and Zhi-Quan Luo and Peilin Zhao},\nyear={2024},\nurl={https://openreview.net/forum?id=9vZ8UjP2Mz}\n}", "github": "", "project": "", "reviewers": "FTaq;R6J2;JTQs;CjiH", 
"site": "https://openreview.net/forum?id=9vZ8UjP2Mz", "pdf_size": 1156955, "rating": "3;3;6;8", "confidence": "3;3;3;3", "soundness": "2;2;3;3", "contribution": "3;2;3;3", "presentation": "3;2;3;3", "wc_summary": "159;80;35;98", "wc_strengths": "133;48;28;101", "wc_weaknesses": "724;149;109;135", "wc_questions": "6;421;109;71", "wc_review": "1022;698;281;405", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1148;804;203;600", "reply_reviewers": "0;0;0;0", "reply_authors": "2;2;1;1", "rating_avg": [ 5.0, 2.1213203435596424 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 93.0, 44.48033273256845 ], "wc_strengths_avg": [ 77.5, 41.692325432865935 ], "wc_weaknesses_avg": [ 279.25, 257.17734639738393 ], "wc_questions_avg": [ 151.75, 159.75508599102565 ], "wc_review_avg": [ 601.5, 286.12278832696984 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 688.75, 342.0609412078497 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:-O-IdThMmvMJ:scholar.google.com/&scioq=Exploring+the+Generalization+Capabilities+of+AID-based+Bi-level+Optimization&hl=en&as_sdt=0,33", "gs_version_total": 3, "aff_unique_index": "0;1;2;3;0;3", "aff_unique_norm": "Chinese University of Hong Kong;JD;Mohamed bin Zayed University of Artificial Intelligence;Tencent", "aff_unique_dep": ";JD Explore Academy;;Tencent Holdings Limited", "aff_unique_url": "https://www.cuhk.edu.cn;;https://mbzuai.ac.ae;https://www.tencent.com", "aff_unique_abbr": "CUHK;;MBZUAI;Tencent", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Shenzhen;", "aff_country_unique_index": "0;2;0;0;0", "aff_country_unique": "China;;United Arab Emirates" }, { "id": "9vkgAaCI3F", "title": "Balancing Stability and Plasticity in Continual Learning: the readout-decomposition of activation change (RDAC) framework", "track": "main", "status": "Reject", "tldr": "", "abstract": "Continual learning (CL) algorithms strive to equip neural networks with the ability to acquire new knowledge while preserving prior information. However, the stability-plasticity trade-off remains a central challenge in CL. This paper introduces a framework that dissects this trade-off, offering valuable insights into CL algorithms.\nThe framework first addresses the stability-plasticity dilemma and its relation to catastrophic forgetting. It presents the Readout-Decomposition of Activation Change (RDAC) framework that relates learning-induced activation changes in the range of prior readouts to the degree of stability, and changes in the null space to the degree of plasticity. \nIn deep non-linear networks tackling split-CIFAR-110 tasks, the framework was used to explain the stability-plasticity trade-offs of the popular regularization algorithms Synaptic intelligence (SI), Elastic-weight consolidation (EWC), and learning without Forgetting (LwF) and replay based algorithms Gradient episodic memory (GEM), and data replay. GEM and data replay excelled in preserving both stability and plasticity, while SI, EWC, and LwF traded off plasticity for stability. The inability of the regularization algorithms to maintain plasticity was linked to them restricting the change of activations in the null space of the prior readout. 
For one-hidden-layer linear neural networks, we additionally derived a gradient decomposition algorithm to restrict activation change only in the range of the prior readouts, to maintain high stability while not further sacrificing plasticity. \nResults demonstrate that the algorithm maintains stability without significant plasticity loss.\nThe RDAC framework not only informs the behavior of existing CL algorithms but also paves the way for novel CL approaches. Finally, it sheds light on the connection between learning-induced activation/representation changes and the stability-plasticity dilemma, also offering insights into representational drift in biological systems.", "keywords": "continual learning;stability-plasticity trade-off;representational drift;task-incremental learning;readout misalignment;interpretability", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "/attachment/56cf2b801fb4c69f3519ab8aa6637ac9c2dffced.zip", "author": "Daniel Anthes;Sushrut Thorat;Peter K\u00f6nig;Tim C Kietzmann", "authorids": "~Daniel_Anthes1;~Sushrut_Thorat1;~Peter_K\u00f6nig1;~Tim_C_Kietzmann1", "gender": ";M;M;M", "homepage": ";https://sushrutthorat.com/;https://www.ikw.uni-osnabrueck.de/en/research_groups/neurobiopsychology.html;https://www.kietzmannlab.org", "dblp": ";169/0786;;", "google_scholar": "YPdEhboAAAAJ;https://scholar.google.it/citations?user=MPFzJQgAAAAJ;Ieubd0EAAAAJ;JXcWFkgAAAAJ", "orcid": ";0000-0003-2276-5621;0000-0003-3654-5267;0000-0001-8076-6062", "linkedin": ";;;", "or_profile": "~Daniel_Anthes1;~Sushrut_Thorat1;~Peter_K\u00f6nig1;~Tim_C_Kietzmann1", "aff": "University of Osnabr\u00fcck;University of Osnabr\u00fcck;Universit\u00e4t Osnabr\u00fcck;Universit\u00e4t Osnabr\u00fcck", "aff_domain": "uos.de;uos.de;uni-osnabrueck.de;uni-osnabrueck.de", "position": "PhD student;Postdoc;Full Professor;Full Professor", "bibtex": "@misc{\nanthes2024balancing,\ntitle={Balancing Stability and Plasticity in Continual Learning: the readout-decomposition of activation change ({RDAC}) framework},\nauthor={Daniel Anthes and Sushrut Thorat and Peter K{\\\"o}nig and Tim C Kietzmann},\nyear={2024},\nurl={https://openreview.net/forum?id=9vkgAaCI3F}\n}", "github": "", "project": "", "reviewers": "gbcF;RvDh;a3Rp;djfe", "site": "https://openreview.net/forum?id=9vkgAaCI3F", "pdf_size": 1329158, "rating": "5;5;5;6", "confidence": "5;5;2;1", "soundness": "2;2;3;3", "contribution": "2;3;2;3", "presentation": "3;2;3;3", "wc_summary": "134;176;69;71", "wc_strengths": "81;79;59;83", "wc_weaknesses": "54;132;324;31", "wc_questions": "104;262;50;4", "wc_review": "373;649;502;189", "wc_reply_reviewers": "0;44;0;0", "wc_reply_authors": "329;577;523;95", "reply_reviewers": "0;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 1.7853571071357126 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 112.5, 45.0249930594109 ], "wc_strengths_avg": [ 75.5, 9.630680142129112 ], "wc_weaknesses_avg": [ 135.25, 115.22450911156011 ], "wc_questions_avg": [ 105.0, 97.30878685915265 ], "wc_review_avg": [ 428.25, 169.16171996051588 ], "wc_reply_reviewers_avg": [ 11.0, 19.05255888325765 ], "wc_reply_authors_avg": [ 381.0, 189.12958520548815 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7276068751089989, "gs_citation": 
0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:db2DrzxzUXAJ:scholar.google.com/&scioq=Balancing+Stability+and+Plasticity+in+Continual+Learning:+the+readout-decomposition+of+activation+change+(RDAC)+framework&hl=en&as_sdt=0,33", "gs_version_total": 2, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Osnabr\u00fcck", "aff_unique_dep": "", "aff_unique_url": "https://www.uni-osnabrueck.de", "aff_unique_abbr": "UOS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Entropy is not Enough for Test-Time Adaptation: From the Perspective of Disentangled Factors", "status": "Spotlight", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19280", "id": "9w3iw8wDuE", "author_site": "Jonghyun Lee, Dahuin Jung, Saehyung Lee, Junsung Park, Juhyeon Shin, Uiwon Hwang, Sungroh Yoon", "tldr": "", "abstract": "Test-time adaptation (TTA) fine-tunes pre-trained deep neural networks for unseen test data. The primary challenge of TTA is limited access to the entire test dataset during online updates, causing error accumulation. To mitigate it, TTA methods have utilized the model output's entropy as a confidence metric that aims to determine which samples have a lower likelihood of causing error. Through experimental studies, however, we observed the unreliability of entropy as a confidence metric for TTA under biased scenarios and theoretically revealed that it stems from the neglect of the influence of latent disentangled factors of data on predictions. Building upon these findings, we introduce a novel TTA method named Destroy Your Object (DeYO), which leverages a newly proposed confidence metric named Pseudo-Label Probability Difference (PLPD). PLPD quantifies the influence of the shape of an object on prediction by measuring the difference between predictions before and after applying an object-destructive transformation. DeYO consists of sample selection and sample weighting, which employ entropy and PLPD concurrently. For robust adaptation, DeYO prioritizes samples that dominantly incorporate shape information when making predictions. Our extensive experiments demonstrate the consistent superiority of DeYO over baseline methods across various scenarios, including biased and wild. 
Project page is publicly available at https://whitesnowdrop.github.io/DeYO/.", "keywords": "Test-time adaptation;Robustness", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "", "author": "Jonghyun Lee;Dahuin Jung;Saehyung Lee;Junsung Park;Juhyeon Shin;Uiwon Hwang;Sungroh Yoon", "authorids": "~Jonghyun_Lee1;~Dahuin_Jung2;~Saehyung_Lee1;~Junsung_Park1;~Juhyeon_Shin1;~Uiwon_Hwang1;~Sungroh_Yoon1", "gender": "M;F;M;M;F;M;", "homepage": ";https://hai.ssu.ac.kr/;;http://data.snu.ac.kr/;https://github.com/newjh12;https://sites.google.com/view/uiwon-hwang;http://ailab.snu.ac.kr", "dblp": ";224/0158;260/0442;;;207/8512;99/1474", "google_scholar": ";https://scholar.google.co.kr/citations?user=wleS-UQAAAAJ;nS24h74AAAAJ;;;https://scholar.google.co.kr/citations?user=CJ8-pGIAAAAJ;Bphl_fIAAAAJ", "orcid": ";;;;;0000-0001-5054-2236;0000-0002-2367-197X", "linkedin": "jonghyun-lee-0886061a3/;;;;;uiwon-hwang/;", "or_profile": "~Jonghyun_Lee1;~Dahuin_Jung2;~Saehyung_Lee1;~Junsung_Park1;~Juhyeon_Shin1;~Uiwon_Hwang1;~Sungroh_Yoon1", "aff": "Seoul National University;Seoul National University;Adobe Systems;Seoul National University;Seoul National University;Yonsei University - Mirae Campus;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr;adobe.com;snu.ac.kr;snu.ac.kr;yonsei.ac.kr;snu.ac.kr", "position": "PhD student;PhD student;Intern;PhD student;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nlee2024entropy,\ntitle={Entropy is not Enough for Test-Time Adaptation: From the Perspective of Disentangled Factors},\nauthor={Jonghyun Lee and Dahuin Jung and Saehyung Lee and Junsung Park and Juhyeon Shin and Uiwon Hwang and Sungroh Yoon},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=9w3iw8wDuE}\n}", "github": "", "project": "", "reviewers": "7TAy;xMUg;NrgV;M673", "pdf_size": 1515837, "rating": "6;6;8;8", "confidence": "3;5;5;4", "soundness": "3;3;2;4", "contribution": "3;3;2;3", "presentation": "3;3;2;3", "wc_summary": "55;38;100;71", "wc_strengths": "46;70;16;57", "wc_weaknesses": "107;44;494;74", "wc_questions": "86;13;10;4", "wc_review": "294;165;620;206", "wc_reply_reviewers": "14;5;274;0", "wc_reply_authors": "1452;612;2861;1179", "reply_reviewers": "1;1;2;0", "reply_authors": "4;3;6;4", "rating_avg": [ 7.0, 1.0 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 66.0, 22.83637449333847 ], "wc_strengths_avg": [ 47.25, 19.942103700462496 ], "wc_weaknesses_avg": [ 179.75, 182.79547997694036 ], "wc_questions_avg": [ 28.25, 33.49906715119094 ], "wc_review_avg": [ 321.25, 178.66921251295648 ], "wc_reply_reviewers_avg": [ 73.25, 116.01158347337562 ], "wc_reply_authors_avg": [ 1526.0, 828.1766115992409 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 4.25, 1.0897247358851685 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4357779222588905269&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=9w3iw8wDuE", "pdf": "https://openreview.net/pdf?id=9w3iw8wDuE", "email": "snu.ac.kr;snu.ac.kr;adobe.com;snu.ac.kr;snu.ac.kr;yonsei.ac.kr;snu.ac.kr", "author_num": 7,
"aff_unique_index": "0;0;1;0;0;2;0", "aff_unique_norm": "Seoul National University;Adobe;Yonsei University", "aff_unique_dep": ";Adobe Systems Incorporated;", "aff_unique_url": "https://www.snu.ac.kr;https://www.adobe.com;https://www.yonsei.ac.kr", "aff_unique_abbr": "SNU;Adobe;Yonsei", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mirae", "aff_country_unique_index": "0;0;1;0;0;0;0", "aff_country_unique": "South Korea;United States" }, { "id": "9wSWiavGwU", "title": "SwapTransformer: Highway Overtaking Tactical Planner Model via Imitation Learning on OSHA Dataset", "track": "main", "status": "Reject", "tldr": "", "abstract": "This paper investigates the high-level decision-making problem in highway scenarios regarding lane changing and over-taking other slower vehicles. In particular, this paper aims to improve the Travel Assist feature for automatic overtaking and lane changes on highways. About 9 million samples including lane images and other dynamic objects are collected in simulation. This data; Overtaking on Simulated HighwAys (OSHA) dataset is released to tackle this challenge. To solve this problem, an architecture called SwapTransformer is designed and implemented as an imitation learning approach on the OSHA dataset. Moreover, auxiliary tasks such as future points and car distance network predictions are proposed to aid the model in better understanding the surrounding environment. The performance of the proposed solution is compared with a multi-layer perceptron (MLP) and multi-head self-attention networks as baselines in a simulation environment. We also demonstrate the performance of the model with and without auxiliary tasks. All models are evaluated based on different metrics such as time to finish each lap, number of overtakes, and speed difference with speed limit. 
The evaluation shows that the SwapTransformer model outperforms other models in different traffic densities in the inference phase.", "keywords": "Autonomous driving;Imitation learning;highway;overtaking;machine learning;transformer", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/22416a94694b96b41ac47558d98e24683a8a1bcc.zip", "author": "Alireza Shamsoshoara;Safin B Salih;Pedram Aghazadeh", "authorids": "~Alireza_Shamsoshoara1;safin.salih@vw.com;pedram.aghazadeh@vw.com", "gender": "M;;", "homepage": "https://alirezashamsoshoara.github.io/;;", "dblp": ";;", "google_scholar": "1IDrN5QAAAAJ;;", "orcid": "0000-0003-4087-8304;;", "linkedin": "alireza-shamsoshoara/;;", "or_profile": "~Alireza_Shamsoshoara1;safin.salih@vw.com;pedram.aghazadeh@vw.com", "aff": ";;", "aff_domain": ";;", "position": ";;", "bibtex": "@misc{\nshamsoshoara2024swaptransformer,\ntitle={SwapTransformer: Highway Overtaking Tactical Planner Model via Imitation Learning on {OSHA} Dataset},\nauthor={Alireza Shamsoshoara and Safin B Salih and Pedram Aghazadeh},\nyear={2024},\nurl={https://openreview.net/forum?id=9wSWiavGwU}\n}", "github": "", "project": "", "reviewers": "kokN;Qxdb;8dui", "site": "https://openreview.net/forum?id=9wSWiavGwU", "pdf_size": 14859760, "rating": "5;5;5", "confidence": "3;3;3", "soundness": "2;2;2", "contribution": "2;2;2", "presentation": "2;2;3", "wc_summary": "185;286;160", "wc_strengths": "32;129;39", "wc_weaknesses": "61;218;295", "wc_questions": "25;72;209", "wc_review": "303;705;703", "wc_reply_reviewers": "10;0;373", "wc_reply_authors": "377;1187;2238", "reply_reviewers": "1;0;2", "reply_authors": "1;2;5", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.0, 0.0 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 210.33333333333334, 54.46915538989832 ], "wc_strengths_avg": [ 66.66666666666667, 44.16886786967591 ], "wc_weaknesses_avg": [ 191.33333333333334, 97.37327947417378 ], "wc_questions_avg": [ 102.0, 78.05553578489271 ], "wc_review_avg": [ 570.3333333333334, 189.0349761875358 ], "wc_reply_reviewers_avg": [ 127.66666666666667, 173.52489414746486 ], "wc_reply_authors_avg": [ 1267.3333333333333, 761.8706510221331 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.6666666666666665, 1.699673171197595 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18112353373146535003&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4 }, { "title": "Provably Efficient CVaR RL in Low-rank MDPs", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19279", "id": "9x6yrFAPnx", "author_site": "Yulai Zhao, Wenhao Zhan, Xiaoyan Hu, Ho-fung Leung, Farzan Farnia, Wen Sun, Jason Lee", "tldr": "", "abstract": "We study risk-sensitive Reinforcement Learning (RL), where we aim to maximize\nthe Conditional Value at Risk (CVaR) with a fixed risk tolerance $\\tau$. \nPrior theoretical work studying risk-sensitive RL focuses on the tabular Markov Decision Processes (MDPs) setting. \nTo extend CVaR RL to settings where state space is large, function approximation must be deployed. \nWe study CVaR RL in low-rank MDPs with nonlinear function approximation. 
Low-rank MDPs assume the underlying transition kernel admits a low-rank decomposition, but unlike prior linear models, low-rank MDPs do not assume the feature or state-action representation is known. \nWe propose a novel Upper Confidence Bound (UCB) bonus-driven algorithm to carefully balance the interplay between exploration, exploitation, and representation learning in CVaR RL. \nWe prove that our algorithm achieves a sample complexity of $\\tilde{O}\\left(\\frac{H^7 A^2 d^4}{\\tau^2 \\epsilon^2}\\right)$ to yield an $\\epsilon$-optimal CVaR, where $H$ is the length of each episode, $A$ is the capacity of action space, and $d$ is the dimension of representations.\nComputational-wise, we design a novel discretized Least-Squares Value Iteration (LSVI) algorithm for the CVaR objective as the planning oracle and show that we can find the near-optimal policy in a polynomial running time with a Maximum Likelihood Estimation oracle. \nTo our knowledge, this is the first provably efficient CVaR RL algorithm in low-rank MDPs.", "keywords": "reinforment learning theory;risk-sensitive reinforment learning;Conditional Value at Risk", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Yulai Zhao;Wenhao Zhan;Xiaoyan Hu;Ho-fung Leung;Farzan Farnia;Wen Sun;Jason D. Lee", "authorids": "~Yulai_Zhao1;~Wenhao_Zhan1;~Xiaoyan_Hu2;~Ho-fung_Leung1;~Farzan_Farnia1;~Wen_Sun1;~Jason_D._Lee1", "gender": "M;M;M;M;M;;M", "homepage": "https://yulaizhao.com/;;https://yannxiaoyanhu.github.io;http://www.cse.cuhk.edu.hk/~lhf/;https://www.cse.cuhk.edu.hk/~farnia/;https://wensun.github.io;https://jasondlee88.github.io/", "dblp": "64/6357-2;275/3558;;l/HofungLeung;132/7757;;88/3262", "google_scholar": "r-mWYj0AAAAJ;;https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?user=JDErdKcAAAAJ;GYPCqcYAAAAJ;iOLC30YAAAAJ;GR_DsT0AAAAJ", "orcid": "0000-0002-6930-3590;;0000-0002-5766-1059;0000-0003-4914-2934;0000-0002-6049-9232;;", "linkedin": "yulaizhao/;;xiaoyan-hu-9a26661b9/;ho-fung-leung-1a73135/;farzan-farnia-00798335;;", "or_profile": "~Yulai_Zhao1;~Wenhao_Zhan1;~Xiaoyan_Hu2;~Ho-fung_Leung1;~Farzan_Farnia1;~Wen_Sun1;~Jason_D._Lee1", "aff": "Princeton University;Princeton University;The Chinese University of Hong Kong; ;The Chinese University of Hong Kong;Cornell University;Princeton University", "aff_domain": "princeton.edu;princeton.edu;cse.cuhk.edu.hk;outlook.com;cuhk.edu.hk;cornell.edu;princeton.edu", "position": "PhD student;PhD student;PhD student;Independent Researcher;Assistant Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhao2024provably,\ntitle={Provably Efficient {CV}aR {RL} in Low-rank {MDP}s},\nauthor={Yulai Zhao and Wenhao Zhan and Xiaoyan Hu and Ho-fung Leung and Farzan Farnia and Wen Sun and Jason D. 
Lee},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=9x6yrFAPnx}\n}", "github": "", "project": "", "reviewers": "hVSJ;yZ3N;i64s;htXD", "pdf_size": 491888, "rating": "6;6;6;6", "confidence": "4;3;3;2", "soundness": "3;3;3;3", "contribution": "2;2;2;3", "presentation": "3;3;3;3", "wc_summary": "69;110;63;81", "wc_strengths": "45;47;93;31", "wc_weaknesses": "301;194;145;20", "wc_questions": "35;25;168;24", "wc_review": "450;376;469;156", "wc_reply_reviewers": "145;47;0;0", "wc_reply_authors": "1188;826;874;476", "reply_reviewers": "1;1;0;0", "reply_authors": "2;2;2;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 80.75, 18.08832496390973 ], "wc_strengths_avg": [ 54.0, 23.345235059857504 ], "wc_weaknesses_avg": [ 165.0, 100.94800641914628 ], "wc_questions_avg": [ 63.0, 60.774172145739676 ], "wc_review_avg": [ 362.75, 124.32090532167146 ], "wc_reply_reviewers_avg": [ 48.0, 59.198817555758666 ], "wc_reply_authors_avg": [ 841.0, 252.46187831036985 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9310462942782500564&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "openreview": "https://openreview.net/forum?id=9x6yrFAPnx", "pdf": "https://openreview.net/pdf?id=9x6yrFAPnx", "email": "princeton.edu;princeton.edu;cse.cuhk.edu.hk;outlook.com;cuhk.edu.hk;cornell.edu;princeton.edu", "author_num": 7, "aff_unique_index": "0;0;1;1;2;0", "aff_unique_norm": "Princeton University;Chinese University of Hong Kong;Cornell University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.princeton.edu;https://www.cuhk.edu.hk;https://www.cornell.edu", "aff_unique_abbr": "Princeton;CUHK;Cornell", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;1;1;0;0", "aff_country_unique": "United States;China" }, { "id": "9yKzVMxlkw", "title": "TiG-BEV: Multi-view BEV 3D Object Detection via Target Inner-Geometry Learning", "track": "main", "status": "Desk Reject", "tldr": "", "abstract": "To achieve accurate multi-view 3D object detection, existing methods propose to benefit camera-based detectors with spatial cues provided by the LiDAR modality, e.g., depth supervision and bird-eye-view (BEV) feature distillation. However, they employ a direct point-to-point mimicry from LiDAR to camera, which suffers from the modality gap between 2D-3D features. In this paper, we propose the Target Inner-Geometry learning scheme to enhance camera-based BEV detectors from both depth and BEV feature by leveraging the LiDAR modality, termed as TiG-BEV. Firstly, we introduce an inner-depth supervision module to learn the low-level relative depth relations in each object. This equips camera-based detectors with a deeper understanding of object-level spatial structures. Secondly, we design an inner-feature BEV distillation module to imitate the high-level semantics of different keypoints within foreground targets. To further alleviate the domain gap between two modalities, we incorporate both inter-channel and inter-keypoint distillation to model feature similarity.
With our target inner-geometry learning, TiG-BEV effectively boosts BEVDepth by +2.3% NDS on nuScenes val set, and achieves leading performance with 61.9% NDS on nuScenes leaderboard.", "keywords": "BEV;3D Object Detection", "primary_area": "applications to robotics, autonomy, planning", "supplementary_material": "/attachment/996fe1e8b83459367b0e6f2f1f7f132aff8d39ae.pdf", "author": "Peixiang Huang;Li Liu;Renrui Zhang;Jiaming Liu;Mingjie Pan;Rui Xu;Xinli Xu;Yongqiang Qin", "authorids": "~Peixiang_Huang1;~Li_Liu19;~Renrui_Zhang1;~Jiaming_Liu2;~Mingjie_Pan1;~Rui_Xu11;~Xinli_Xu1;~Yongqiang_Qin1", "gender": "M;M;M;M;M;M;M;M", "homepage": ";;;https://github.com/liujiaming1996;https://github.com/xray-pku;;;https://github.com/pmj110119", "dblp": "250/5451;;244/1748;;;;65/8354;335/1213", "google_scholar": ";zO5iemAAAAAJ;YlL3xN4AAAAJ;cPki5sUAAAAJ;;https://scholar.google.com.sg/citations?user=lrgPuBUAAAAJ;kBc-zJYAAAAJ;QdUeY3IAAAAJ", "orcid": ";;;0000-0002-6770-4390;;0000-0002-7866-6027;;", "linkedin": ";;;;;;yongqiang-qin/;", "or_profile": "~Peixiang_Huang1;~Li_Liu19;~Renrui_Zhang1;~Jiaming_Liu2;~Rui_Xu11;~Xinli_Xu1;~Yongqiang_Qin1;~Pan_Mingjie1", "aff": "Peking University;Xiaomi;MMLab of CUHK & Shanghai AI Laboratory;Peking University;Peking University;Hong Kong University of Science and Technology;NIO;Peking University", "aff_domain": "pku.edu.cn;xiaomi.com;pjlab.org.cn;pku.edu.cn;pku.edu.cn;hkust.edu;nio.com;pku.edu.cn", "position": "MS student;Researcher;PhD student;PhD student;MS student;PhD student;Researcher;MS student", "bibtex": "@misc{\nhuang2024tigbev,\ntitle={TiG-{BEV}: Multi-view {BEV} 3D Object Detection via Target Inner-Geometry Learning},\nauthor={Peixiang Huang and Li Liu and Renrui Zhang and Jiaming Liu and Mingjie Pan and Rui Xu and Xinli Xu and Yongqiang Qin},\nyear={2024},\nurl={https://openreview.net/forum?id=9yKzVMxlkw}\n}", "github": "", "project": "", "reviewers": "CVGt;oss3;kyMY;CeGw;v25e", "site": "https://openreview.net/forum?id=9yKzVMxlkw", "pdf_size": 11753936, "rating": "3;5;6;6;8", "confidence": "4;4;4;3;4", "soundness": "3;3;2;3;3", "contribution": "2;2;2;2;4", "presentation": "3;3;2;3;3", "wc_summary": "78;44;108;83;85", "wc_strengths": "83;13;65;47;132", "wc_weaknesses": "454;248;132;189;52", "wc_questions": "109;2;75;32;47", "wc_review": "724;307;380;351;316", "wc_reply_reviewers": "0;301;167;0;0", "wc_reply_authors": "1270;1388;1193;538;430", "reply_reviewers": "0;1;1;0;0", "reply_authors": "3;3;2;2;2", "rating_avg": [ 5.6, 1.624807680927192 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "contribution_avg": [ 2.4, 0.8 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 79.6, 20.57765778702717 ], "wc_strengths_avg": [ 68.0, 39.486706623875335 ], "wc_weaknesses_avg": [ 215.0, 135.89996320823636 ], "wc_questions_avg": [ 53.0, 36.60054644400818 ], "wc_review_avg": [ 415.6, 156.3746782570631 ], "wc_reply_reviewers_avg": [ 93.6, 122.21718373453055 ], "wc_reply_authors_avg": [ 963.8, 398.11676679084997 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 2.4, 0.4898979485566356 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.1230914909793327, "gs_citation": 40, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6302894399736069090&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2;0;0;3;4;0", "aff_unique_norm": "Peking University;Xiaomi Corporation;Chinese University of Hong Kong;Hong Kong 
University of Science and Technology;NIO", "aff_unique_dep": ";;MMLab;;", "aff_unique_url": "http://www.pku.edu.cn;https://www.xiaomi.com;https://www.cuhk.edu.hk;https://www.ust.hk;", "aff_unique_abbr": "Peking U;Xiaomi;CUHK;HKUST;", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China;" }, { "id": "9ydLP7como", "title": "ReLU for Inference Acceleration", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Over the past decade, advancements in neural networks have outpaced human-level performance in a wide range of domains, including but not limited to natural language understanding and image generation. This progress has led to significantly larger networks with hundreds of billions of parameters, creating substantial computational demands. We propose the re-introduction of ReLU activation function to replace gradient-smooth alternatives during inference. We show that this can reduce computational costs while achieving minimal accuracy degradation with the help of specialized knowledge distillation training. The effectiveness of the proposed method is demonstrated by a wide variety of network architectures, covering popular applications such as image classification, object detection, and language modeling. We observed FPS improvement of 2-10% for Convolution based neural networks while observing only 1.8-2.6% accuracy degradation. The different Transformer networks demonstrated accuracy difference of < 1% between proposed ReLU and original GeLU networks with comparable QPS. The improvement in performance is significantly noticeable on AI accelerators like ours, with ReLU based convolution networks showcasing theoretical improvement of 41-74% compared to their SiLU based counterpart.", "keywords": "Deep learning;knowledge distillation;quantization;object detection;tranformer;nlp", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Kinjal Pravinbhai Patel;Ben Goel;Ofer Shai", "authorids": "~Kinjal_Pravinbhai_Patel1;~Ben_Goel1;~Ofer_Shai1", "gender": ";;M", "homepage": ";;", "dblp": ";;12/4686", "google_scholar": ";;", "orcid": ";;", "linkedin": ";benjamin-goel?trk=contact-info;", "or_profile": "~Kinjal_Pravinbhai_Patel1;~Ben_Goel1;~Ofer_Shai1", "aff": ";University of Toronto, University of Toronto;", "aff_domain": ";ece.utoronto.ca;", "position": ";Undergrad student;", "bibtex": "@misc{\npatel2024relu,\ntitle={Re{LU} for Inference Acceleration},\nauthor={Kinjal Pravinbhai Patel and Ben Goel and Ofer Shai},\nyear={2024},\nurl={https://openreview.net/forum?id=9ydLP7como}\n}", "github": "", "project": "", "reviewers": "jbaK;9ug7;RjcT;SAkP", "site": "https://openreview.net/forum?id=9ydLP7como", "pdf_size": 206409, "rating": "3;3;5;5", "confidence": "5;4;4;4", "soundness": "3;2;2;3", "contribution": "2;2;2;2", "presentation": "2;3;2;3", "wc_summary": "42;41;54;66", "wc_strengths": "23;16;23;90", "wc_weaknesses": "89;232;12;106", "wc_questions": "9;14;93;2", "wc_review": "163;303;182;264", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 50.75, 10.18270592720815 ], "wc_strengths_avg": [ 38.0, 30.157917700000443 ], "wc_weaknesses_avg": [ 109.75, 78.96953526518945 ], 
"wc_questions_avg": [ 29.5, 36.908671067921155 ], "wc_review_avg": [ 228.0, 57.58037860243713 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:SD-VPUuxp7kJ:scholar.google.com/&scioq=ReLU+for+Inference+Acceleration&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "University of Toronto", "aff_unique_dep": "", "aff_unique_url": "https://www.utoronto.ca", "aff_unique_abbr": "U of T", "aff_country_unique_index": "0", "aff_country_unique": "Canada" }, { "id": "9zEBK3E9bX", "title": "SPOT: Scalable 3D Pre-training via Occupancy Prediction for Autonomous Driving", "track": "main", "status": "Reject", "tldr": "", "abstract": "Annotating 3D LiDAR point clouds for perception tasks including 3D object detection and LiDAR semantic segmentation is notoriously time-and-energy-consuming. To alleviate the burden from labeling, it is promising to perform large-scale pre-training and fine-tune the pre-trained backbone on different downstream datasets as well as tasks. In this paper, we propose SPOT, namely Scalable Pre-training via Occupancy prediction for learning Transferable 3D representations, and demonstrate its effectiveness on various public datasets with different downstream tasks under the label-efficiency setting. Our contributions are threefold: (1) Occupancy prediction is shown to be promising for learning general representations, which is demonstrated by extensive experiments on plenty of datasets and tasks. (2) SPOT uses beam re-sampling technique for point cloud augmentation and applies class-balancing strategies to overcome the domain gap brought by various LiDAR sensors and annotation strategies in different datasets. (3) Scalable pre-training is observed, that is, the downstream performance across all the experiments gets better with more pre-training data. We believe that our findings can facilitate understanding of LiDAR point clouds and pave the way for future exploration in LiDAR pre-training. 
Codes and models will be released.", "keywords": "3D LiDAR point clouds;3D Pre-training;Object Detection;Semantic Segmentation", "primary_area": "applications to robotics, autonomy, planning", "supplementary_material": "/attachment/322395f5f11bbdb54b6f57a5e463b75b50a8f9b3.zip", "author": "Xiangchao Yan;Runjian Chen;Bo Zhang;Jiakang Yuan;Xinyu Cai;Botian Shi;Wenqi Shao;Junchi Yan;Ping Luo;Yu Qiao", "authorids": "~Xiangchao_Yan1;~Runjian_Chen1;~Bo_Zhang17;~Jiakang_Yuan1;~Xinyu_Cai2;~Botian_Shi1;~Wenqi_Shao2;~Junchi_Yan2;~Ping_Luo2;~Yu_Qiao1", "gender": ";M;M;M;;M;M;;;", "homepage": "https://github.com/sky-fly97;https://runjian-chen.github.io;https://bobrown.github.io/boZhang.github.io/;https://jiakangyuan.github.io/;;;https://wqshao126.github.io/;;;", "dblp": "314/2496.html;257/4647;36/2259-69;323/7363;;245/8742;227/3122;;;", "google_scholar": "0mMk6PMAAAAJ;_USUMdAAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=zh-CN;;K0PpvLkAAAAJ;Bs9mrwwAAAAJ;;;", "orcid": ";0000-0003-0519-496X;0000-0001-8052-782X;;0000-0001-8500-9300;0000-0003-3677-7252;;;;", "linkedin": ";;;;;friskit/;;;;", "or_profile": "~Xiangchao_Yan1;~Runjian_Chen1;~Bo_Zhang17;~Jiakang_Yuan1;~Xinyu_Cai2;~Botian_Shi1;~Wenqi_Shao2;~Junchi_Yan2;~Ping_Luo2;~Yu_Qiao1", "aff": "Shanghai AI Laboratory;University of Hong Kong;Shanghai Artificial Intelligence Laboratory;Shanghai AI Laboratory;Shanghai Artificial Intelligence Laboratory;Shanghai AI Lab;Shanghai AI Laboratory;;;", "aff_domain": "pjlab.org.cn;hku.hk;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;;;", "position": "Researcher;PhD student;Researcher;Intern;Researcher;Researcher;Researcher;;;", "bibtex": "@misc{\nyan2024spot,\ntitle={{SPOT}: Scalable 3D Pre-training via Occupancy Prediction for Autonomous Driving},\nauthor={Xiangchao Yan and Runjian Chen and Bo Zhang and Jiakang Yuan and Xinyu Cai and Botian Shi and Wenqi Shao and Junchi Yan and Ping Luo and Yu Qiao},\nyear={2024},\nurl={https://openreview.net/forum?id=9zEBK3E9bX}\n}", "github": "", "project": "", "reviewers": "y7L9;FWLb;4p2k", "site": "https://openreview.net/forum?id=9zEBK3E9bX", "pdf_size": 4762439, "rating": "3;5;5", "confidence": "4;4;4", "soundness": "2;3;2", "contribution": "2;3;2", "presentation": "3;4;3", "wc_summary": "77;147;85", "wc_strengths": "31;93;59", "wc_weaknesses": "56;368;297", "wc_questions": "39;3;48", "wc_review": "203;611;489", "wc_reply_reviewers": "48;0;0", "wc_reply_authors": "1513;734;4101", "reply_reviewers": "1;0;0", "reply_authors": "6;2;9", "rating_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 103.0, 31.283648551066843 ], "wc_strengths_avg": [ 61.0, 25.350871122442058 ], "wc_weaknesses_avg": [ 240.33333333333334, 133.52735882790296 ], "wc_questions_avg": [ 30.0, 19.44222209522358 ], "wc_review_avg": [ 434.3333333333333, 170.99187764985277 ], "wc_reply_reviewers_avg": [ 16.0, 22.627416997969522 ], "wc_reply_authors_avg": [ 2116.0, 1439.1847229131731 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 5.666666666666667, 2.8674417556808756 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 12, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=13020716792976066220&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2;0;2;3;0", "aff_unique_norm": "Shanghai AI Laboratory;University of Hong Kong;Shanghai Artificial Intelligence Laboratory;Shanghai AI Lab", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.shanghai-ai-lab.com;https://www.hku.hk;http://www.shailab.org/;https://www.shanghaiailab.com", "aff_unique_abbr": "SAIL;HKU;Shanghai AI Lab;SAIL", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "9zHxXaYEgw", "title": "LEO: Generative Latent Image Animator for Human Video Synthesis", "track": "main", "status": "Reject", "tldr": "", "abstract": "Spatio-temporal coherency is a major challenge in synthesizing high quality videos, particularly in synthesizing human videos that contain rich global and local deformations. To resolve this challenge, previous approaches have resorted to different features in the generation process aimed at representing appearance and motion. However, in the absence of strict mechanisms to guarantee such disentanglement, a separation of motion from appearance has remained challenging, resulting in spatial distortions and temporal jittering that break the spatio-temporal coherency. Motivated by this, we here propose LEO, a novel framework for human video synthesis, placing emphasis on spatio-temporal coherency. Our key idea is to represent motion as a sequence of flow maps in the generation process, which inherently isolate motion from appearance. We implement this idea via a flow-based image animator and a Latent Motion Diffusion Model (LMDM). The former bridges a space of motion codes with the space of flow maps, and synthesizes video frames in a warp-and-inpaint manner. LMDM learns to capture motion prior in the training data by synthesizing sequences of motion codes. Extensive quantitative and qualitative analysis suggests that LEO significantly improves coherent synthesis of human videos over previous methods on the datasets TaichiHD, FaceForensics and CelebV-HQ. 
In addition, the effective disentanglement of appearance and motion in LEO allows for two additional tasks, namely infinite-length human video synthesis, as well as content-preserving video editing.", "keywords": "video generation;diffusion models;talking head generation", "primary_area": "generative models", "supplementary_material": "/attachment/d6ecd717dc451a90b2a518048e38a28d685c543a.zip", "author": "Yaohui Wang;Xin Ma;Xinyuan Chen;Cunjian Chen;Antitza Dantcheva;Bo Dai;Yu Qiao", "authorids": "~Yaohui_Wang1;~Xin_Ma3;~Xinyuan_Chen1;~Cunjian_Chen2;~Antitza_Dantcheva1;~Bo_Dai2;~Yu_Qiao1", "gender": "M;;F;M;F;M;", "homepage": "https://wyhsirius.github.io/;https://maxin-cn.github.io/;;https://cunjian.github.io/;https://www-sop.inria.fr/members/Antitza.Dantcheva/;http://daibo.info/;", "dblp": "168/6263-1.html;;;73/2740.html;13/2986;64/2903-2;", "google_scholar": "R7LyAb4AAAAJ;https://scholar.google.com.hk/citations?user=dN8QWCQAAAAJ;3fWSC8YAAAAJ;f26cvh8AAAAJ;https://scholar.google.fr/citations?user=ZMggPHMAAAAJ;https://scholar.google.com.hk/citations?user=KNWTvgEAAAAJ;", "orcid": ";0000-0001-9389-9032;0000-0002-5517-7255;;0000-0003-0107-7029;0000-0003-0777-9232;", "linkedin": ";;;;antitza-dantcheva-ph-d-4b65b24/;;", "or_profile": "~Yaohui_Wang1;~Xin_Ma3;~Xinyuan_Chen1;~Cunjian_Chen2;~Antitza_Dantcheva1;~Bo_Dai2;~Yu_Qiao1", "aff": "Shanghai AI Laboratory;Monash University;Shanghai Artificial Intelligence Laboratory;Monash University;INRIA;Shanghai AI Laboratory;", "aff_domain": "pjlab.org.cn;monash.edu;pjlab.org.cn;monash.edu;inria.fr;pjlab.org.cn;", "position": "Research Scientist;PhD student;Research Scientist;Lecturer;Researcher;Scientist;", "bibtex": "@misc{\nwang2024leo,\ntitle={{LEO}: Generative Latent Image Animator for Human Video Synthesis},\nauthor={Yaohui Wang and Xin Ma and Xinyuan Chen and Cunjian Chen and Antitza Dantcheva and Bo Dai and Yu Qiao},\nyear={2024},\nurl={https://openreview.net/forum?id=9zHxXaYEgw}\n}", "github": "", "project": "", "reviewers": "NikU;8SWB;H4YQ", "site": "https://openreview.net/forum?id=9zHxXaYEgw", "pdf_size": 33226883, "rating": "3;6;6", "confidence": "4;3;4", "soundness": "3;3;3", "contribution": "2;3;3", "presentation": "3;3;3", "wc_summary": "45;74;77", "wc_strengths": "65;80;45", "wc_weaknesses": "46;123;62", "wc_questions": "2;55;30", "wc_review": "158;332;214", "wc_reply_reviewers": "151;165;0", "wc_reply_authors": "1234;746;432", "reply_reviewers": "1;2;0", "reply_authors": "3;2;1", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 65.33333333333333, 14.429907214608907 ], "wc_strengths_avg": [ 63.333333333333336, 14.337208778404378 ], "wc_weaknesses_avg": [ 77.0, 33.1762967593833 ], "wc_questions_avg": [ 29.0, 21.64871050817269 ], "wc_review_avg": [ 234.66666666666666, 72.52279335185287 ], "wc_reply_reviewers_avg": [ 105.33333333333333, 74.70088501632509 ], "wc_reply_authors_avg": [ 804.0, 329.97373632861553 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12488673244627239660&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1;2;1;3;0", "aff_unique_norm": "Shanghai AI Laboratory;Monash 
University;Shanghai Artificial Intelligence Laboratory;INRIA", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.shanghai-ai-lab.com;https://www.monash.edu;http://www.shailab.org/;https://www.inria.fr", "aff_unique_abbr": "SAIL;Monash;Shanghai AI Lab;INRIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1;2;0", "aff_country_unique": "China;Australia;France" }, { "title": "Leveraging Generative Models for Unsupervised Alignment of Neural Time Series Data", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19278", "id": "9zhHVyLY4K", "author_site": "Ayesha Vermani, Il Memming Park, Josue Nassar", "tldr": "", "abstract": "Large scale inference models are widely used in neuroscience to extract latent representations from high-dimensional neural recordings. Due to the statistical heterogeneities between sessions and animals, a new model is trained from scratch to infer the underlying dynamics for each new dataset. This is computationally expensive and does not fully leverage all the available data. Moreover, as these models get more complex, they can be challenging to train. In parallel, it is becoming common to use pre-trained models in the machine learning community for few shot and transfer learning. One major hurdle that prevents the re-use of generative models in neuroscience is the complex spatio-temporal structure of neural dynamics within and across animals. Interestingly, the underlying dynamics identified from different datasets on the same task are qualitatively similar. In this work, we exploit this observation and propose a source-free and unsupervised alignment approach that utilizes the learnt dynamics and enables the re-use of trained generative models. We validate our approach on simulations and show the efficacy of the alignment on neural recordings from the motor cortex obtained during a reaching task.", "keywords": "neural dynamics;transfer learning;distribution alignment;neuroscience;few-shot learning", "primary_area": "applications to neuroscience & cognitive science", "supplementary_material": "", "author": "Ayesha Vermani;Il Memming Park;Josue Nassar", "authorids": "~Ayesha_Vermani1;~Il_Memming_Park1;~Josue_Nassar1", "gender": ";M;M", "homepage": ";http://catniplab.github.io/;", "dblp": ";00/4652-2;230/8314", "google_scholar": "https://scholar.google.com/citations?hl=en;CsmltusAAAAJ;a5RNqTYAAAAJ", "orcid": ";0000-0002-4255-7750;", "linkedin": "ayesha-vermani-3825151a2/;memming/;", "or_profile": "~Ayesha_Vermani1;~Il_Memming_Park1;~Josue_Nassar1", "aff": "State University of New York, Stony Brook;Champalimaud Centre for the Unknown;Optum Labs", "aff_domain": "stonybrook.edu;fchampalimaud.org;optum.com", "position": "PhD student;Associate Professor;Researcher", "bibtex": "@inproceedings{\nvermani2024leveraging,\ntitle={Leveraging Generative Models for Unsupervised Alignment of Neural Time Series Data},\nauthor={Ayesha Vermani and Il Memming Park and Josue Nassar},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=9zhHVyLY4K}\n}", "github": "", "project": "", "reviewers": "nsT1;Jh7W;MYCT;H5ee", "pdf_size": 5965413, "rating": "6;6;6;8", "confidence": "3;3;4;4", "soundness": "2;3;3;4", "contribution": "2;3;3;4", "presentation": "2;3;2;3", "wc_summary": "46;87;100;134", "wc_strengths": "36;62;50;84", "wc_weaknesses": "30;132;109;101", "wc_questions": "231;26;19;71", "wc_review": "343;307;278;390", 
"wc_reply_reviewers": "12;12;15;11", "wc_reply_authors": "574;443;360;374", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 91.75, 31.499007920885383 ], "wc_strengths_avg": [ 58.0, 17.60681686165901 ], "wc_weaknesses_avg": [ 93.0, 38.11167800031901 ], "wc_questions_avg": [ 86.75, 85.63987097141144 ], "wc_review_avg": [ 329.5, 41.8359892915179 ], "wc_reply_reviewers_avg": [ 12.5, 1.5 ], "wc_reply_authors_avg": [ 437.75, 84.70647850076168 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=140027540675777815&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "openreview": "https://openreview.net/forum?id=9zhHVyLY4K", "pdf": "https://openreview.net/pdf?id=9zhHVyLY4K", "email": "stonybrook.edu;fchampalimaud.org;optum.com", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "State University of New York;Champalimaud Centre for the Unknown;Optum Labs", "aff_unique_dep": ";;", "aff_unique_url": "https://www.stonybrook.edu;https://www.champalimaud.org;https://www.optumlabs.com", "aff_unique_abbr": "SUNY Stony Brook;CCU;", "aff_campus_unique_index": "0", "aff_campus_unique": "Stony Brook;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;Portugal" }, { "id": "9zpOUsOvLM", "title": "Aligning Persistent Homology with Graph Pooling", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recently, there has been an emerging trend to integrate persistent homology (PH) into graph neural networks (GNNs) to enrich expressive power. However, naively plugging PH features into GNN layers always results in marginal improvement with low interpretability. In this paper, we investigate a novel mechanism for injecting global topological invariance into pooling layers using PH, motivated by the observation that filtration operation in PH naturally aligns graph pooling in a cut-off manner. In this fashion, message passing in the coarsened graph is performed along persistent sub-topology, leading to improved performance. Experimentally, we apply our mechanism to a collection of graph pooling methods and observe consistent and substantial performance gain over several popular datasets, demonstrating its wide applicability and flexibility. 
Code is open-sourced at https://anonymous.4open.science/r/TIP.", "keywords": "graph pooling;persistent homology;graph neural networks", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Chaolong Ying;Xinjian Zhao;Tianshu Yu", "authorids": "~Chaolong_Ying1;~Xinjian_Zhao2;~Tianshu_Yu2", "gender": "M;M;M", "homepage": "https://sds.cuhk.edu.cn/node/708;https://sduzxj.github.io//;https://mypage.cuhk.edu.cn/academics/yutianshu/", "dblp": "295/3323;02/8613;152/6675", "google_scholar": "1-O3158AAAAJ;iKuIMsgAAAAJ;MTHO7DsAAAAJ", "orcid": "0000-0002-8555-8817;0009-0003-1553-8209;0000-0002-6537-1924", "linkedin": ";;", "or_profile": "~Chaolong_Ying1;~Xinjian_Zhao2;~Tianshu_Yu2", "aff": "The Chinese University of Hong Kong, Shenzhen;Chinese University of Hong Kong (Shenzhen);Chinese University of Hong Kong (Shenzhen)", "aff_domain": "cuhk.edu.cn;cuhk.edu.cn;cuhk.edu.cn", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@misc{\nying2024aligning,\ntitle={Aligning Persistent Homology with Graph Pooling},\nauthor={Chaolong Ying and Xinjian Zhao and Tianshu Yu},\nyear={2024},\nurl={https://openreview.net/forum?id=9zpOUsOvLM}\n}", "github": "", "project": "", "reviewers": "r2G4;XtaZ;adEb;S9nw", "site": "https://openreview.net/forum?id=9zpOUsOvLM", "pdf_size": 1443618, "rating": "3;3;5;6", "confidence": "4;4;4;3", "soundness": "2;1;2;4", "contribution": "2;1;2;3", "presentation": "3;2;3;3", "wc_summary": "47;76;49;100", "wc_strengths": "35;32;31;47", "wc_weaknesses": "297;241;242;44", "wc_questions": "134;445;2;332", "wc_review": "513;794;324;523", "wc_reply_reviewers": "47;354;84;11", "wc_reply_authors": "1051;1622;671;479", "reply_reviewers": "1;1;1;1", "reply_authors": "3;4;2;1", "rating_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.25, 1.0897247358851685 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 68.0, 21.737065119284157 ], "wc_strengths_avg": [ 36.25, 6.378675411086537 ], "wc_weaknesses_avg": [ 206.0, 96.23668739103607 ], "wc_questions_avg": [ 228.25, 171.62222321133123 ], "wc_review_avg": [ 538.5, 167.46716095999238 ], "wc_reply_reviewers_avg": [ 124.0, 135.27564451888597 ], "wc_reply_authors_avg": [ 955.75, 436.2724922568463 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.7777777777777777, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ZGLE4NwJYyMJ:scholar.google.com/&scioq=Aligning+Persistent+Homology+with+Graph+Pooling&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Chinese University of Hong Kong", "aff_unique_dep": "", "aff_unique_url": "https://www.cuhk.edu.cn", "aff_unique_abbr": "CUHK", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Shenzhen", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "9ztL7Trdnx", "title": "TAFS: Task-aware Activation Function Search for Graph Neural Networks", "track": "main", "status": "Reject", "tldr": "", "abstract": "Since the inception of Graph Neural Networks (GNNs), extensive research efforts have concentrated on enhancing graph convolution, refining pooling operations, devising robust training strategies, and advancing theoretical foundations. 
Notably, one critical facet of current GNN research remains conspicuously underexplored\u2014the design of activation functions. Activation functions serve as pivotal components, imbuing GNNs with the essential capacity for non-linearity. Yet, the ubiquitous adoption of Rectified Linear Units (ReLU) persists.\nIn our study, we embark on a mission to craft task-aware activation functions tailored for diverse GNN applications. We introduce TAFS (Task-aware Activation Function Search), an adept and efficient framework for activation function design. TAFS leverages a streamlined parameterization and frames the problem as a bi-level stochastic optimization challenge. To enhance the search for smooth activation functions, we incorporate additional Lipschitz regularization. Our approach automates the discovery of the optimal activation patterns, customizing them to suit any downstream task seamlessly. Crucially, this entire process unfolds end-to-end without imposing significant computational or memory overhead. Comprehensive experimentation underscores the efficacy of our method. We consistently achieve substantial improvements across a spectrum of tasks, including node classification over diverse graph data. Moreover, our approach surpasses state-of-the-art results in the realm of link-level tasks, particularly in biomedical applications.", "keywords": "Activation Function;Graph Neural Networks;AutoML;Neural Architecture Search", "primary_area": "learning on graphs and other geometries & topologies", "supplementary_material": "/attachment/0aea1b17e69a8e92bb8279c8f63dc4c2b2a7f169.zip", "author": "Zhen Xu;quanming yao;Xiaojin Zhang;Qiang Yang", "authorids": "~Zhen_Xu4;~quanming_yao1;~Xiaojin_Zhang2;~Qiang_Yang1", "gender": "Not Specified;M;F;", "homepage": "https://NehzUx.github.io;https://lars-group.github.io/;https://xiaojin319.github.io/;", "dblp": ";158/1014;69/8512-2;", "google_scholar": "MhE__x0AAAAJ;https://scholar.google.com/schhp?hl=en;https://scholar.google.com/citations?hl=zh-TW;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Zhen_Xu4;~quanming_yao1;~Xiaojin_Zhang2;~Qiang_Yang1", "aff": "University of Chicago;Department of Electronic Engineering;Huazhong University of Science and Technology;", "aff_domain": "uchicago.edu;tsinghua.edu.cn;hust.edu.cn;", "position": "PhD student;Assistant Professor;Assistant Professor;", "bibtex": "@misc{\nxu2024tafs,\ntitle={{TAFS}: Task-aware Activation Function Search for Graph Neural Networks},\nauthor={Zhen Xu and quanming yao and Xiaojin Zhang and Qiang Yang},\nyear={2024},\nurl={https://openreview.net/forum?id=9ztL7Trdnx}\n}", "github": "", "project": "", "reviewers": "SDpu;vEQt;kgxw;JLTs", "site": "https://openreview.net/forum?id=9ztL7Trdnx", "pdf_size": 1320361, "rating": "5;5;5;5", "confidence": "5;4;3;3", "soundness": "2;3;3;2", "contribution": "3;2;2;1", "presentation": "4;3;3;3", "wc_summary": "70;128;72;69", "wc_strengths": "37;73;84;27", "wc_weaknesses": "89;136;171;97", "wc_questions": "2;199;4;31", "wc_review": "198;536;331;224", "wc_reply_reviewers": "0;0;0;10", "wc_reply_authors": "1486;1301;1398;1080", "reply_reviewers": "0;0;0;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 84.75, 24.993749218554626 ], "wc_strengths_avg": [ 55.25, 23.836683913665507 ], "wc_weaknesses_avg": [ 123.25, 32.80529682840867 ], "wc_questions_avg": [ 
59.0, 81.63638894512667 ], "wc_review_avg": [ 322.25, 133.09465616620375 ], "wc_reply_reviewers_avg": [ 2.5, 4.330127018922194 ], "wc_reply_authors_avg": [ 1316.25, 151.2818148357561 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:TeHRN0G1cAEJ:scholar.google.com/&scioq=TAFS:+Task-aware+Activation+Function+Search+for+Graph+Neural+Networks&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Chicago;Institution Name Not Provided;Huazhong University of Science and Technology", "aff_unique_dep": ";Department of Electronic Engineering;", "aff_unique_url": "https://www.uchicago.edu;;http://www.hust.edu.cn", "aff_unique_abbr": "UChicago;;HUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;2", "aff_country_unique": "United States;;China" }, { "id": "A0DI5v6m8O", "title": "Black-Box Gradient Matching for Reliable Offline Black-Box Optimization", "track": "main", "status": "Reject", "tldr": "", "abstract": "Offline design optimization problem arises in numerous science and engineering applications including materials engineering, where expensive online experimentation necessitates the use of in silico surrogate functions to predict and maximize the target objective over candidate designs. Although these surrogates can be learned from offline data, their predictions can be potentially inaccurate outside the offline data regime. This challenge raises a fundamental question about the impact of imperfect surrogate model on the performance gap between its optima and the true oracle optima, and to what extent the performance loss can be mitigated. Although prior work developed methods to improve the robustness of surrogate models and their associated optimization processes, a provably quantifiable relationship between an imperfect surrogate and the corresponding performance gap, and whether prior methods directly address it, remain elusive. To shed more light on this important question, we present a novel theoretical formulation to understand offline black-box optimization, by explicitly bounding the optimization quality based on how well the surrogate matches the latent gradient field that underlines the offline data. Inspired by our theoretical analysis, we propose a principled black-box gradient matching algorithm to create effective surrogate models for offline optimization. 
Experiments on diverse real-world benchmarks demonstrate improved optimization quality using our approach to create surrogates.", "keywords": "Offline Optimization;Black-Box Optimization", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/59d372ecc04948c0197dd9e33c167396517be0c5.zip", "author": "Minh Hoang;Azza Fadhel;Aryan Deshwal;Jana Doppa;Trong Nghia Hoang", "authorids": "~Minh_Hoang1;~Azza_Fadhel1;~Aryan_Deshwal1;~Jana_Doppa1;~Trong_Nghia_Hoang1", "gender": "M;F;M;;", "homepage": ";;https://aryandeshwal.github.io/;;", "dblp": ";;246/3012.html;;", "google_scholar": "56Mb6DY0_NUC;;wNEYBrAAAAAJ;;", "orcid": ";;;;", "linkedin": ";azza-fadhel-594a21246/;aryan-deshwal-a27835120/;;", "or_profile": "~Minh_Hoang1;~Azza_Fadhel1;~Aryan_Deshwal1;~Jana_Doppa1;~Trong_Nghia_Hoang1", "aff": "Princeton University;Washington State University at Pullman;University of Minnesota - Twin Cities;;", "aff_domain": "princeton.edu;wsu.edu;umn.edu;;", "position": "Postdoc;PhD student;Assistant Professor;;", "bibtex": "@misc{\nhoang2024blackbox,\ntitle={Black-Box Gradient Matching for Reliable Offline Black-Box Optimization},\nauthor={Minh Hoang and Azza Fadhel and Aryan Deshwal and Jana Doppa and Trong Nghia Hoang},\nyear={2024},\nurl={https://openreview.net/forum?id=A0DI5v6m8O}\n}", "github": "", "project": "", "reviewers": "1MFJ;eLsu;7Vbx;ehny", "site": "https://openreview.net/forum?id=A0DI5v6m8O", "pdf_size": 669087, "rating": "3;5;6;8", "confidence": "4;5;2;4", "soundness": "2;1;3;3", "contribution": "2;1;3;4", "presentation": "3;2;3;4", "wc_summary": "43;139;140;24", "wc_strengths": "19;31;83;11", "wc_weaknesses": "373;159;139;6", "wc_questions": "65;2;94;32", "wc_review": "500;331;456;73", "wc_reply_reviewers": "537;406;0;0", "wc_reply_authors": "1756;1751;995;122", "reply_reviewers": "2;3;0;0", "reply_authors": "4;5;3;1", "rating_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "contribution_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 86.5, 53.42518132865812 ], "wc_strengths_avg": [ 36.0, 28.053520278211074 ], "wc_weaknesses_avg": [ 169.25, 131.51497063072324 ], "wc_questions_avg": [ 48.25, 34.55701810052482 ], "wc_review_avg": [ 340.0, 166.15203880783406 ], "wc_reply_reviewers_avg": [ 235.75, 240.25650355401413 ], "wc_reply_authors_avg": [ 1156.0, 672.514312115363 ], "reply_reviewers_avg": [ 1.25, 1.299038105676658 ], "reply_authors_avg": [ 3.25, 1.479019945774904 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.19088542889273336, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:bBQRVXHIg6kJ:scholar.google.com/&scioq=Black-Box+Gradient+Matching+for+Reliable+Offline+Black-Box+Optimization&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "Princeton University;Washington State University;University of Minnesota", "aff_unique_dep": ";;", "aff_unique_url": "https://www.princeton.edu;https://wsu.edu;https://www.minnesota.edu", "aff_unique_abbr": "Princeton;WSU;UMN", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Pullman;Twin Cities", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Mechanistically analyzing the effects of fine-tuning on procedurally defined tasks", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19277", 
"id": "A0HKeKl4Nl", "author_site": "Samyak Jain, Robert Kirk, Ekdeep Singh Lubana, Robert Dick, Hidenori Tanaka, Tim Rocktaeschel, Edward Grefenstette, David Krueger", "tldr": "", "abstract": "Fine-tuning large pre-trained models has become the de facto strategy for developing both task-specific and general-purpose machine learning systems, including developing models that are safe to deploy. Despite its clear importance, there has been minimal work that explains how fine-tuning alters the underlying capabilities learned by a model during pretraining: does fine-tuning yield entirely novel capabilities or does it just modulate existing ones? We address this question empirically in synthetic, controlled settings where we can use mechanistic interpretability tools (e.g., network pruning and probing) to understand how the model's underlying capabilities are changing. We perform an extensive analysis of the effects of fine-tuning in these settings, and show that: (i) fine-tuning rarely alters the underlying model capabilities; (ii) a minimal transformation, which we call a `wrapper', is typically learned on top of the underlying model capabilities, creating the illusion that they have been modified; and (iii) further fine-tuning on a task where such ``wrapped capabilities'' are relevant leads to sample-efficient revival of the capability, i.e., the model begins reusing these capabilities after only a few gradient steps. This indicates that practitioners can unintentionally remove a model's safety wrapper merely by fine-tuning it on a, e.g., superficially unrelated, downstream task. We additionally perform analysis on language models trained on the TinyStories dataset to support our claims in a more realistic setup.", "keywords": "Fine-Tuning;Interpretability;Mechanisms", "primary_area": "visualization or interpretation of learned representations", "supplementary_material": "", "author": "Samyak Jain;Robert Kirk;Ekdeep Singh Lubana;Robert P. 
Dick;Hidenori Tanaka;Tim Rockt\u00e4schel;Edward Grefenstette;David Krueger", "authorids": "~Samyak_Jain1;~Robert_Kirk1;~Ekdeep_Singh_Lubana1;~Robert_P._Dick1;~Hidenori_Tanaka1;~Tim_Rockt\u00e4schel1;~Edward_Grefenstette1;~David_Krueger1", "gender": "M;M;M;M;;M;M;M", "homepage": "https://samyakjain0112.github.io/;https://robertkirk.github.io;https://ekdeepslubana.github.io/;http://robertdick.org/;https://sites.google.com/view/htanaka/home;http://egrefen.com/;https://mila.umontreal.ca/en/person/david-scott-krueger/;http://rockt.ai", "dblp": "249/4464.html;01/9684;228/2683;84/523.html;;http://dblp.uni-trier.de/pers/hd/g/Grefenstette:Edward;142/2741.html;43/11537", "google_scholar": "https://scholar.google.co.in/citations?hl=en;https://scholar.google.co.uk/citations?user=PL5KWdYAAAAJ;https://scholar.google.co.in/citations?user=OP7S3vsAAAAJ;;f_pWOGIAAAAJ;https://scholar.google.co.uk/citations?user=ezllEwMAAAAJ;https://scholar.google.ca/citations?user=5Uz70IoAAAAJ;https://scholar.google.co.uk/citations?user=mWBY8aIAAAAJ", "orcid": "0000-0003-3785-4782;;;;;;;", "linkedin": "samyak-jain-276738178/;;;;;;;rockt/", "or_profile": "~Samyak_Jain1;~Robert_Kirk1;~Ekdeep_Singh_Lubana1;~Robert_P._Dick1;~Hidenori_Tanaka1;~Edward_Grefenstette1;~David_Krueger1;~Tim_Rocktaeschel1", "aff": "Five AI;University College London;University of Michigan;University of Michigan;Physics & Informatics Lab, NTT Research, Inc.;Google DeepMind;University of Cambridge;Google DeepMind", "aff_domain": "five.ai;ucl.ac.uk;umich.edu;umich.edu;ntt-research.com;deepmind.com;cam.ac.uk;google.com", "position": "Intern;PhD student;PhD student;Full Professor;Senior Research Scientist;Principal Researcher;Assistant Professor;Senior Staff Research Scientist", "bibtex": "@inproceedings{\njain2024mechanistically,\ntitle={Mechanistically analyzing the effects of fine-tuning on procedurally defined tasks},\nauthor={Samyak Jain and Robert Kirk and Ekdeep Singh Lubana and Robert P. 
Dick and Hidenori Tanaka and Tim Rockt{\\\"a}schel and Edward Grefenstette and David Krueger},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=A0HKeKl4Nl}\n}", "github": "", "project": "", "reviewers": "GZdd;wEP2;eQAm", "pdf_size": 5401464, "rating": "6;6;8", "confidence": "4;4;4", "soundness": "3;2;4", "contribution": "3;3;3", "presentation": "2;2;3", "wc_summary": "21;78;51", "wc_strengths": "60;126;90", "wc_weaknesses": "402;630;253", "wc_questions": "7;6;19", "wc_review": "490;840;413", "wc_reply_reviewers": "0;28;63", "wc_reply_authors": "938;1616;2040", "reply_reviewers": "0;1;1", "reply_authors": "2;3;3", "rating_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 50.0, 23.280893453645632 ], "wc_strengths_avg": [ 92.0, 26.981475126464083 ], "wc_weaknesses_avg": [ 428.3333333333333, 155.0318963597133 ], "wc_questions_avg": [ 10.666666666666666, 5.90668171555645 ], "wc_review_avg": [ 581.0, 185.8189082592691 ], "wc_reply_reviewers_avg": [ 30.333333333333332, 25.77250904010361 ], "wc_reply_authors_avg": [ 1531.3333333333333, 453.8555815332548 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 62, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3773752858357865751&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "openreview": "https://openreview.net/forum?id=A0HKeKl4Nl", "pdf": "https://openreview.net/pdf?id=A0HKeKl4Nl", "email": "five.ai;ucl.ac.uk;umich.edu;umich.edu;ntt-research.com;deepmind.com;cam.ac.uk;google.com", "author_num": 8, "aff_unique_index": "0;1;2;2;3;4;5;4", "aff_unique_norm": "Five AI;University College London;University of Michigan;NTT Research, Inc.;Google;University of Cambridge", "aff_unique_dep": ";;;Physics & Informatics Lab;Google DeepMind;", "aff_unique_url": "https://www.five.ai;https://www.ucl.ac.uk;https://www.umich.edu;https://www.ntt-research.com;https://deepmind.com;https://www.cam.ac.uk", "aff_unique_abbr": "Five AI;UCL;UM;NTT Research;DeepMind;Cambridge", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0;1;1;1;0;0;0", "aff_country_unique": "United Kingdom;United States" }, { "title": "Course Correcting Koopman Representations", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19276", "id": "A18gWgc5mi", "author_site": "Mahan Fathi, Clement Gehring, Jonathan Pilault, David Kanaa, Pierre-Luc Bacon, Ross Goroshin", "tldr": "", "abstract": "Koopman representations aim to learn features of nonlinear dynamical systems (NLDS) which lead to linear dynamics in the latent space. Theoretically, such features can be used to simplify many problems in modeling and control of NLDS. In this work we study autoencoder formulations of this problem, and different ways they can be used to model dynamics, specifically for future state prediction over long horizons. We discover several limitations of predicting future states in the latent space and propose an inference-time mechanism, which we refer to as Periodic Reencoding, for faithfully capturing long term dynamics. 
We justify this method both analytically and empirically via experiments in low and high dimensional NLDS.", "keywords": "Koopman;Autoencoders;Dynamical Systems;Sequence Modeling;Inference-time Methods;Planning;Unsupervised Learning;Representation Learning;Robotics", "primary_area": "applications to robotics, autonomy, planning", "supplementary_material": "/attachment/8bf353eea7668c39a57b740083ffac9e690d322e.pdf", "author": "Mahan Fathi;Clement Gehring;Jonathan Pilault;David Kanaa;Pierre-Luc Bacon;Ross Goroshin", "authorids": "~Mahan_Fathi1;~Clement_Gehring1;~Jonathan_Pilault1;~David_Kanaa1;~Pierre-Luc_Bacon1;~Ross_Goroshin1", "gender": "M;M;;;;", "homepage": "https://mahanfathi.github.io/;http://people.csail.mit.edu/gehring/;;;;", "dblp": ";131/5247;248/8053.html;;;", "google_scholar": ";KvX7mJUAAAAJ;https://scholar.google.com/citations?hl=en;;;EC4o-1oAAAAJ", "orcid": ";;;;;", "linkedin": "mahanfathi/;;jonathanpilault/;;;", "or_profile": "~Mahan_Fathi1;~Clement_Gehring1;~Jonathan_Pilault1;~David_Kanaa1;~Pierre-Luc_Bacon1;~Ross_Goroshin1", "aff": "Mila;Massachusetts Institute of Technology;Polytechnique Montreal;;;Google", "aff_domain": "mila.quebec;mit.edu;polymtl.ca;;;google.com", "position": "MS student;PhD student;PhD student;;;Research Scientist", "bibtex": "@inproceedings{\nfathi2024course,\ntitle={Course Correcting Koopman Representations},\nauthor={Mahan Fathi and Clement Gehring and Jonathan Pilault and David Kanaa and Pierre-Luc Bacon and Ross Goroshin},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=A18gWgc5mi}\n}", "github": "", "project": "", "reviewers": "f4vn;Fg8g;FYpR;Woag;QVMC", "pdf_size": 30922143, "rating": "6;6;6;8;8", "confidence": "2;3;2;3;4", "soundness": "3;3;3;4;4", "contribution": "2;2;3;4;3", "presentation": "3;3;3;4;3", "wc_summary": "422;57;70;54;119", "wc_strengths": "89;18;14;24;57", "wc_weaknesses": "510;138;156;33;368", "wc_questions": "141;26;100;24;3", "wc_review": "1162;239;340;135;547", "wc_reply_reviewers": "0;81;0;11;112", "wc_reply_authors": "1450;656;895;291;1591", "reply_reviewers": "0;1;0;1;3", "reply_authors": "3;2;3;1;4", "rating_avg": [ 6.8, 0.9797958971132712 ], "confidence_avg": [ 2.8, 0.7483314773547882 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "contribution_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 144.4, 140.75027531056557 ], "wc_strengths_avg": [ 40.4, 28.653795560099887 ], "wc_weaknesses_avg": [ 241.0, 172.96704888504053 ], "wc_questions_avg": [ 58.8, 52.63990881451069 ], "wc_review_avg": [ 484.6, 365.02416358372767 ], "wc_reply_reviewers_avg": [ 40.8, 46.6964666757561 ], "wc_reply_authors_avg": [ 976.6, 486.0216456085058 ], "reply_reviewers_avg": [ 1.0, 1.0954451150103321 ], "reply_authors_avg": [ 2.6, 1.019803902718557 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.7637626158259733, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16303568290070259858&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=A18gWgc5mi", "pdf": "https://openreview.net/pdf?id=A18gWgc5mi", "email": "mila.quebec;mit.edu;polymtl.ca;;;google.com", "author_num": 6, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Mila;Massachusetts Institute of Technology;Polytechnique Montreal;Google", "aff_unique_dep": "Quebec Artificial Intelligence Institute;;;Google", "aff_unique_url": 
"https://mila.quebec;https://web.mit.edu;https://www.polymtl.ca;https://www.google.com", "aff_unique_abbr": "Mila;MIT;PolyMTL;Google", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Montreal;Mountain View", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "Canada;United States" }, { "id": "A1z0JnxnGp", "title": "Power Characterization of Noisy Quantum Kernels", "track": "main", "status": "Reject", "tldr": "", "abstract": "Quantum kernel methods have been widely recognized as one of promising quantum machine learning algorithms that have potential to achieve quantum advantages. In this paper, we theoretically characterize the power of noisy quantum kernels and demonstrate that under global depolarization noise, for different input data the predictions of the optimal hypothesis inferred by the noisy quantum kernel approximately concentrate towards some fixed value. In particular, we depict the convergence rate in terms of the strength of quantum noise, the size of training samples, the number of qubits, the number of layers affected by quantum noises, as well as the number of measurement shots. Our results show that noises may make quantum kernel methods to only have poor prediction capability, even when the generalization error is small. Thus, we provide a crucial warning to employ noisy quantum kernel methods for quantum computation and the theoretical results can also serve as guidelines when developing practical quantum kernel algorithms for achieving quantum advantages.", "keywords": "quantum kernel;quantum machine learning;prediction capability;noisy quantum circuit", "primary_area": "metric learning, kernel learning, and sparse coding", "supplementary_material": "", "author": "Yabo Wang;Bo Qi;Xin Wang;Tongliang Liu;Daoyi Dong", "authorids": "~Yabo_Wang1;~Bo_Qi1;~Xin_Wang75;~Tongliang_Liu1;~Daoyi_Dong1", "gender": ";M;M;M;M", "homepage": "https://github.com/yabowang23;;https://sheffieldwang.github.io/;https://tongliang-liu.github.io/;https://profiles.uts.edu.au/Daoyi.Dong", "dblp": ";;;150/6667;27/3317", "google_scholar": ";;;https://scholar.google.com.au/citations?user=EiLdZ_YAAAAJ;https://scholar.google.com.au/citations?hl=en", "orcid": ";0000-0002-4504-0124;;;0000-0002-7425-3559", "linkedin": ";;;;", "or_profile": "~Yabo_Wang1;~Bo_Qi1;~Xin_Wang75;~Tongliang_Liu1;~Daoyi_Dong1", "aff": "Academy of Mathematics and Systems Science, Chinese Academy of Sciences, Chinese Academy of Sciences;Academy of Mathematics and Systems Science, Chinese Academy of Sciences;Academy of Mathematics and Systems Science, Chinese Academy of Sciences, Chinese Academy of Sciences;Mohamed bin Zayed University of Artificial Intelligence;Australian National University", "aff_domain": "amss.ac.cn;amss.ac.cn;amss.ac.cn;mbzuai.ac.ae;anu.edu.au", "position": "PhD student;Associate Professor;MS student;Affiliated Associate Professor;Full Professor", "bibtex": "@misc{\nwang2024power,\ntitle={Power Characterization of Noisy Quantum Kernels},\nauthor={Yabo Wang and Bo Qi and Xin Wang and Tongliang Liu and Daoyi Dong},\nyear={2024},\nurl={https://openreview.net/forum?id=A1z0JnxnGp}\n}", "github": "", "project": "", "reviewers": "8JA5;XjgB;5XBn;VA13;M7Gm", "site": "https://openreview.net/forum?id=A1z0JnxnGp", "pdf_size": 508969, "rating": "3;5;5;5;6", "confidence": "3;4;4;4;3", "soundness": "2;3;3;2;4", "contribution": "2;2;2;2;2", "presentation": "3;2;3;4;3", "wc_summary": "60;138;82;43;104", "wc_strengths": "41;79;74;56;88", "wc_weaknesses": "127;129;131;108;124", "wc_questions": "38;162;33;45;38", 
"wc_review": "266;508;320;252;354", "wc_reply_reviewers": "57;40;68;172;151", "wc_reply_authors": "943;1662;1231;1259;598", "reply_reviewers": "1;1;1;1;1", "reply_authors": "4;4;4;2;3", "rating_avg": [ 4.8, 0.9797958971132712 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 85.4, 33.368248380758615 ], "wc_strengths_avg": [ 67.6, 16.906803364326446 ], "wc_weaknesses_avg": [ 123.8, 8.23164625090267 ], "wc_questions_avg": [ 63.2, 49.547552916365106 ], "wc_review_avg": [ 340.0, 91.6951470907812 ], "wc_reply_reviewers_avg": [ 97.6, 53.34641506230761 ], "wc_reply_authors_avg": [ 1138.6, 354.25674305508994 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.4, 0.8 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.25000000000000006, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13191641656876513006&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;1;2", "aff_unique_norm": "Chinese Academy of Sciences;Mohamed bin Zayed University of Artificial Intelligence;Australian National University", "aff_unique_dep": "Academy of Mathematics and Systems Science;;", "aff_unique_url": "http://www.cas.cn;https://mbzuai.ac.ae;https://www.anu.edu.au", "aff_unique_abbr": "CAS;MBZUAI;ANU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;2", "aff_country_unique": "China;United Arab Emirates;Australia" }, { "id": "A2KKgcYYDB", "title": "Global Convergence Rate of Deep Equilibrium Models with General Activations", "track": "main", "status": "Reject", "tldr": "", "abstract": "In a recent paper, Ling et al. investigated the over-parametrized Deep Equilibrium Model (DEQ) with ReLU activation. They proved that the gradient descent converges to a globally optimal solution at a linear convergence rate for the quadratic loss function. This paper shows that this fact still holds for DEQs with any general activation that has bounded first and second derivatives. Since the new activation function is generally non-linear, bounding the least eigenvalue of the Gram matrix of the equilibrium point is particularly challenging. To accomplish this task, we need to create a novel population Gram matrix and develop a new form of dual activation with Hermite polynomial expansion.", "keywords": "Deep Learning;Deep Equilibrium Model;Gradient Descent Convergence;Hermite Polynomial Expansion", "primary_area": "learning theory", "supplementary_material": "", "author": "Lan V. Truong", "authorids": "~Lan_V._Truong1", "gender": "M", "homepage": "https://sites.google.com/site/truongvinhlan/", "dblp": "91/11265.html", "google_scholar": "BCoSKrQAAAAJ", "orcid": "", "linkedin": "", "or_profile": "~Lan_Vinh_Truong1", "aff": "University of Essex", "aff_domain": "essex.ac.uk", "position": "Lecturer", "bibtex": "@misc{\ntruong2024global,\ntitle={Global Convergence Rate of Deep Equilibrium Models with General Activations},\nauthor={Lan V. 
Truong},\nyear={2024},\nurl={https://openreview.net/forum?id=A2KKgcYYDB}\n}", "github": "", "project": "", "reviewers": "pmW6;fu3L;ubP5;SDT1", "site": "https://openreview.net/forum?id=A2KKgcYYDB", "pdf_size": 430501, "rating": "1;3;5;6", "confidence": "2;5;4;3", "soundness": "2;3;3;3", "contribution": "1;2;2;2", "presentation": "1;2;2;2", "wc_summary": "27;47;142;76", "wc_strengths": "11;92;123;51", "wc_weaknesses": "178;149;444;98", "wc_questions": "39;2;50;16", "wc_review": "255;290;759;241", "wc_reply_reviewers": "588;0;380;9", "wc_reply_authors": "669;490;1261;170", "reply_reviewers": "1;0;3;1", "reply_authors": "1;1;4;1", "rating_avg": [ 3.75, 1.920286436967152 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_avg": [ 73.0, 43.47988040461933 ], "wc_strengths_avg": [ 69.25, 42.22780482099442 ], "wc_weaknesses_avg": [ 217.25, 134.01002760987703 ], "wc_questions_avg": [ 26.75, 18.833148966649205 ], "wc_review_avg": [ 386.25, 215.9460291369119 ], "wc_reply_reviewers_avg": [ 244.25, 250.79511059827303 ], "wc_reply_authors_avg": [ 647.5, 396.75464710573965 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.29111125486979095, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5049746081414068337&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0", "aff_unique_norm": "University of Essex", "aff_unique_dep": "", "aff_unique_url": "https://www.essex.ac.uk", "aff_unique_abbr": "Essex", "aff_country_unique_index": "0", "aff_country_unique": "United Kingdom" }, { "title": "Semantic Flow: Learning Semantic Fields of Dynamic Scenes from Monocular Videos", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19275", "id": "A2mRcRyGdl", "author_site": "Fengrui Tian, Yueqi Duan, Angtian Wang, Jianfei Guo, Shaoyi Du", "tldr": "", "abstract": "In this work, we pioneer Semantic Flow, a neural semantic representation of dynamic scenes from monocular videos. In contrast to previous NeRF methods that reconstruct dynamic scenes from the colors and volume densities of individual points, Semantic Flow learns semantics from continuous flows that contain rich 3D motion information. As there is 2D-to-3D ambiguity problem in the viewing direction when extracting 3D flow features from 2D video frames, we consider the volume densities as opacity priors that describe the contributions of flow features to the semantics on the frames. More specifically, we first learn a flow network to predict flows in the dynamic scene, and propose a flow feature aggregation module to extract flow features from video frames. Then, we propose a flow attention module to extract motion information from flow features, which is followed by a semantic network to output semantic logits of flows. We integrate the logits with\nvolume densities in the viewing direction to supervise the flow features with semantic labels on video frames. 
Experimental results show that our model is able to learn from multiple dynamic scenes and supports a series of new tasks such as instance-level scene editing, semantic completions, dynamic scene tracking and semantic adaption on novel scenes.", "keywords": "3D vision;NeRF;semantic understanding", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/a151f48e53dbd76f25d3817e0b2f656802151425.zip", "author": "Fengrui Tian;Yueqi Duan;Angtian Wang;Jianfei Guo;Shaoyi Du", "authorids": "~Fengrui_Tian1;~Yueqi_Duan1;~Angtian_Wang2;~Jianfei_Guo1;~Shaoyi_Du1", "gender": "M;M;M;M;", "homepage": "https://tianfr.github.io/;https://duanyueqi.github.io/;https://angtianwang.github.io/;https://ventusff.github.io;", "dblp": "336/7668;168/8373;;305/7388.html;", "google_scholar": "https://scholar.google.com/citations?hl=en;qDseo3cAAAAJ;YR7re-cAAAAJ;MJb2_wYAAAAJ;", "orcid": "0000-0002-9577-5276;;;0000-0002-5838-679X;", "linkedin": ";;;;", "or_profile": "~Fengrui_Tian1;~Yueqi_Duan1;~Angtian_Wang2;~Jianfei_Guo1;~Shaoyi_Du1", "aff": "Xi'an Jiaotong University;Tsinghua University;Johns Hopkins University;Shanghai Artificial Intelligence Laboratory;", "aff_domain": "xjtu.edu.cn;tsinghua.edu.cn;jhu.edu;pjlab.org.cn;", "position": "MS student;Assistant Professor;PhD student;Researcher;", "bibtex": "@inproceedings{\ntian2024semantic,\ntitle={Semantic Flow: Learning Semantic Fields of Dynamic Scenes from Monocular Videos},\nauthor={Fengrui Tian and Yueqi Duan and Angtian Wang and Jianfei Guo and Shaoyi Du},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=A2mRcRyGdl}\n}", "github": "", "project": "", "reviewers": "QtRh;xtsS;5VQj", "pdf_size": 16230649, "rating": "5;6;8", "confidence": "4;4;3", "soundness": "3;3;3", "contribution": "2;2;3", "presentation": "3;3;3", "wc_summary": "126;69;54", "wc_strengths": "136;65;66", "wc_weaknesses": "283;180;81", "wc_questions": "80;20;62", "wc_review": "625;334;263", "wc_reply_reviewers": "91;149;0", "wc_reply_authors": "1370;1129;348", "reply_reviewers": "1;3;0", "reply_authors": "4;5;1", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 83.0, 31.016124838541646 ], "wc_strengths_avg": [ 89.0, 33.23652609203716 ], "wc_weaknesses_avg": [ 181.33333333333334, 82.47154391398014 ], "wc_questions_avg": [ 54.0, 25.13961017995307 ], "wc_review_avg": [ 407.3333333333333, 156.6191417278091 ], "wc_reply_reviewers_avg": [ 80.0, 61.32427469335994 ], "wc_reply_authors_avg": [ 949.0, 436.2117222939643 ], "reply_reviewers_avg": [ 1.3333333333333333, 1.247219128924647 ], "reply_authors_avg": [ 3.3333333333333335, 1.699673171197595 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.944911182523068, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18433628933461041863&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=A2mRcRyGdl", "pdf": "https://openreview.net/pdf?id=A2mRcRyGdl", "email": "xjtu.edu.cn;tsinghua.edu.cn;jhu.edu;pjlab.org.cn;", "author_num": 5, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Xi'an Jiao Tong University;Tsinghua University;Johns Hopkins University;Shanghai Artificial 
Intelligence Laboratory", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.xjtu.edu.cn;https://www.tsinghua.edu.cn;https://www.jhu.edu;http://www.shailab.org/", "aff_unique_abbr": "XJTU;THU;JHU;Shanghai AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "China;United States" }, { "id": "A4YlfnbaSD", "title": "Overcoming the Stability Gap in Continual Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "In many real-world applications, deep neural networks are retrained from scratch as a dataset grows in size. Given the computational expense for retraining networks, it has been argued that continual learning could make updating networks more efficient. An obstacle to achieving this goal is the stability gap, which refers to an observation that when updating on new data, performance on previously learned data degrades before recovering. Addressing this problem would enable learning new data with fewer network updates, resulting in increased computational efficiency. We study how to mitigate the stability gap. We test a variety of hypotheses to understand why the stability gap occurs. This leads us to discover a method that vastly reduces this gap. In large-scale class incremental learning experiments, we are able to significantly reduce the number of network updates needed for continual learning. Our work has the potential to advance the state-of-the-art in continual learning for real-world applications along with reducing the carbon footprint required to maintain updated neural networks.", "keywords": "Continual Learning;Catastrophic Forgetting;Stability Gap;Stability-Plasticity Dilemma", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "/attachment/15ba233b64c3c5ce3183ed3d71147ce77a34f3fd.pdf", "author": "Md Yousuf Harun;Christopher Kanan", "authorids": "~Md_Yousuf_Harun1;~Christopher_Kanan1", "gender": "M;M", "homepage": "https://yousuf907.github.io;https://chriskanan.com/", "dblp": ";14/8653", "google_scholar": "https://scholar.google.com/citations?hl=en;jMxZjBoAAAAJ", "orcid": "0000-0001-6544-6159;0000-0002-6412-995X", "linkedin": "md-yousuf-harun-71748572;chriskanan/", "or_profile": "~Md_Yousuf_Harun1;~Christopher_Kanan1", "aff": "Rochester Institute of Technology;University of Rochester", "aff_domain": "rit.edu;rochester.edu", "position": "PhD student;Associate Professor", "bibtex": "@misc{\nharun2024overcoming,\ntitle={Overcoming the Stability Gap in Continual Learning},\nauthor={Md Yousuf Harun and Christopher Kanan},\nyear={2024},\nurl={https://openreview.net/forum?id=A4YlfnbaSD}\n}", "github": "", "project": "", "reviewers": "EKNY;9Qg6;wwBV;2UBD", "site": "https://openreview.net/forum?id=A4YlfnbaSD", "pdf_size": 5703220, "rating": "3;5;5;5", "confidence": "4;4;5;4", "soundness": "3;3;3;3", "contribution": "2;2;2;2", "presentation": "3;3;2;3", "wc_summary": "185;143;101;83", "wc_strengths": "26;87;41;64", "wc_weaknesses": "205;351;276;105", "wc_questions": "115;47;194;138", "wc_review": "531;628;612;390", "wc_reply_reviewers": "0;221;0;0", "wc_reply_authors": "764;874;1137;1073", "reply_reviewers": "0;1;0;0", "reply_authors": "1;3;2;2", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 128.0, 39.45883931389772 ], "wc_strengths_avg": [ 54.5, 23.13547060251855 
], "wc_weaknesses_avg": [ 234.25, 90.73966883342698 ], "wc_questions_avg": [ 123.5, 52.690131903421914 ], "wc_review_avg": [ 540.25, 94.21882773628634 ], "wc_reply_reviewers_avg": [ 55.25, 95.69580711818047 ], "wc_reply_authors_avg": [ 962.0, 149.91164064207956 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5029589355732838682&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Rochester Institute of Technology;University of Rochester", "aff_unique_dep": ";", "aff_unique_url": "https://www.rit.edu;https://www.rochester.edu", "aff_unique_abbr": "RIT;U of R", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Dirichlet-based Per-Sample Weighting by Transition Matrix for Noisy Label Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19274", "id": "A4mJuFRMN8", "author_site": "HeeSun Bae, Seungjae Shin, Byeonghu Na, Il-chul Moon", "tldr": "", "abstract": "For learning with noisy labels, the transition matrix, which explicitly models the relation between noisy label distribution and clean label distribution, has been utilized to achieve the statistical consistency of either the classifier or the risk. Previous researches have focused more on how to estimate this transition matrix well, rather than how to utilize it. We propose good utilization of the transition matrix is crucial and suggest a new utilization method based on resampling, coined RENT. Specifically, we first demonstrate current utilizations can have potential limitations for implementation. As an extension to Reweighting, we suggest the Dirichlet distribution-based per-sample Weight Sampling (DWS) framework, and compare reweighting and resampling under DWS framework. With the analyses from DWS, we propose RENT, a REsampling method with Noise Transition matrix. Empirically, RENT consistently outperforms existing transition matrix utilization methods, which includes reweighting, on various benchmark datasets. 
Our code is available at https://github.com/BaeHeeSun/RENT.", "keywords": "learning with noisy label;noisy label classification;Transition matrix;Dirichlet distribution;Importance sampling", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "HeeSun Bae;Seungjae Shin;Byeonghu Na;Il-chul Moon", "authorids": "~HeeSun_Bae1;~Seungjae_Shin1;~Byeonghu_Na1;~Il-chul_Moon1", "gender": "F;M;M;", "homepage": ";https://sites.google.com/view/seungjae-shin;https://sites.google.com/view/byeonghu-na;", "dblp": ";29/551;276/5100;", "google_scholar": "https://scholar.google.co.kr/citations?user=D9U_ohsAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.co.kr/citations?user=mJoqpmEAAAAJ;", "orcid": "0000-0002-9986-0945;;0000-0003-3463-2674;", "linkedin": "heesun-bae-8a4b8523a/;seungjae-shin-hoodie/;byeonghu-na-17942120b/;", "or_profile": "~HeeSun_Bae1;~Seungjae_Shin1;~Byeonghu_Na1;~Il-chul_Moon1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;", "position": "PhD student;PhD student;PhD student;", "bibtex": "@inproceedings{\nbae2024dirichletbased,\ntitle={Dirichlet-based Per-Sample Weighting by Transition Matrix for Noisy Label Learning},\nauthor={HeeSun Bae and Seungjae Shin and Byeonghu Na and Il-chul Moon},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=A4mJuFRMN8}\n}", "github": "", "project": "", "reviewers": "z7WX;4M7K;KTbE;HW3A;Epey;hEe7", "pdf_size": 8461139, "rating": "5;6;6;6;6;6", "confidence": "3;4;4;3;3;3", "soundness": "3;3;3;2;3;2", "contribution": "2;3;2;2;3;3", "presentation": "2;2;3;3;3;2", "wc_summary": "64;41;94;62;77;97", "wc_strengths": "61;47;54;55;31;136", "wc_weaknesses": "172;127;100;105;186;50", "wc_questions": "54;22;9;140;28;64", "wc_review": "351;237;257;362;322;347", "wc_reply_reviewers": "0;30;45;84;38;0", "wc_reply_authors": "1757;587;613;2669;327;1119", "reply_reviewers": "0;1;1;1;1;0", "reply_authors": "4;3;2;6;2;4", "rating_avg": [ 5.833333333333333, 0.372677996249965 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 72.5, 19.397164741270824 ], "wc_strengths_avg": [ 64.0, 33.54598833442433 ], "wc_weaknesses_avg": [ 123.33333333333333, 45.766314638121735 ], "wc_questions_avg": [ 52.833333333333336, 43.229683731847445 ], "wc_review_avg": [ 312.6666666666667, 48.29308669180531 ], "wc_reply_reviewers_avg": [ 32.833333333333336, 28.742631906088363 ], "wc_reply_authors_avg": [ 1178.6666666666667, 811.8402278500096 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 3.5, 1.3844373104863459 ], "replies_avg": [ 35, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.31622776601683783, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:hAGhZb9SFH4J:scholar.google.com/&scioq=Dirichlet-based+Per-Sample+Weighting+by+Transition+Matrix+for+Noisy+Label+Learning&hl=en&as_sdt=0,5", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=A4mJuFRMN8", "pdf": "https://openreview.net/pdf?id=A4mJuFRMN8", "email": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;", "author_num": 4, 
"aff_unique_index": "0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "id": "A5nLEfjhJW", "title": "SHARCS: SHARed Concept Space for\\\\Explainable Multimodal Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Multimodal learning is an essential paradigm for addressing complex real-world problems, where individual data modalities are typically insufficient for accurately solving a given modelling task. While various deep learning approaches have successfully addressed these challenges, their reasoning process is often opaque; limiting the capabilities for a principled explainable cross-modal analysis and any domain-expert intervention. In this paper, we introduce SHARCS (SHARed Concept Space) -- a novel concept-based approach for explainable multimodal learning. SHARCS learns and maps interpretable concepts from different heterogeneous modalities into a single unified concept-manifold, which leads to an intuitive projection of semantically similar cross-modal concepts. We demonstrate that such an approach can lead to inherently explainable task predictions while also improving downstream predictive performance. Moreover, we show that SHARCS can operate and significantly outperform other approaches in practically significant scenarios, such as retrieval of missing modalities and cross-modal explanations. Our approach is model agnostic and easily applicable to different types (and number) of modalities, thus advancing the development of effective, interpretable, and trustworthy multimodal approaches.", "keywords": "Explainable AI;Multimodal Learning;Concept Based Models", "primary_area": "visualization or interpretation of learned representations", "supplementary_material": "/attachment/77fa16b490d1cb28d040a45c5a5db8e4a85479d8.zip", "author": "Gabriele Dominici;Pietro Barbiero;Lucie Charlotte Magister;Pietro Lio;Nikola Simidjievski", "authorids": "~Gabriele_Dominici1;~Pietro_Barbiero1;~Lucie_Charlotte_Magister1;~Pietro_Lio1;~Nikola_Simidjievski1", "gender": "M;M;F;M;Unspecified", "homepage": ";http://www.pietrobarbiero.eu/;;https://www.cst.cam.ac.uk/people/pl219;https://simidjievskin.github.io/", "dblp": "351/0657;238/7860;298/1032;l/PietroLio.html;", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;https://scholar.google.it/citations?user=4gbToQoAAAAJ;do6o-rYAAAAJ;https://scholar.google.co.uk/citations?user=3YrWf7EAAAAJ;", "orcid": "0009-0009-1955-0778;0000-0003-3155-2564;0000-0003-3499-5475;0000-0002-0540-5053;", "linkedin": "gabriele-dominici-677bb6161/;;;;", "or_profile": "~Gabriele_Dominici1;~Pietro_Barbiero1;~Lucie_Charlotte_Magister1;~Pietro_Lio1;~Nikola_Simidjievski1", "aff": "Universita della Svizzera Italiana;Universita della Svizzera Italiana;Apple;University of Cambridge;University of Cambridge", "aff_domain": "usi.ch;usi.ch;apple.com;cam.ac.uk;cam.ac.uk", "position": "PhD student;Postdoc;ML Research Intern;Full Professor;Principal Researcher", "bibtex": "@misc{\ndominici2024sharcs,\ntitle={{SHARCS}: {SHAR}ed Concept Space for{\\textbackslash}{\\textbackslash}Explainable Multimodal Learning},\nauthor={Gabriele Dominici and Pietro Barbiero and Lucie Charlotte Magister and Pietro Lio and Nikola Simidjievski},\nyear={2024},\nurl={https://openreview.net/forum?id=A5nLEfjhJW}\n}", 
"github": "", "project": "", "reviewers": "w4Zt;yLh6;D8iH", "site": "https://openreview.net/forum?id=A5nLEfjhJW", "pdf_size": 11778429, "rating": "3;5;6", "confidence": "4;4;3", "soundness": "2;3;2", "contribution": "2;2;3", "presentation": "3;2;3", "wc_summary": "61;51;62", "wc_strengths": "38;53;73", "wc_weaknesses": "305;136;123", "wc_questions": "3;38;28", "wc_review": "407;278;286", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 58.0, 4.96655480858378 ], "wc_strengths_avg": [ 54.666666666666664, 14.337208778404378 ], "wc_weaknesses_avg": [ 188.0, 82.90154803540563 ], "wc_questions_avg": [ 23.0, 14.719601443879744 ], "wc_review_avg": [ 323.6666666666667, 59.01600536201081 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.7559289460184545, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15506685760797823300&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1;2;2", "aff_unique_norm": "Universita della Svizzera Italiana;Apple;University of Cambridge", "aff_unique_dep": ";Apple Inc.;", "aff_unique_url": "https://www.usi.ch;https://www.apple.com;https://www.cam.ac.uk", "aff_unique_abbr": "USI;Apple;Cambridge", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0;1;2;2", "aff_country_unique": "Switzerland;United States;United Kingdom" }, { "id": "A6juYCULJO", "title": "Abstractive Summarization through the PRISM of Decoding Strategies", "track": "main", "status": "Reject", "tldr": "", "abstract": "In the realm of natural language generation, abstractive summarization (AS) is at the center of an unparalleled evolution driven by transformer-based language models (LMs). However, the significance of decoding strategies is often neglected despite their influence on the generated summaries. Given the abundance of token selection heuristics and their accompanying hyperparameters, the community needs directions to steer well-founded decisions based on the task and the target metrics at hand. To fill this gap, we comparatively assess the effectiveness and efficiency of decoding-time techniques for short, long, and multi-document AS. We explore more than 2500 combinations of 3 widely used million-scale autoregressive encoder-decoder models, 6 datasets, and 9 decoding settings. Our findings shed light on the field, demonstrating that optimized decoding choices can yield substantial performance enhancements. In addition to human evaluation, we quantitatively measure effects using 10 automatic metrics, including dimensions such as semantic similarity, factuality, compression, redundancy, and carbon footprint. 
We introduce PRISM, a first-of-its-kind dataset that pairs AS gold input-output examples with LM predictions under a wide array of decoding options.", "keywords": "Decoding Strategies;Abstractive Summarization;Short Document Summarization;Long Document Summarization;Multi-Document Summarization;Natural Language Generation;Autoregressive Language Models;Datasets and Benchmarks", "primary_area": "datasets and benchmarks", "supplementary_material": "", "author": "Giacomo Frisoni;Luca Ragazzi;David Cohen;Gianluca Moro;Antonella Carbonaro;Claudio Sartori", "authorids": "~Giacomo_Frisoni1;~Luca_Ragazzi1;~David_Cohen4;~Gianluca_Moro1;~Antonella_Carbonaro1;~Claudio_Sartori1", "gender": "M;M;M;M;F;M", "homepage": ";https://www.unibo.it/sitoweb/l.ragazzi;;https://www.unibo.it/sitoweb/gianluca.moro/en;https://www.unibo.it/sitoweb/antonella.carbonaro;https://www.unibo.it/sitoweb/claudio.sartori/en", "dblp": "271/1231;https://dblp.uni-trier.de/pid/320/5349;;m/GianlucaMoro;90/6831;s/ClaudioSartori.html", "google_scholar": "BEZlFiAAAAAJ;BmaBHcAAAAAJ;;QOfSGKkAAAAJ;TsJoqIUAAAAJ;yGZuBaAAAAAJ", "orcid": "0000-0002-9845-0231;0000-0003-3574-9962;;;0000-0002-3890-4852;0000-0003-4535-1026", "linkedin": "giacomo-frisoni-3b97a8129/;luca-ragazzi-38557316b/;david-cohen96;;;claudiosartori/?originalSubdomain=it", "or_profile": "~Giacomo_Frisoni1;~Luca_Ragazzi1;~David_Cohen4;~Gianluca_Moro1;~Antonella_Carbonaro1;~Claudio_Sartori1", "aff": "University of Bologna;University of Bologna;University of Bologna;DISI - University of Bologna;University of Bologna;University of Bologna", "aff_domain": "unibo.it;unibo.it;unibo.it;unibo.it;unibo.it;unibo.it", "position": "PhD student;Postdoc;MS student;Associate Professor;Associate Professor;Full Professor", "bibtex": "@misc{\nfrisoni2024abstractive,\ntitle={Abstractive Summarization through the {PRISM} of Decoding Strategies},\nauthor={Giacomo Frisoni and Luca Ragazzi and David Cohen and Gianluca Moro and Antonella Carbonaro and Claudio Sartori},\nyear={2024},\nurl={https://openreview.net/forum?id=A6juYCULJO}\n}", "github": "", "project": "", "reviewers": "BSXa;8Gmg;rLZb;Eedp", "site": "https://openreview.net/forum?id=A6juYCULJO", "pdf_size": 6564224, "rating": "5;5;6;8", "confidence": "4;4;3;3", "soundness": "3;2;2;3", "contribution": "2;3;3;3", "presentation": "3;2;3;4", "wc_summary": "31;130;94;49", "wc_strengths": "19;271;35;50", "wc_weaknesses": "75;498;26;147", "wc_questions": "1;308;35;25", "wc_review": "126;1207;190;271", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "530;1826;445;595", "reply_reviewers": "0;0;0;0", "reply_authors": "1;3;1;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 76.0, 38.71046370169182 ], "wc_strengths_avg": [ 93.75, 102.92078264374013 ], "wc_weaknesses_avg": [ 186.5, 184.9222809723047 ], "wc_questions_avg": [ 92.25, 125.17462802021822 ], "wc_review_avg": [ 448.5, 440.92431323300826 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 849.0, 566.5734727288244 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.8164965809277259, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9682898466353828177&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": 
"University of Bologna", "aff_unique_dep": "", "aff_unique_url": "https://www.unibo.it", "aff_unique_abbr": "Unibo", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Italy" }, { "id": "A6kK5e3DhR", "title": "Controllable Data Generation via Iterative Data-Property Mutual Mappings", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Deep generative models have been widely used for their ability to generate realistic data samples in various areas, such as images, molecules, text, and speech. One major goal of data generation is controllability, namely to generate new data with desired properties. Despite growing interest in the area of controllable generation, significant challenges still remain, including 1) Disentangling desired properties with unrelated latent variables, 2) out-of-distribution property control, and 3) objective optimization for out-of-distribution property control. To address these challenges, in this paper, we propose a general framework to enhance VAE-based data generators with property controllability and disentanglement ensure. Our proposed objective can be optimized on both data seen and unseen in the training set. We propose a training procedure to train the objective in a semi-supervised manner by iteratively conducting mutual mappings between the data and properties. The proposed framework is implemented on four VAE-based controllable generators to evaluate its performance on property error, disentanglement performance, generation quality, and training time. The results indicate that our proposed framework enables more precise control over the properties of generated samples in a short training time, ensuring the disentanglement stated above and keeping the validity of the generated samples.", "keywords": "Controllable data generation;generative models", "primary_area": "generative models", "supplementary_material": "/attachment/ff639ecc52639976ac8a50ef6ff465d365352768.pdf", "author": "Bo Pan;Muran Qin;Shiyu Wang;Yifei Zhang;Liang Zhao", "authorids": "~Bo_Pan2;~Muran_Qin1;~Shiyu_Wang2;~Yifei_Zhang10;~Liang_Zhao6", "gender": ";M;M;M;M", "homepage": "https://pb0316.github.io/;;https://sites.google.com/view/about-shiyuwang;https://yifeizhangcs.github.io/;https://cs.emory.edu/~lzhao41/", "dblp": ";;;55/5266-6;63/5422-2", "google_scholar": ";https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;y5FkC7cAAAAJ;qnvyqtwAAAAJ", "orcid": "0009-0005-7501-7581;;;0009-0004-6136-733X;0000-0002-2648-9989", "linkedin": "bo-pan;;shiyu-wang-647a7b91/;yifei-jimmy-zhang/;", "or_profile": "~Bo_Pan2;~Muran_Qin1;~Shiyu_Wang2;~Yifei_Zhang10;~Liang_Zhao6", "aff": "Emory University;University of California, San Diego;Emory University;Emory University;Emory University", "aff_domain": "emory.edu;ucsd.edu;emory.edu;emory.edu;emory.edu", "position": "PhD student;MS student;PhD student;PhD student;Associate Professor", "bibtex": "@misc{\npan2024controllable,\ntitle={Controllable Data Generation via Iterative Data-Property Mutual Mappings},\nauthor={Bo Pan and Muran Qin and Shiyu Wang and Yifei Zhang and Liang Zhao},\nyear={2024},\nurl={https://openreview.net/forum?id=A6kK5e3DhR}\n}", "github": "", "project": "", "reviewers": "rFTY;Umbq;YxqT;UtY6", "site": "https://openreview.net/forum?id=A6kK5e3DhR", "pdf_size": 3883, "rating": "1;3;5;5", "confidence": "4;4;4;4", "soundness": "1;1;3;3", "contribution": "1;2;2;2", "presentation": "1;3;3;2", "wc_summary": "95;131;81;120", 
"wc_strengths": "93;44;80;74", "wc_weaknesses": "167;101;263;566", "wc_questions": "53;42;73;5", "wc_review": "408;318;497;765", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 3.5, 1.6583123951777 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.0, 1.0 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 106.75, 19.778460506318485 ], "wc_strengths_avg": [ 72.75, 17.963504669189696 ], "wc_weaknesses_avg": [ 274.25, 178.01878412122693 ], "wc_questions_avg": [ 43.25, 24.722206616724165 ], "wc_review_avg": [ 497.0, 167.17206704470695 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:AB6W9j4pEXkJ:scholar.google.com/&scioq=Controllable+Data+Generation+via+Iterative+Data-Property+Mutual+Mappings&hl=en&as_sdt=0,5", "gs_version_total": 4, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Emory University;University of California, San Diego", "aff_unique_dep": ";", "aff_unique_url": "https://www.emory.edu;https://www.ucsd.edu", "aff_unique_abbr": "Emory;UCSD", "aff_campus_unique_index": "1", "aff_campus_unique": ";San Diego", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Hyper Evidential Deep Learning to Quantify Composite Classification Uncertainty", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19273", "id": "A7t7z6g6tM", "author_site": "Changbin Li, Kangshuo Li, Yuzhe Ou, Lance Kaplan, Audun J\u00f8sang, Jin-Hee Cho, DONG HYUN JEONG, Feng Chen", "tldr": "", "abstract": "Deep neural networks (DNNs) have been shown to perform well on exclusive, multi-class classification tasks. However, when different classes have similar visual features, it becomes challenging for human annotators to differentiate them. When an image is ambiguous, such as a blurry one where an annotator can't distinguish between a husky and a wolf, it may be labeled with both classes: {husky, wolf}. This scenario necessitates the use of composite set labels. \nIn this paper, we propose a novel framework called Hyper-Evidential Neural Network (HENN) that explicitly models predictive uncertainty caused by composite set labels in training data in the context of the belief theory called Subjective Logic (SL).\nBy placing a Grouped Dirichlet distribution on the class probabilities, we treat predictions of a neural network as parameters of hyper-subjective opinions and learn the network that collects both single and composite evidence leading to these hyper-opinions by a deterministic DNN from data.\nWe introduce a new uncertainty type called vagueness originally designed for hyper-opinions in SL to quantify composite classification uncertainty for DNNs.\nOur experiments prove that HENN outperforms its state-of-the-art counterparts based on four image datasets.\nThe code and datasets are available at: https://shorturl.at/dhoqx.", "keywords": "Evidential Neural Network;hyperdomain;vagueness", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/6ef067847995e4802cedd24f94bbb3467d122878.pdf", "author": "Changbin Li;Kangshuo Li;Yuzhe Ou;Lance M. 
Kaplan;Audun J\u00f8sang;Jin-Hee Cho;DONG HYUN JEONG;Feng Chen", "authorids": "~Changbin_Li1;~Kangshuo_Li1;~Yuzhe_Ou1;~Lance_M._Kaplan1;~Audun_J\u00f8sang1;~Jin-Hee_Cho1;~DONG_HYUN_JEONG1;~Feng_Chen7", "gender": "M;M;M;M;M;F;;M", "homepage": ";https://www.linkedin.com/in/kangshuo-li-10359a218/;;;https://www.mn.uio.no/ifi/english/people/aca/josang/;https://people.cs.vt.edu/~jicho/;;https://personal.utdallas.edu/~fxc190007/", "dblp": "125/9302;;250/9325.html;47/4107;;;;21/3047-1", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;;https://scholar.google.com/citations?hl=en;obew8e0AAAAJ;https://scholar.google.no/citations?user=wduTe90AAAAJ;wToVkEUAAAAJ;;KOQ-SSYAAAAJ", "orcid": ";;0000-0001-8740-4531;0000-0002-3627-4471;0000-0001-6337-2264;;0000-0001-5271-293X;", "linkedin": "changbin-li/;;;;audunjosang/;;;", "or_profile": "~Changbin_Li1;~Kangshuo_Li1;~Yuzhe_Ou1;~Lance_M._Kaplan1;~Audun_J\u00f8sang1;~Jin-Hee_Cho1;~DONG_HYUN_JEONG1;~Feng_Chen7", "aff": "University of Texas, Dallas;UT-Dallas;The University of Texas at Dallas;US DEVCOM Army Research Laboratory ;University of Oslo;Virginia Polytechnic Institute and State University;University of the District of Columbia;University of Texas, Dallas", "aff_domain": "utdallas.edu;cs.utdallas.edu;utdallas.edu;army.mil;uio.no;vt.edu;udc.edu;utdallas.edu", "position": "PhD student;PhD student;PhD student;Principal Researcher;Full Professor;Associate Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nli2024hyper,\ntitle={Hyper Evidential Deep Learning to Quantify Composite Classification Uncertainty},\nauthor={Changbin Li and Kangshuo Li and Yuzhe Ou and Lance M. Kaplan and Audun J{\\o}sang and Jin-Hee Cho and DONG HYUN JEONG and Feng Chen},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=A7t7z6g6tM}\n}", "github": "", "project": "", "reviewers": "3vp1;AQu8;Gzpb;RL9p", "pdf_size": 7365416, "rating": "6;6;6;6", "confidence": "3;3;4;2", "soundness": "3;3;3;2", "contribution": "3;2;3;2", "presentation": "3;3;3;2", "wc_summary": "79;218;57;61", "wc_strengths": "34;186;95;102", "wc_weaknesses": "69;704;124;206", "wc_questions": "7;52;44;71", "wc_review": "189;1160;320;440", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "336;2573;678;1969", "reply_reviewers": "0;0;0;0", "reply_authors": "1;4;2;4", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 103.75, 66.48073029081435 ], "wc_strengths_avg": [ 104.25, 54.10348879693434 ], "wc_weaknesses_avg": [ 275.75, 252.01029244854266 ], "wc_questions_avg": [ 43.5, 23.243278598338918 ], "wc_review_avg": [ 527.25, 375.9490490744723 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1389.0, 915.5034134289178 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9018239966282149804&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "openreview": "https://openreview.net/forum?id=A7t7z6g6tM", "pdf": "https://openreview.net/pdf?id=A7t7z6g6tM", "email": "utdallas.edu;cs.utdallas.edu;utdallas.edu;army.mil;uio.no;vt.edu;udc.edu;utdallas.edu", "author_num": 8, "aff_unique_index": "0;0;0;1;2;3;4;0", 
"aff_unique_norm": "University of Texas at Dallas;US Army Research Laboratory;University of Oslo;Virginia Tech;University of the District of Columbia", "aff_unique_dep": ";DEVCOM;;;", "aff_unique_url": "https://www.utdallas.edu;https://www.arl.army.mil;https://www.uio.no;https://www.vt.edu;https://www.udc.edu", "aff_unique_abbr": "UT Dallas;ARL;UiO;VT;UDC", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Dallas;", "aff_country_unique_index": "0;0;0;0;1;0;0;0", "aff_country_unique": "United States;Norway" }, { "id": "A81iom2Y41", "title": "Be Your Own Neighborhood: Detecting Adversarial Example by the Neighborhood Relations Built on Self-Supervised Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Deep Neural Networks (DNNs) have achieved excellent performance in various fields. However, DNNs\u2019 vulnerability to Adversarial Examples (AE) hinders their deployments to safety-critical applications. This paper presents a novel AE detection framework, named BEYOND, for trustworthy predictions. BEYOND performs the detection by distinguishing the AE\u2019s abnormal relation with its augmented versions, i.e. neighbors, from two prospects: representation similarity and label consistency. An off-the-shelf Self-Supervised Learning (SSL) model is used to extract the representation and predict the label for its highly informative representation capacity compared to supervised learning models. For clean samples, their representations and predictions are closely consistent with their neighbors, whereas those of AEs differ greatly. Furthermore, we explain this observation and show that by leveraging this discrepancy BEYOND can effectively detect AEs. We develop a rigorous justification for the effectiveness of BEYOND. Furthermore, as a plug-and-play model, BEYOND can easily cooperate with the Adversarial Trained Classifier (ATC), achieving the state-of-the-art (SOTA) robustness accuracy. Experimental results show that BEYOND outperforms baselines by a large margin, especially under adaptive attacks. Empowered by the robust relation net built on SSL, we found that BEYOND outperforms baselines in terms of both detection ability and speed. 
Our code will be publicly available.", "keywords": "Adversarial Examples;Self-supervised Learning;Adversarial Examples Detection", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/da24b4d9fe6bee964e5657232a67c1f42368c564.zip", "author": "Zhiyuan He;Yijun YANG;Pin-Yu Chen;Qiang Xu;Tsung-Yi Ho", "authorids": "~Zhiyuan_He2;~Yijun_YANG2;~Pin-Yu_Chen1;~Qiang_Xu1;~Tsung-Yi_Ho2", "gender": ";F;M;M;M", "homepage": ";https://yangyijune.github.io/;http://www.pinyuchen.com;https://github.com/cure-lab;https://www.cse.cuhk.edu.hk/people/faculty/tsung-yi-ho/", "dblp": ";;39/8969;43/1230-1;63/4181.html", "google_scholar": ";GZDKIUMAAAAJ;jxwlCUUAAAAJ;https://scholar.google.com.tw/citations?user=eSiKPqUAAAAJ;TRDUYkAAAAAJ", "orcid": ";0000-0002-4496-3154;0000-0003-1039-8369;;0000-0001-7348-5625", "linkedin": ";%E4%B8%80%E5%90%9B-%E6%9D%A8-82aa60133/;pin-yu-chen-940062a2;;", "or_profile": "~Zhiyuan_He2;~Yijun_YANG2;~Pin-Yu_Chen1;~Qiang_Xu1;~Tsung-Yi_Ho2", "aff": ";The Chinese University of Hong Kong;International Business Machines;The Chinese University of Hong Kong;Department of Computer Science and Engineering, The Chinese University of Hong Kong", "aff_domain": ";cuhk.edu.hk;ibm.com;cuhk.edu.hk;cse.cuhk.edu.hk", "position": ";PhD student;Principal Researcher;Full Professor;Full Professor", "bibtex": "@misc{\nhe2024be,\ntitle={Be Your Own Neighborhood: Detecting Adversarial Example by the Neighborhood Relations Built on Self-Supervised Learning},\nauthor={Zhiyuan He and Yijun YANG and Pin-Yu Chen and Qiang Xu and Tsung-Yi Ho},\nyear={2024},\nurl={https://openreview.net/forum?id=A81iom2Y41}\n}", "github": "", "project": "", "reviewers": "qcuA;cka9;SDmS;J3f8", "site": "https://openreview.net/forum?id=A81iom2Y41", "pdf_size": 1357156, "rating": "5;6;6;8", "confidence": "4;3;3;4", "soundness": "3;3;3;3", "contribution": "2;2;3;3", "presentation": "3;2;2;3", "wc_summary": "78;22;228;137", "wc_strengths": "117;28;30;171", "wc_weaknesses": "92;166;42;366", "wc_questions": "16;28;72;44", "wc_review": "303;244;372;718", "wc_reply_reviewers": "546;28;0;130", "wc_reply_authors": "1251;505;153;600", "reply_reviewers": "2;1;0;2", "reply_authors": "3;2;1;4", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 116.25, 76.26393315322781 ], "wc_strengths_avg": [ 86.5, 60.59084089200281 ], "wc_weaknesses_avg": [ 166.5, 123.33997729852231 ], "wc_questions_avg": [ 40.0, 20.97617696340303 ], "wc_review_avg": [ 409.25, 183.92304776726598 ], "wc_reply_reviewers_avg": [ 176.0, 219.02967835432713 ], "wc_reply_authors_avg": [ 627.25, 396.7570887835528 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.2294157338705618, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14773029716196574449&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Chinese University of Hong Kong;International Business Machines Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.ibm.com", "aff_unique_abbr": "CUHK;IBM", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "China;United States" }, { "id": "A8Sqe4RZqF", 
"title": "RoBERT: Low-Cost Bi-Directional Sequence Model for Flexible Robot Behavior Control", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Requirement of human involvement for data collection or system design has always been a major challenge for building robot control policy. In this paper, we present $\\textbf{Ro}$bot-$\\textbf{BERT}$ (RoBERT), a method to build\n general robot control policy for complex behaviors with $\\textit{least}$ human effort. Starting from unsupervisedly-collected dataset, RoBERT has no requirements of human labels, high-quality\n behavior dataset or accurate information of system model, in contrast to most\n other methods for building general robot agent. RoBERT is further pre-trained via $\\textit{Masked Action-Inverse-Inference}$ (MAII), a method inspired by\n $\\textit{Masked Language Modeling}$ (MLM) in BERT-like language models and has potential to enable $\\textit{zero-shot}$, $\\textit{multi-task}$, $\\textit{keyframe-based}$ robot control with little\n architectural change and user-friendly interface. In our empirical study, RoBERT\n is successfully applied on various types of robots in simulated environment and could generate stable and flexible behaviors to fulfill complex commands.", "keywords": "Imitation Learning;Sequence Model;Transformer;Robotics", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/9ccc8ef515966dbe2fbe240ee2517223a0eb29f5.zip", "author": "Diyuan Shi;Shangke Lyu;Donglin Wang", "authorids": "~Diyuan_Shi1;~Shangke_Lyu1;~Donglin_Wang1", "gender": ";M;M", "homepage": ";;https://milab.westlake.edu.cn/", "dblp": ";;", "google_scholar": ";;https://scholar.google.ca/citations?user=-fo6wdwAAAAJ", "orcid": ";0000-0002-8302-6630;0000-0002-8188-3735", "linkedin": ";;", "or_profile": "~Diyuan_Shi1;~Shangke_Lyu1;~Donglin_Wang1", "aff": ";Westlake University;Westlake University", "aff_domain": ";westlake.edu;westlake.edu.cn", "position": ";Researcher;Associate Professor", "bibtex": "@misc{\nshi2024robert,\ntitle={Ro{BERT}: Low-Cost Bi-Directional Sequence Model for Flexible Robot Behavior Control},\nauthor={Diyuan Shi and Shangke Lyu and Donglin Wang},\nyear={2024},\nurl={https://openreview.net/forum?id=A8Sqe4RZqF}\n}", "github": "", "project": "", "reviewers": "piq4;xJAs;oyMc", "site": "https://openreview.net/forum?id=A8Sqe4RZqF", "pdf_size": 14323779, "rating": "3;3;3", "confidence": "4;4;4", "soundness": "2;2;2", "contribution": "2;1;2", "presentation": "1;2;1", "wc_summary": "86;61;65", "wc_strengths": "21;28;47", "wc_weaknesses": "215;96;269", "wc_questions": "98;46;302", "wc_review": "420;231;683", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 3.0, 0.0 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.0, 0.0 ], "contribution_avg": [ 1.6666666666666667, 0.4714045207910317 ], "presentation_avg": [ 1.3333333333333333, 0.4714045207910317 ], "wc_summary_avg": [ 70.66666666666667, 10.96458946893235 ], "wc_strengths_avg": [ 32.0, 10.98483803552272 ], "wc_weaknesses_avg": [ 193.33333333333334, 72.26955713776644 ], "wc_questions_avg": [ 148.66666666666666, 110.48177325795518 ], "wc_review_avg": [ 444.6666666666667, 185.35071860904367 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 3, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:8A3muF_3-14J:scholar.google.com/&scioq=RoBERT:+Low-Cost+Bi-Directional+Sequence+Model+for+Flexible+Robot+Behavior+Control&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Westlake University", "aff_unique_dep": "", "aff_unique_url": "https://www.westlake.edu.cn", "aff_unique_abbr": "WU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "A8et2yjbly", "title": "Cross-Modality Masked Pre-training for Visible-Infrared Person Re-identification", "track": "main", "status": "Reject", "tldr": "", "abstract": "Visible-Infrared person re-identification is a challenging yet important task in the field of intelligent surveillance. Most existing approaches focus on designing powerful deep networks to learn modality-shared representations, while little attention has been paid to using pre-training methods, although they can improve the performance of cross-modality tasks stably. This paper proposes a cross-modality masked pre-training (CMMP) method for visible-infrared person re-identification. Specifically, we generate color-irrelevant images using random channel exchangeable augmentation to minimize the difference between modalities at first. In the pre-training process, the visible together with the generated image, and the infrared image are masked by sharing the same random mask. Considering the misalignment of visible and infrared images in the datasets, we then reconstruct the masked areas only of the visible and the generated images using a lightweight decoder, which makes the pre-training process more efficient. Extensive experiments on two visible-infrared person re-identification datasets verify the effectiveness of the proposed method. 
CMMP outperforms the baseline method by +1.87\\% and +1.24\\% mAP on SYSU-MM01 and RegDB, respectively.", "keywords": "Person Re-identification;Cross-modality;Pre-training;Self-supervised Learning", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Haoyan Ma;Xiang Li;Xia Yuan;Jie Li;Chunxia Zhao", "authorids": "~Haoyan_Ma1;~Xiang_Li20;~Xia_Yuan1;~Jie_Li8;~Chunxia_Zhao1", "gender": "M;M;M;;", "homepage": ";http://implus.github.io/;;;", "dblp": ";40/1491-41;69/2223;;78/5979", "google_scholar": "mcx4JCsAAAAJ;oamjJdYAAAAJ;;;", "orcid": ";;0000-0002-7271-0058;;", "linkedin": ";;;;", "or_profile": "~Haoyan_Ma1;~Xiang_Li20;~Xia_Yuan1;~Jie_Li8;~Chunxia_Zhao1", "aff": ";Nankai University;Nanjing University of Science and Technology;;Nanjing University of Science and Technology", "aff_domain": ";nankai.edu.cn;njust.edu.cn;;njust.edu.cn", "position": ";Associate Professor;Associate Professor;;Full Professor", "bibtex": "@misc{\nma2024crossmodality,\ntitle={Cross-Modality Masked Pre-training for Visible-Infrared Person Re-identification},\nauthor={Haoyan Ma and Xiang Li and Xia Yuan and Jie Li and Chunxia Zhao},\nyear={2024},\nurl={https://openreview.net/forum?id=A8et2yjbly}\n}", "github": "", "project": "", "reviewers": "JDzU;HM7x;t1sC", "site": "https://openreview.net/forum?id=A8et2yjbly", "pdf_size": 570733, "rating": "1;3;5", "confidence": "4;5;5", "soundness": "1;2;2", "contribution": "1;1;3", "presentation": "1;3;2", "wc_summary": "20;48;52", "wc_strengths": "1;20;35", "wc_weaknesses": "1;301;17", "wc_questions": "1;237;93", "wc_review": "23;606;197", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 3.0, 1.632993161855452 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "soundness_avg": [ 1.6666666666666667, 0.4714045207910317 ], "contribution_avg": [ 1.6666666666666667, 0.9428090415820634 ], "presentation_avg": [ 2.0, 0.816496580927726 ], "wc_summary_avg": [ 40.0, 14.236104336041748 ], "wc_strengths_avg": [ 18.666666666666668, 13.912424503139471 ], "wc_weaknesses_avg": [ 106.33333333333333, 137.80501522884506 ], "wc_questions_avg": [ 110.33333333333333, 97.12306054119634 ], "wc_review_avg": [ 275.3333333333333, 244.36902877046882 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:kTMjHNgnHi0J:scholar.google.com/&scioq=Cross-Modality+Masked+Pre-training+for+Visible-Infrared+Person+Re-identification&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;1", "aff_unique_norm": "Nankai University;Nanjing University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "http://www.nankai.edu.cn;http://www.nust.edu.cn/", "aff_unique_abbr": "NKU;NUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "A8xmyDIZhn", "title": "FedDRO: Federated Compositional Optimization for Distributionally Robust Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Recently, compositional optimization (CO) has gained popularity because of its applications in distributionally robust optimization (DRO) and many other machine learning 
problems. Large-scale and distributed availability of data demands the development of efficient federated learning (FL) algorithms for solving CO problems. Developing FL algorithms for CO is particularly challenging because of the compositional nature of the objective. Moreover, current state-of-the-art methods to solve such problems rely on large batch gradients (depending on the solution accuracy) not feasible for most practical settings. To address these challenges, in this work, we propose efficient FedAvg-type algorithms for solving non-convex CO in the FL setting. We first establish that vanilla FedAvg is not suitable to solve distributed CO problems because of the data heterogeneity in the compositional objective at each client which leads to the amplification of bias in the local compositional gradient estimates. To this end, we propose a novel Distributed-DRO (D-DRO)~framework that utilizes the DRO problem structure to design a communication strategy that allows FedAvg to control the bias in the estimation of the compositional gradient. A key novelty of our work is to develop solution accuracy-independent algorithms that do not require large batch gradients (and function evaluations) for solving federated CO problems. We establish $\\mathcal{O}(\\epsilon^{-2})$ and \n sample and $\\mathcal{O}(\\epsilon^{-3/2})$ communication complexity in the FL setting while achieving linear speedup with the number of clients. We corroborate our theoretical findings with empirical studies on large-scale DRO problems with multiple real datasets.", "keywords": "Compositional Optimization;Federated Learning;Distributionally robust Learning", "primary_area": "optimization", "supplementary_material": "/attachment/41c2707ac0a77343414ce38b49a363ef560d0584.pdf", "author": "Prashant Khanduri;Chengyin Li;RAFI IBN SULTAN;Yao Qiang;Joerg Kliewer;Dongxiao Zhu", "authorids": "~Prashant_Khanduri1;~Chengyin_Li1;~RAFI_IBN_SULTAN1;~Yao_Qiang1;~Joerg_Kliewer1;~Dongxiao_Zhu1", "gender": "M;M;M;M;;M", "homepage": "https://sites.google.com/view/khanduri-prashant/home?authuser=0;https://chengyinlee.github.io/;https://rafiibnsultan.github.io/;https://qiangyao1988.github.io/;https://web.njit.edu/~jkliewer/wp/;https://dongxiaozhu.github.io", "dblp": "158/4888;262/6036;245/7089;261/3623;39/4721;15/6233", "google_scholar": ";GeL7DtsAAAAJ;https://scholar.google.com/citations?hl=en;8ADcg38AAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0003-2450-9760;0009-0002-5814-0404;0000-0003-2995-3385;;", "linkedin": "prashant-khanduri-0497894b/;chengyin-li-a4262862/;rafi-ibn-sultan;yaoqiang/;;dongxiao-zhu-5796754/", "or_profile": "~Prashant_Khanduri1;~Chengyin_Li1;~RAFI_IBN_SULTAN1;~Yao_Qiang1;~Joerg_Kliewer1;~Dongxiao_Zhu1", "aff": "Wayne State University;Wayne State University;Wayne State University;Wayne State University;New Jersey Institute of Technology;Wayne State University", "aff_domain": "wayne.edu;wayne.edu;wayne.edu;wayne.edu;njit.edu;wayne.edu", "position": "Assistant Professor;PhD student;PhD student;PhD student;Full Professor;Full Professor", "bibtex": "@misc{\nkhanduri2024feddro,\ntitle={Fed{DRO}: Federated Compositional Optimization for Distributionally Robust Learning},\nauthor={Prashant Khanduri and Chengyin Li and RAFI IBN SULTAN and Yao Qiang and Joerg Kliewer and Dongxiao Zhu},\nyear={2024},\nurl={https://openreview.net/forum?id=A8xmyDIZhn}\n}", "github": "", "project": "", "reviewers": "StaW;J3Ha;EdtR;Sjhd", "site": "https://openreview.net/forum?id=A8xmyDIZhn", "pdf_size": 2042583, "rating": 
"3;5;5;6", "confidence": "5;4;4;3", "soundness": "2;3;2;3", "contribution": "2;2;2;2", "presentation": "1;3;3;3", "wc_summary": "62;41;41;46", "wc_strengths": "11;36;49;41", "wc_weaknesses": "28;143;3;22", "wc_questions": "228;5;353;1", "wc_review": "329;225;446;110", "wc_reply_reviewers": "28;0;399;14", "wc_reply_authors": "1789;528;2380;90", "reply_reviewers": "1;0;1;1", "reply_authors": "3;1;3;1", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 47.5, 8.616843969807043 ], "wc_strengths_avg": [ 34.25, 14.201672436723781 ], "wc_weaknesses_avg": [ 49.0, 55.04997729336498 ], "wc_questions_avg": [ 146.75, 150.3967669200372 ], "wc_review_avg": [ 277.5, 124.35533764177555 ], "wc_reply_reviewers_avg": [ 110.25, 167.00355535137567 ], "wc_reply_authors_avg": [ 1196.75, 925.0625316701569 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 1.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.9733285267845754, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:N6j1u_SC_o4J:scholar.google.com/&scioq=FedDRO:+Federated+Compositional+Optimization+for+Distributionally+Robust+Learning&hl=en&as_sdt=0,33", "gs_version_total": 4, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Wayne State University;New Jersey Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://wayne.edu;https://www.njit.edu", "aff_unique_abbr": "WSU;NJIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "AAxIs3D2ZZ", "title": "RLAIF: Scaling Reinforcement Learning from Human Feedback with AI Feedback", "track": "main", "status": "Reject", "tldr": "", "abstract": "Reinforcement learning from human feedback (RLHF) is an effective technique for aligning large language models (LLMs) to human preferences, but gathering high-quality human preference labels is a critical bottleneck. RL from AI Feedback (RLAIF) is an alternative solution that generates preferences labels using an off-the-shelf LLM in lieu of human annotators. We compare RLAIF and RLHF, and we find that RLAIF achieves improvements on par with RLHF, with both RL policies outperforming the baseline supervised fine-tuning policy by approximately 70\\% for summarization and 60\\% for helpful dialogue generation, as rated by human evaluators. Furthermore, when asked to rate RLAIF against RLHF in a head-to-head comparison, both are equally preferred. 
These results suggest that RLAIF can achieve human-level performance, offering a potential solution to the scalability limitations of RLHF.", "keywords": "reinforcement learning;rlhf;rlaif;nlp;large language models;llm;nlp;machine learning", "primary_area": "generative models", "supplementary_material": "", "author": "Harrison Lee;Samrat Phatale;Hassan Mansoor;Kellie Ren Lu;Thomas Mesnard;Johan Ferret;Colton Bishop;Ethan Hall;Victor Carbune;Abhinav Rastogi", "authorids": "~Harrison_Lee1;~Samrat_Phatale1;~Hassan_Mansoor1;~Kellie_Ren_Lu1;~Thomas_Mesnard2;~Johan_Ferret1;~Colton_Bishop1;~Ethan_Hall1;~Victor_Carbune1;~Abhinav_Rastogi2", "gender": "M;M;M;;;M;M;M;M;M", "homepage": ";;https://www.linkedin.com/in/hassan-mansoor-6938364/;;https://thomasmesnard.github.io/;https://ferretj.github.io;https://bishopcolton.com;;https://ai.google/research/people/104909;", "dblp": "249/6387-1;192/7541.html;;;;;;;199/7020;", "google_scholar": ";gTK5jNYAAAAJ;;qZ3I8gQAAAAJ;;uyUnqjMAAAAJ;;;https://scholar.google.ch/citations?user=35djUQYAAAAJ;uDrgdtwAAAAJ", "orcid": ";;;;;;;;;", "linkedin": "harrisonl;;;;;;;ethan-hall-397391b0/;vcarbune/;abhinav-rastogi-0a466934/", "or_profile": "~Harrison_Lee1;~Samrat_Phatale1;~Hassan_Mansoor1;~Kellie_Ren_Lu1;~Thomas_Mesnard2;~Johan_Ferret1;~Colton_Bishop1;~Ethan_Hall1;~Victor_Carbune1;~Abhinav_Rastogi2", "aff": "Google;Google DeepMind;Google;;Google DeepMind;Google;;Google;Google;Google", "aff_domain": "google.com;deepmind.com;google.com;;google.com;google.com;;google.com;google.com;google.com", "position": "Researcher;Researcher;Researcher;;PhD student;Researcher;;Software Engineer;Researcher;Research Scientist", "bibtex": "@misc{\nlee2024rlaif,\ntitle={{RLAIF}: Scaling Reinforcement Learning from Human Feedback with {AI} Feedback},\nauthor={Harrison Lee and Samrat Phatale and Hassan Mansoor and Kellie Ren Lu and Thomas Mesnard and Johan Ferret and Colton Bishop and Ethan Hall and Victor Carbune and Abhinav Rastogi},\nyear={2024},\nurl={https://openreview.net/forum?id=AAxIs3D2ZZ}\n}", "github": "", "project": "", "reviewers": "RhkW;BDUS;4rk2;LzbJ", "site": "https://openreview.net/forum?id=AAxIs3D2ZZ", "pdf_size": 759682, "rating": "3;6;6;8", "confidence": "4;4;4;4", "soundness": "2;3;2;4", "contribution": "2;4;3;4", "presentation": "3;3;3;4", "wc_summary": "90;236;62;27", "wc_strengths": "83;133;40;41", "wc_weaknesses": "318;280;300;49", "wc_questions": "1;97;224;17", "wc_review": "492;746;626;134", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1064;1010;1052;490", "reply_reviewers": "0;0;0;0", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "contribution_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 103.75, 79.54990571961729 ], "wc_strengths_avg": [ 74.25, 38.10101704679286 ], "wc_weaknesses_avg": [ 236.75, 109.22768650850388 ], "wc_questions_avg": [ 84.75, 88.23937613106747 ], "wc_review_avg": [ 499.5, 229.35289403013863 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 904.0, 239.86246058939693 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 540, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7995210232742152683&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": 
"Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;1;0;1;0;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "ABIcBDLBVG", "title": "Fill in the Blank: Exploring and Enhancing LLM Capabilities for Backward Reasoning in Math Word Problems", "track": "main", "status": "Reject", "tldr": "", "abstract": "While forward reasoning (i.e., find the answer given the question) has been explored extensively in the recent literature, backward reasoning is relatively unexplored. We examine the backward reasoning capabilities of LLMs on Math Word Problems (MWPs): given a mathematical question and its answer, with some details omitted from the question, can LLMs effectively retrieve the missing information? \n\nIn this paper, we formally define the backward reasoning task on math word problems and modify three datasets to evaluate this task: GSM8k, SVAMP and MultiArith. Our findings show a significant drop in the accuracy of models on backward reasoning compared to forward reasoning across four SOTA LLMs (GPT4, GPT3.5, PaLM-2, and LLaMa). Utilizing the specific format of this task, we propose three novel techniques that improve performance: Rephrase reformulates the given problem into a forward reasoning problem, PAL-Tools combines the idea of Program-Aided LLMs to produce a set of equations that can be solved by an external solver, and Check your Work exploits the availability of natural verifier of high accuracy in the forward direction, interleaving solving and verification steps. Finally, realizing that each of our base methods correctly solves a different set of problems, we propose a novel Bayesian formulation for creating an ensemble over these base methods aided by a verifier to further boost the accuracy by a significant margin. 
Extensive experimentation demonstrates that our techniques successively improve the performance of LLMs on the backward reasoning task, with the final ensemble-based method resulting in a substantial performance gain compared to the raw LLMs with standard prompting techniques such as chain-of-thought.", "keywords": "large language models;prompting;mathematical reasoning;natural language processing", "primary_area": "generative models", "supplementary_material": "", "author": "Aniruddha Deb;Neeva Hareshbhai Oza;Sarthak Singla;Dinesh Khandelwal;Dinesh Garg;Parag Singla", "authorids": "~Aniruddha_Deb1;~Neeva_Hareshbhai_Oza1;~Sarthak_Singla1;~Dinesh_Khandelwal2;~Dinesh_Garg1;~Parag_Singla1", "gender": "Not Specified;;M;M;M;M", "homepage": "https://www.aniruddhadeb.com;;;https://research.ibm.com/people/dinesh-khandelwal;https://researcher.watson.ibm.com/researcher/view.php?person=in-garg.dinesh;http://www.cse.iitd.ac.in/~parags", "dblp": ";358/6181;;177/0164;https://dblp.uni-trier.de/pers/g/Garg:Dinesh.html;14/167", "google_scholar": ";;;Pi-SqXwAAAAJ;https://scholar.google.com.tw/citations?user=YrU_ZDkAAAAJ;https://scholar.google.co.in/citations?user=V49BsgMAAAAJ", "orcid": ";;;;;", "linkedin": ";learner4ever/;sarthak-singla/;dinesh-khandelwal-68689420/;dingarg/;", "or_profile": "~Aniruddha_Deb1;~Neeva_Hareshbhai_Oza1;~Sarthak_Singla1;~Dinesh_Khandelwal2;~Dinesh_Garg1;~Parag_Singla1", "aff": "Indian Institute of Technology, Delhi;Indian Institute of Technology, Delhi;;International Business Machines;;Indian Institute of Technology, Delhi", "aff_domain": "iitd.ac.in;iitd.ac.in;;ibm.com;;iitd.ac.in", "position": "Undergrad student;MS student;;Researcher;;Full Professor", "bibtex": "@misc{\ndeb2024fill,\ntitle={Fill in the Blank: Exploring and Enhancing {LLM} Capabilities for Backward Reasoning in Math Word Problems},\nauthor={Aniruddha Deb and Neeva Hareshbhai Oza and Sarthak Singla and Dinesh Khandelwal and Dinesh Garg and Parag Singla},\nyear={2024},\nurl={https://openreview.net/forum?id=ABIcBDLBVG}\n}", "github": "", "project": "", "reviewers": "AYQE;9odL;4wdv;xzVN", "site": "https://openreview.net/forum?id=ABIcBDLBVG", "pdf_size": 421256, "rating": "3;5;6;8", "confidence": "5;3;4;3", "soundness": "2;2;2;3", "contribution": "2;2;2;3", "presentation": "2;3;2;3", "wc_summary": "125;79;63;41", "wc_strengths": "66;41;39;30", "wc_weaknesses": "519;210;116;49", "wc_questions": "187;13;47;49", "wc_review": "897;343;265;169", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1136;740;606;306", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 77.0, 30.822070014844883 ], "wc_strengths_avg": [ 44.0, 13.360389215887388 ], "wc_weaknesses_avg": [ 223.5, 179.93679445849867 ], "wc_questions_avg": [ 74.0, 66.79071791798619 ], "wc_review_avg": [ 418.5, 283.0525569571842 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 697.0, 298.21636440678435 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.7526178090063818, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15364179068116096669&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Indian Institute 
of Technology Delhi;International Business Machines Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.iitdelhi.ac.in;https://www.ibm.com", "aff_unique_abbr": "IIT Delhi;IBM", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Delhi;", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "India;United States" }, { "title": "Manipulating dropout reveals an optimal balance of efficiency and robustness in biological and machine visual systems", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19272", "id": "ADDCErFzev", "author_site": "Jacob Prince, Gabriel Fajardo, George Alvarez, Talia Konkle", "tldr": "", "abstract": "According to the efficient coding hypothesis, neural populations encode information optimally when representations are high-dimensional and uncorrelated. However, such codes may carry a cost in terms of generalization and robustness. Past empirical studies of early visual cortex (V1) in rodents have suggested that this tradeoff indeed constrains sensory representations. However, it remains unclear whether these insights generalize across the hierarchy of the human visual system, and particularly to object representations in high-level occipitotemporal cortex (OTC). To gain new empirical clarity, here we develop a family of object recognition models with parametrically varying dropout proportion $p$, which induces systematically varying dimensionality of internal responses (while controlling all other inductive biases). We find that increasing dropout produces an increasingly smooth, low-dimensional representational space. Optimal robustness to lesioning is observed at around 70% dropout, after which both accuracy and robustness decline. Representational comparison to large-scale 7T fMRI data from occipitotemporal cortex in the Natural Scenes Dataset reveals that this optimal degree of dropout is also associated with maximal emergent neural predictivity. Finally, using new techniques for achieving denoised estimates of the eigenspectrum of human fMRI responses, we compare the rate of eigenspectrum decay between model and brain feature spaces. We observe that the match between model and brain representations is associated with a common balance between efficiency and robustness in the representational space. These results suggest that varying dropout may reveal an optimal point of balance between the efficiency of high-dimensional codes and the robustness of low dimensional codes in hierarchical vision systems.", "keywords": "Efficient coding;object representation;dropout;robustness;human fMRI;occipitotemporal cortex;cognitive neuroscience;distributed coding", "primary_area": "applications to neuroscience & cognitive science", "supplementary_material": "", "author": "Jacob S. Prince;Gabriel Fajardo;George A. 
Alvarez;Talia Konkle", "authorids": "~Jacob_S._Prince1;fajardgb@bc.edu;~George_A._Alvarez2;~Talia_Konkle1", "gender": "M;;M;F", "homepage": "https://jacob-prince.github.io/;;https://visionlab.harvard.edu/george/;https://konklab.fas.harvard.edu/", "dblp": ";;;", "google_scholar": "pd6wUgIAAAAJ;;qU8dld4AAAAJ;QxV9vroAAAAJ", "orcid": "0000-0001-6169-9503;;;0000-0003-1738-4744", "linkedin": "jacobprince/;;;", "or_profile": "~Jacob_S._Prince1;fajardgb@bc.edu;~George_A._Alvarez2;~Talia_Konkle1", "aff": "Harvard University;;Harvard University;Harvard University", "aff_domain": "harvard.edu;;harvard.edu;harvard.edu", "position": "PhD student;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nprince2024manipulating,\ntitle={Manipulating dropout reveals an optimal balance of efficiency and robustness in biological and machine visual systems},\nauthor={Jacob S. Prince and Gabriel Fajardo and George A. Alvarez and Talia Konkle},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=ADDCErFzev}\n}", "github": "", "project": "", "reviewers": "YU1u;h1kR;yavH;wjr3", "pdf_size": 6385398, "rating": "6;6;6;6", "confidence": "4;4;3;3", "soundness": "3;3;4;3", "contribution": "3;3;3;3", "presentation": "4;4;3;3", "wc_summary": "110;107;111;170", "wc_strengths": "39;117;70;83", "wc_weaknesses": "4;28;185;172", "wc_questions": "170;29;102;108", "wc_review": "323;281;468;533", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "673;182;546;496", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 124.5, 26.31064423384574 ], "wc_strengths_avg": [ 77.25, 27.9676152004421 ], "wc_weaknesses_avg": [ 97.25, 81.8210700247803 ], "wc_questions_avg": [ 102.25, 49.97186708539115 ], "wc_review_avg": [ 401.25, 102.95235548543802 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 474.25, 180.64658175564796 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8966726572268208078&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=ADDCErFzev", "pdf": "https://openreview.net/pdf?id=ADDCErFzev", "email": "harvard.edu;;harvard.edu;harvard.edu", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Harvard University", "aff_unique_dep": "", "aff_unique_url": "https://www.harvard.edu", "aff_unique_abbr": "Harvard", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "LoTa-Bench: Benchmarking Language-oriented Task Planners for Embodied Agents", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19271", "id": "ADSxCpCu9s", "author_site": "Jae-Woo Choi, Youngwoo Yoon, Hyobin Ong, Jaehong Kim, Minsu Jang", "tldr": "", "abstract": "Large language models (LLMs) have recently received considerable attention as alternative solutions for task planning. 
However, comparing the performance of language-oriented task planners becomes difficult, and there exists a dearth of detailed exploration regarding the effects of various factors such as pre-trained model selection and prompt construction. To address this, we propose a benchmark system for automatically quantifying performance of task planning for home-service embodied agents. Task planners are tested on two pairs of datasets and simulators: 1) ALFRED and AI2-THOR, 2) an extension of Watch-And-Help and VirtualHome. Using the proposed benchmark system, we perform extensive experiments with LLMs and prompts, and explore several enhancements of the baseline planner. We expect that the proposed benchmark tool would accelerate the development of language-oriented task planners.", "keywords": "task planning;language models;benchmarking;embodied agents;home robots", "primary_area": "datasets and benchmarks", "supplementary_material": "", "author": "Jae-Woo Choi;Youngwoo Yoon;Hyobin Ong;Jaehong Kim;Minsu Jang", "authorids": "~Jae-Woo_Choi1;~Youngwoo_Yoon1;~Hyobin_Ong1;~Jaehong_Kim3;~Minsu_Jang1", "gender": "M;M;F;M;M", "homepage": ";https://sites.google.com/view/youngwoo-yoon/;https://ohnghb99.github.io/hyobin.github.io/;;https://zebehn.github.io", "dblp": ";82/5691;;75/3644-1;64/4831", "google_scholar": "vE3PElsAAAAJ;XPL1OiAAAAAJ;_7yFVacAAAAJ;https://scholar.google.si/citations?user=PfnxK1kAAAAJ;ggkuHCcAAAAJ", "orcid": ";;0009-0000-8479-0510;0000-0002-6840-5026;0000-0002-7166-0300", "linkedin": ";;;;minsu-jang-066b1bb/", "or_profile": "~Jae-Woo_Choi1;~Youngwoo_Yoon1;~Hyobin_Ong1;~Jaehong_Kim3;~Minsu_Jang1", "aff": "Electronics and Telecommunications Research Institute;Electronics and Telecommunications Research Institute;University of Science and Technology;ETRI;Electronics and Telecommunications Research Institute", "aff_domain": "etri.re.kr;etri.re.kr;ust.ac.kr;etri.re.kr;etri.re.kr", "position": "Researcher;Principal Researcher;MS student;Principle Researcher/Director;Principal Researcher", "bibtex": "@inproceedings{\nchoi2024lotabench,\ntitle={LoTa-Bench: Benchmarking Language-oriented Task Planners for Embodied Agents},\nauthor={Jae-Woo Choi and Youngwoo Yoon and Hyobin Ong and Jaehong Kim and Minsu Jang},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=ADSxCpCu9s}\n}", "github": "", "project": "", "reviewers": "mB86;DjjY;JeGM;JjuX", "pdf_size": 12665485, "rating": "6;6;6;6", "confidence": "4;4;4;3", "soundness": "3;3;3;3", "contribution": "2;2;3;1", "presentation": "3;3;2;3", "wc_summary": "86;106;77;49", "wc_strengths": "76;79;137;45", "wc_weaknesses": "209;400;229;81", "wc_questions": "3;8;148;11", "wc_review": "374;593;591;186", "wc_reply_reviewers": "24;0;28;59", "wc_reply_authors": "845;1116;1748;889", "reply_reviewers": "1;0;1;1", "reply_authors": "2;3;4;3", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 79.5, 20.5 ], "wc_strengths_avg": [ 84.25, 33.23683950077083 ], "wc_weaknesses_avg": [ 229.75, 113.51514216173982 ], "wc_questions_avg": [ 42.5, 60.97745485013293 ], "wc_review_avg": [ 436.0, 169.57151883497417 ], "wc_reply_reviewers_avg": [ 27.75, 20.980645843252777 ], "wc_reply_authors_avg": [ 1149.5, 360.52219071785305 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], 
"replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5315466281824827335&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=ADSxCpCu9s", "pdf": "https://openreview.net/pdf?id=ADSxCpCu9s", "email": "etri.re.kr;etri.re.kr;ust.ac.kr;etri.re.kr;etri.re.kr", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Electronics and Telecommunications Research Institute;University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "http://www.etri.re.kr;", "aff_unique_abbr": "ETRI;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea;" }, { "id": "AEi2wyAMyb", "title": "Bi-Level Optimization for Pseudo-Labeling Based Semi-Supervised Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Semi-supervised learning (SSL) is a fundamental task in machine learning, empowering models to extract valuable insights from datasets with limited labeled samples and a large amount of unlabeled data. \nAlthough pseudo-labeling is a widely used approach for SSL that generates pseudo-labels for unlabeled data and leverages them as ground truth labels for training, traditional pseudo-labeling techniques often suffer from the problem of error accumulation, leading to a significant decrease in the quality of pseudo-labels and hence\n\tthe overall model performance. \n\tIn this paper, we propose a novel Bi-level Optimization method for Pseudo-label Learning (BOPL) \n\tto boost semi-supervised training. \nIt treats pseudo-labels as latent variables, and optimizes the model parameters and pseudo-labels\njointly within a bi-level optimization framework. \nBy enabling direct optimization over the pseudo-labels towards maximizing the prediction model performance,\nthe method is expected to produce high-quality pseudo-labels that are much less susceptible to error accumulation. \nTo evaluate the effectiveness of the proposed approach, \nwe conduct extensive experiments on \nmultiple SSL benchmarks. 
\nThe experimental results show the proposed BOPL outperforms the state-of-the-art SSL techniques.", "keywords": "Semi-Supervised Learning;Bi-level Optimization", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Marzi Heidari;Yuhong Guo", "authorids": "~Marzi_Heidari1;~Yuhong_Guo1", "gender": "F;", "homepage": ";", "dblp": "270/0305;", "google_scholar": "https://scholar.google.ca/citations?user=OEWPekoAAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Marzi_Heidari1;~Yuhong_Guo1", "aff": "Carleton University;", "aff_domain": "carleton.ca;", "position": "PhD student;", "bibtex": "@misc{\nheidari2024bilevel,\ntitle={Bi-Level Optimization for Pseudo-Labeling Based Semi-Supervised Learning},\nauthor={Marzi Heidari and Yuhong Guo},\nyear={2024},\nurl={https://openreview.net/forum?id=AEi2wyAMyb}\n}", "github": "", "project": "", "reviewers": "Yzuc;hAat;akKq", "site": "https://openreview.net/forum?id=AEi2wyAMyb", "pdf_size": 579116, "rating": "5;5;6", "confidence": "4;3;3", "soundness": "2;3;3", "contribution": "2;2;3", "presentation": "3;2;3", "wc_summary": "76;113;46", "wc_strengths": "102;58;55", "wc_weaknesses": "484;152;183", "wc_questions": "7;56;26", "wc_review": "669;379;310", "wc_reply_reviewers": "29;110;137", "wc_reply_authors": "539;839;611", "reply_reviewers": "1;1;3", "reply_authors": "2;3;4", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 78.33333333333333, 27.402351886086144 ], "wc_strengths_avg": [ 71.66666666666667, 21.483844059096022 ], "wc_weaknesses_avg": [ 273.0, 149.7353220408153 ], "wc_questions_avg": [ 29.666666666666668, 20.17148702720969 ], "wc_review_avg": [ 452.6666666666667, 155.5427772529331 ], "wc_reply_reviewers_avg": [ 92.0, 45.89117562233506 ], "wc_reply_authors_avg": [ 663.0, 127.87493890516625 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.9428090415820634 ], "reply_authors_avg": [ 3.0, 0.816496580927726 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:29BwJvPaTNEJ:scholar.google.com/&scioq=Bi-Level+Optimization+for+Pseudo-Labeling+Based+Semi-Supervised+Learning&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Carleton University", "aff_unique_dep": "", "aff_unique_url": "https://carleton.ca", "aff_unique_abbr": "Carleton", "aff_country_unique_index": "0", "aff_country_unique": "Canada" }, { "title": "SpeechTokenizer: Unified Speech Tokenizer for Speech Language Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19270", "id": "AF9Q8Vip84", "author_site": "Xin Zhang, Dong Zhang, Shimin Li, Yaqian Zhou, Xipeng Qiu", "tldr": "", "abstract": "Current speech large language models build upon discrete speech representations,\nwhich can be categorized into semantic tokens and acoustic tokens. However,\nexisting speech tokens are not specifically designed for speech language modeling. To assess the suitability of speech tokens for building speech language\nmodels, we established the first benchmark, SLMTokBench. 
Our results indicate\nthat neither semantic nor acoustic tokens are ideal for this purpose. Therefore, we\npropose SpeechTokenizer, a unified speech tokenizer for speech large language\nmodels. SpeechTokenizer adopts the Encoder-Decoder architecture with residual\nvector quantization (RVQ). Unifying semantic and acoustic tokens, SpeechTokenizer disentangles different aspects of speech information hierarchically across\ndifferent RVQ layers. Furthermore, We construct a Unified Speech Language\nModel (USLM) leveraging SpeechTokenizer. Experiments show that SpeechTokenizer performs comparably to EnCodec in speech reconstruction and demonstrates\nstrong performance on the SLMTokBench benchmark. Also, USLM outperforms\nVALL-E in zero-shot Text-to-Speech tasks. Code and models are available at\nhttps://github.com/ZhangXInFD/SpeechTokenizer/.", "keywords": "speech;audio;multi-modal;large language model", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Xin Zhang;Dong Zhang;Shimin Li;Yaqian Zhou;Xipeng Qiu", "authorids": "~Xin_Zhang36;~Dong_Zhang9;~Shimin_Li1;~Yaqian_Zhou1;~Xipeng_Qiu1", "gender": "M;M;M;F;M", "homepage": "https://github.com/ZhangXInFD;;;;https://xpqiu.github.io/", "dblp": ";;;34/389-1.html;69/1395", "google_scholar": "https://scholar.google.com/citations?hl=en;ScVbeu0AAAAJ;0xxkGjMAAAAJ;;Pq4Yp_kAAAAJ", "orcid": ";;;;0000-0001-7163-5247", "linkedin": ";;;;", "or_profile": "~Xin_Zhang36;~Dong_Zhang9;~Shimin_Li1;~Yaqian_Zhou1;~Xipeng_Qiu1", "aff": "Fudan University;Fudan University;Fudan University;Fudan University;Fudan University", "aff_domain": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn", "position": "MS student;MS student;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2024speechtokenizer,\ntitle={SpeechTokenizer: Unified Speech Tokenizer for Speech Language Models},\nauthor={Xin Zhang and Dong Zhang and Shimin Li and Yaqian Zhou and Xipeng Qiu},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=AF9Q8Vip84}\n}", "github": "", "project": "", "reviewers": "pbGo;g1yJ;usJL;tD35", "pdf_size": 1257544, "rating": "3;6;6;8", "confidence": "4;4;3;4", "soundness": "3;2;2;3", "contribution": "2;3;2;3", "presentation": "3;3;2;3", "wc_summary": "149;72;47;69", "wc_strengths": "98;69;33;115", "wc_weaknesses": "210;257;100;61", "wc_questions": "29;31;5;44", "wc_review": "486;429;185;289", "wc_reply_reviewers": "0;107;0;0", "wc_reply_authors": "666;1474;513;234", "reply_reviewers": "0;2;0;0", "reply_authors": "1;4;1;1", "rating_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 84.25, 38.6094224251024 ], "wc_strengths_avg": [ 78.75, 31.115711465431737 ], "wc_weaknesses_avg": [ 157.0, 79.48899294870957 ], "wc_questions_avg": [ 27.25, 14.077908225301087 ], "wc_review_avg": [ 347.25, 117.95417542418751 ], "wc_reply_reviewers_avg": [ 26.75, 46.332359102467464 ], "wc_reply_authors_avg": [ 721.75, 461.1032286809538 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.08084520834544431, "gs_citation": 136, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=10604890170283513348&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=AF9Q8Vip84", "pdf": "https://openreview.net/pdf?id=AF9Q8Vip84", "email": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Fudan University", "aff_unique_dep": "", "aff_unique_url": "https://www.fudan.edu.cn", "aff_unique_abbr": "Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "MuSc: Zero-Shot Industrial Anomaly Classification and Segmentation with Mutual Scoring of the Unlabeled Images", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19269", "id": "AHgc5SMdtd", "author_site": "Xurui Li, Ziming Huang, Feng Xue, Yu Zhou", "tldr": "", "abstract": "This paper studies zero-shot anomaly classification (AC) and segmentation (AS) in industrial vision.\nWe reveal that the abundant normal and abnormal cues implicit in unlabeled test images can be exploited for anomaly determination, which is ignored by prior methods.\nOur key observation is that for the industrial product images, the normal image patches could find a relatively large number of similar patches in other unlabeled images,\nwhile the abnormal ones only have a few similar patches.\nWe leverage such a discriminative characteristic to design a novel zero-shot AC/AS method by Mutual Scoring (MuSc) of the unlabeled images, \nwhich does not need any training or prompts.\nSpecifically, we perform Local Neighborhood Aggregation with Multiple Degrees (LNAMD) to obtain the patch features that are capable of representing anomalies in varying sizes.\nThen we propose the Mutual Scoring Mechanism (MSM) to leverage the unlabeled test images to assign the anomaly score to each other. \nFurthermore, we present an optimization approach named Re-scoring with Constrained Image-level Neighborhood (RsCIN) for image-level anomaly classification to suppress the false positives caused by noises in normal images.\nThe superior performance on the challenging MVTec AD and VisA datasets demonstrates the effectiveness of our approach. 
\nCompared with the state-of-the-art zero-shot approaches, \nMuSc achieves a $\\textbf{21.1}$% PRO absolute gain (from 72.7\\% to 93.8\\%) on MVTec AD, a $\\textbf{19.4}$% pixel-AP gain and a $\\textbf{14.7}$% pixel-AUROC gain on VisA.\nIn addition, our zero-shot approach outperforms most of the few-shot approaches and is comparable to some one-class methods.\nCode is available at https://github.com/xrli-U/MuSc.", "keywords": "zero-shot anomaly detection; Industrial Informatics\uff1b", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Xurui Li;Ziming Huang;Feng Xue;Yu Zhou", "authorids": "~Xurui_Li4;~Ziming_Huang2;~Feng_Xue3;~Yu_Zhou1", "gender": "M;M;M;", "homepage": "https://github.com/xrli-U;https://github.com/ZimingHuang1;https://xuefeng-cvr.github.io/;https://github.com/zhouyu-hust", "dblp": ";;;36/2728-16.html", "google_scholar": ";;66SeiQsAAAAJ;", "orcid": "0009-0007-3590-9870;;0000-0002-4101-3401;", "linkedin": ";;;", "or_profile": "~Xurui_Li4;~Ziming_Huang2;~Feng_Xue3;~Yu_Zhou1", "aff": "Huazhong University of Science and Technology;Huazhong University of Science and Technology;University of Trento;Huazhong University of Science and Technology", "aff_domain": "hust.edu.cn;hust.edu.cn;unitn.it;hust.edu.cn", "position": "MS student;MS student;Postdoc;Associate Professor", "bibtex": "@inproceedings{\nli2024musc,\ntitle={MuSc: Zero-Shot Industrial Anomaly Classification and Segmentation with Mutual Scoring of the Unlabeled Images},\nauthor={Xurui Li and Ziming Huang and Feng Xue and Yu Zhou},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=AHgc5SMdtd}\n}", "github": "", "project": "", "reviewers": "52S5;dCfU;H5pV;idjR;NLA8", "pdf_size": 15540149, "rating": "3;5;6;6;6", "confidence": "3;4;5;3;5", "soundness": "1;3;4;3;2", "contribution": "1;2;3;3;3", "presentation": "1;3;4;3;2", "wc_summary": "170;39;26;45;70", "wc_strengths": "15;5;30;79;147", "wc_weaknesses": "165;157;144;103;67", "wc_questions": "1;92;3;3;2", "wc_review": "351;293;203;230;286", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "1083;2241;437;185;1416", "reply_reviewers": "0;0;0;0;0", "reply_authors": "2;4;1;1;3", "rating_avg": [ 5.2, 1.16619037896906 ], "confidence_avg": [ 4.0, 0.8944271909999159 ], "soundness_avg": [ 2.6, 1.019803902718557 ], "contribution_avg": [ 2.4, 0.8 ], "presentation_avg": [ 2.6, 1.019803902718557 ], "wc_summary_avg": [ 70.0, 52.003846011617256 ], "wc_strengths_avg": [ 55.2, 52.46865731081748 ], "wc_weaknesses_avg": [ 127.2, 36.90203246435079 ], "wc_questions_avg": [ 20.2, 35.907659350060676 ], "wc_review_avg": [ 272.6, 51.77103437251375 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1072.4, 731.4384731472634 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.2, 1.16619037896906 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5752237416355278, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4485039909524484938&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=AHgc5SMdtd", "pdf": "https://openreview.net/pdf?id=AHgc5SMdtd", "email": "hust.edu.cn;hust.edu.cn;unitn.it;hust.edu.cn", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Huazhong University of Science and Technology;University of Trento", "aff_unique_dep": ";", "aff_unique_url": "http://www.hust.edu.cn;https://www.unitn.it", 
"aff_unique_abbr": "HUST;UniTN", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "China;Italy" }, { "id": "AIbQ3HDDHU", "title": "Training and inference of large language models using 8-bit floating point", "track": "main", "status": "Reject", "tldr": "", "abstract": "FP8 formats are gaining popularity to boost the computational efficiency for training and inference of large deep learning models. Their main challenge is that a careful choice of scaling is needed to prevent degradation due to the reduced dynamic range compared to higher-precision formats. Although there exists ample literature about selecting such scalings for INT formats, this critical aspect has yet to be addressed for FP8. This paper presents a methodology to select the scalings for FP8 linear layers, based on dynamically updating per-tensor scales for the weights, gradients and activations. We apply this methodology to train and validate large language models of the type of GPT and Llama 2 using FP8, for model sizes ranging from 111M to 70B. To facilitate the understanding of the FP8 dynamics, our results are accompanied by plots of the per-tensor scale distribution for weights, activations and gradients during both training and inference.", "keywords": "FP8;quantisation;low-precision training;low-precision inference;post-training quantisation;large language models;hardware", "primary_area": "infrastructure, software libraries, hardware, etc.", "supplementary_material": "", "author": "Sergio P. Perez;Yan Zhang;James Briggs;Charlie Blake;Josh Levy-Kramer;Paul Balanca;Carlo Luschi;Stephen Barlow;Andrew W Fitzgibbon", "authorids": "~Sergio_P._Perez1;~Yan_Zhang32;~James_Briggs1;~Charlie_Blake1;~Josh_Levy-Kramer1;~Paul_Balanca1;~Carlo_Luschi1;~Stephen_Barlow2;~Andrew_W_Fitzgibbon1", "gender": ";;M;M;M;M;M;M;M", "homepage": ";;https://jimypbr.github.io/;https://thecharlieblake.co.uk/;;https://github.com/balancap;;https://graphcore.ai;http://awf.fitzgibbon.ie", "dblp": ";;;243/6977;;;72/10621;;f/AndrewWFitzgibbon", "google_scholar": "izqE_ooAAAAJ;;TYNnuTYAAAAJ;kvibgXMAAAAJ;NJxqHNcAAAAJ;;;;73t3lIcAAAAJ", "orcid": ";;;;;;;;", "linkedin": "sergiopp;yan-zhang-54b79071;jimypbr/;;joshlevykramer/;;carlo-luschi-1908144/;stevebarlow;andrew-fitzgibbon-952b9370", "or_profile": "~Sergio_P._Perez1;~Yan_Zhang32;~James_Briggs1;~Charlie_Blake1;~Josh_Levy-Kramer1;~Paul_Balanca1;~Carlo_Luschi1;~Stephen_Barlow2;~Andrew_W_Fitzgibbon1", "aff": ";Graphcore;;;;Graphcore;Graphcore;Graphcore Ltd;Graphcore", "aff_domain": ";graphcore.ai;;;;graphcore.ai;graphcore.ai;graphcore.ai;graphcore.ai", "position": ";Researcher;;;;Researcher;VP & Head of Research;Member of Engineering Team;Researcher", "bibtex": "@misc{\nperez2024training,\ntitle={Training and inference of large language models using 8-bit floating point},\nauthor={Sergio P. 
Perez and Yan Zhang and James Briggs and Charlie Blake and Josh Levy-Kramer and Paul Balanca and Carlo Luschi and Stephen Barlow and Andrew W Fitzgibbon},\nyear={2024},\nurl={https://openreview.net/forum?id=AIbQ3HDDHU}\n}", "github": "", "project": "", "reviewers": "LXwv;PWj7;ViM8", "site": "https://openreview.net/forum?id=AIbQ3HDDHU", "pdf_size": 677378, "rating": "3;5;6", "confidence": "3;4;3", "soundness": "2;3;3", "contribution": "1;2;3", "presentation": "2;3;3", "wc_summary": "27;111;41", "wc_strengths": "32;27;28", "wc_weaknesses": "275;257;40", "wc_questions": "17;47;37", "wc_review": "351;442;146", "wc_reply_reviewers": "184;50;86", "wc_reply_authors": "419;343;207", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.0, 0.816496580927726 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 59.666666666666664, 36.745370078721784 ], "wc_strengths_avg": [ 29.0, 2.160246899469287 ], "wc_weaknesses_avg": [ 190.66666666666666, 106.79055305701074 ], "wc_questions_avg": [ 33.666666666666664, 12.472191289246473 ], "wc_review_avg": [ 313.0, 123.79283770342558 ], "wc_reply_reviewers_avg": [ 106.66666666666667, 56.62351298022953 ], "wc_reply_authors_avg": [ 323.0, 87.69644614616186 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.18898223650461363, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12127229904750398817&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Graphcore", "aff_unique_dep": "", "aff_unique_url": "https://www.graphcore.ai", "aff_unique_abbr": "Graphcore", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "AJ7tnHhgWZ", "title": "Enhancing Vision-Language Prompt Learning through Image-Text Distribution Alignment", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Large vision-language models (VLMs) such as CLIP have demonstrated impressive performance in zero-shot image classification tasks. These models usually leverage prompts to align the text and image distributions. However, existing prompting techniques have limitations in terms of interpretability or dynamic alignment of distributions. Specifically, the discrete prompt learning methods cannot effectively perform dynamic alignment of distributions, while the soft prompt learning method have very limited interpretability, rendering them challenging to comprehend and enhance. To jointly solve these issues, we leverage the interpretable descriptions to facilitate the soft prompt learning. In this paper, we introduce a novel training-free strategy to mitigate the distribution gap between plain text and image-text corpus, leveraging the power of pretrained models like GPT-3 to enhance image classification performance. Furthermore, we propose a new few-shot learning pipeline that incorporates a prompt learning and reweighting strategy to dynamically mitigate the image and text distribution gap. This method overcomes the limitations of existing prompting techniques and offers a more effective and interpretable solution for image classification tasks. 
Extensive experiments show the effectiveness of our method and illustrate the interpretability of our descriptions.", "keywords": "Domain adaptaion;CLIP;Prompt learning", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Dongliang Guo;Handong Zhao;Sungchul Kim;Ryan A. Rossi;Tong Yu;Sheng Li", "authorids": "~Dongliang_Guo1;~Handong_Zhao3;~Sungchul_Kim1;~Ryan_A._Rossi2;~Tong_Yu3;~Sheng_Li3", "gender": "M;M;;M;;M", "homepage": "https://donglgcn.github.io/;https://sites.google.com/site/subright;https://www.linkedin.com/in/tong-yu-42790744;http://sheng-li.org;https://hdzhao.github.io/;http://ryanrossi.com", "dblp": "48/7696-2.html;61/1573;32/1593-1;23/3439-1;79/8522;17/5085", "google_scholar": ";v8ISLgIAAAAJ;https://scholar.google.com/citations?hl=en;DEncVcYAAAAJ;0f-YOFgAAAAJ;_Dc6lbQAAAAJ", "orcid": "0000-0003-2856-4011;0000-0003-3580-5290;0000-0002-5991-2050;0000-0003-1205-8632;;0000-0001-9758-0635", "linkedin": ";;tong-yu-42790744;sheng-li-15a70022/;;", "or_profile": "~Dongliang_Guo1;~Sungchul_Kim1;~Tong_Yu3;~Sheng_Li3;~Handong_Zhao1;~Ryan_Rossi1", "aff": "University of Virginia, Charlottesville;Adobe Systems;Adobe Research;University of Virginia, Charlottesville;Adobe Systems;Adobe Research", "aff_domain": "virginia.edu;adobe.com;adobe.com;virginia.edu;adobe.com;adobe.com", "position": "PhD student;Researcher;Senior Research Scientist;Associate Professor;Research Scientist;Senior Research Scientist", "bibtex": "@misc{\nguo2024enhancing,\ntitle={Enhancing Vision-Language Prompt Learning through Image-Text Distribution Alignment},\nauthor={Dongliang Guo and Handong Zhao and Sungchul Kim and Ryan A. Rossi and Tong Yu and Sheng Li},\nyear={2024},\nurl={https://openreview.net/forum?id=AJ7tnHhgWZ}\n}", "github": "", "project": "", "reviewers": "1JaY;W7r5;sZnc", "site": "https://openreview.net/forum?id=AJ7tnHhgWZ", "pdf_size": 1873941, "rating": "3;3;3", "confidence": "4;5;5", "soundness": "2;2;2", "contribution": "2;2;1", "presentation": "2;2;2", "wc_summary": "79;84;104", "wc_strengths": "102;25;89", "wc_weaknesses": "290;740;306", "wc_questions": "202;6;2", "wc_review": "673;855;501", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 3.0, 0.0 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "soundness_avg": [ 2.0, 0.0 ], "contribution_avg": [ 1.6666666666666667, 0.4714045207910317 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 89.0, 10.801234497346433 ], "wc_strengths_avg": [ 72.0, 33.65511352924941 ], "wc_weaknesses_avg": [ 445.3333333333333, 208.46315954836933 ], "wc_questions_avg": [ 70.0, 93.35237900914291 ], "wc_review_avg": [ 676.3333333333334, 144.5391142755329 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 3, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Y40cEljJIjgJ:scholar.google.com/&scioq=Enhancing+Vision-Language+Prompt+Learning+through+Image-Text+Distribution+Alignment&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;1;0;1;1", "aff_unique_norm": "University of Virginia;Adobe", "aff_unique_dep": ";Adobe Systems Incorporated", "aff_unique_url": "https://www.virginia.edu;https://www.adobe.com", "aff_unique_abbr": "UVA;Adobe", "aff_campus_unique_index": "0;0", 
"aff_campus_unique": "Charlottesville;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Backdoor Federated Learning by Poisoning Backdoor-Critical Layers", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19268", "id": "AJBGSVSTT2", "author_site": "Haomin Zhuang, Mingxian Yu, Hao Wang, Yang Hua, Jian Li, Xu Yuan", "tldr": "", "abstract": "Federated learning (FL) has been widely deployed to enable machine learning training on sensitive data across distributed devices. However, the decentralized learning paradigm and heterogeneity of FL further extend the attack surface for backdoor attacks. Existing FL attack and defense methodologies typically focus on the whole model. None of them recognizes the existence of backdoor-critical (BC) layers-a small subset of layers that dominate the model vulnerabilities. Attacking the BC layers achieves equivalent effects as attacking the whole model but at a far smaller chance of being detected by state-of-the-art (SOTA) defenses. This paper proposes a general in-situ approach that identifies and verifies BC layers from the perspective of attackers. Based on the identified BC layers, we carefully craft a new backdoor attack methodology that adaptively seeks a fundamental balance between attacking effects and stealthiness under various defense strategies. Extensive experiments show that our BC layer-aware backdoor attacks can successfully backdoor FL under seven SOTA defenses with only 10% malicious clients and outperform the latest backdoor attack methods.", "keywords": "Federated Learning;Backdoor Attack", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/60ac36473d69d31eaa56e5f7104eae3c979a5554.zip", "author": "Haomin Zhuang;Mingxian Yu;Hao Wang;Yang Hua;Jian Li;Xu Yuan", "authorids": "~Haomin_Zhuang1;~Mingxian_Yu1;~Hao_Wang29;~Yang_Hua2;~Jian_Li14;~Xu_Yuan1", "gender": ";;M;M;M;M", "homepage": "https://zhmzm.github.io/;;https://www.haow.us;https://pure.qub.ac.uk/en/persons/yang-hua;https://sites.google.com/stonybrook.edu/jianli;https://yuanxuyx.github.io/", "dblp": "344/1798;;w/HaoWang-22;;33/5448-8;24/6114-1", "google_scholar": "vXllNroAAAAJ;;r-Ik__gAAAAJ;N0tFi8MAAAAJ;h039Yq4AAAAJ;R3XkwA8AAAAJ", "orcid": ";0009-0008-8547-7831;0000-0002-1444-2657;0000-0001-5536-503X;;", "linkedin": ";;haowanguoft/;;;", "or_profile": "~Haomin_Zhuang1;~Mingxian_Yu1;~Hao_Wang29;~Yang_Hua2;~Jian_Li14;~Xu_Yuan1", "aff": "University of Notre Dame;SUN YAT-SEN UNIVERSITY;Louisiana State University;Queen's University Belfast;State University of New York at Stony Brook;University of Delaware", "aff_domain": "nd.edu;sysu.edu.cn;lsu.edu;qub.ac.uk;stonybrook.edu;udel.edu", "position": "PhD student;MS student;Assistant Professor;Assistant Professor;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nzhuang2024backdoor,\ntitle={Backdoor Federated Learning by Poisoning Backdoor-Critical Layers},\nauthor={Haomin Zhuang and Mingxian Yu and Hao Wang and Yang Hua and Jian Li and Xu Yuan},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=AJBGSVSTT2}\n}", "github": "", "project": "", "reviewers": "EGpy;MrXZ;vQhS;tdce", "pdf_size": 1740676, "rating": "6;6;6;6", "confidence": "4;4;4;3", "soundness": "3;3;3;2", "contribution": "3;2;3;2", "presentation": "3;2;3;3", "wc_summary": "73;76;128;143", "wc_strengths": "11;49;94;27", "wc_weaknesses": "125;229;84;27", 
"wc_questions": "151;63;20;143", "wc_review": "360;417;326;340", "wc_reply_reviewers": "23;132;0;20", "wc_reply_authors": "559;1082;436;854", "reply_reviewers": "1;2;0;1", "reply_authors": "2;3;2;3", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 105.0, 30.97579700346708 ], "wc_strengths_avg": [ 45.25, 31.21197686786276 ], "wc_weaknesses_avg": [ 116.25, 73.81522539422338 ], "wc_questions_avg": [ 94.25, 54.96987811520051 ], "wc_review_avg": [ 360.75, 34.65093793824346 ], "wc_reply_reviewers_avg": [ 43.75, 51.71254683343299 ], "wc_reply_authors_avg": [ 732.75, 252.45135670065233 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13229252772080574765&as_sdt=8005&sciodt=0,7&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=AJBGSVSTT2", "pdf": "https://openreview.net/pdf?id=AJBGSVSTT2", "email": "nd.edu;sysu.edu.cn;lsu.edu;qub.ac.uk;stonybrook.edu;udel.edu", "author_num": 6, "aff_unique_index": "0;1;2;3;4;5", "aff_unique_norm": "University of Notre Dame;Sun Yat-sen University;Louisiana State University;Queen's University Belfast;State University of New York at Stony Brook;University of Delaware", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.nd.edu;http://www.sysu.edu.cn;https://www.lsu.edu;https://www.qub.ac.uk;https://www.stonybrook.edu;https://www.udel.edu", "aff_unique_abbr": "Notre Dame;SYSU;LSU;QUB;SUNY Stony Brook;UD", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stony Brook", "aff_country_unique_index": "0;1;0;2;0;0", "aff_country_unique": "United States;China;United Kingdom" }, { "title": "Causality-Inspired Spatial-Temporal Explanations for Dynamic Graph Neural Networks", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19267", "id": "AJBkfwXh3u", "author_site": "Kesen Zhao, Liang Zhang", "tldr": "", "abstract": "Dynamic Graph Neural Networks (DyGNNs) have gained significant popularity in the research of dynamic graphs, but are limited by the low transparency, such that human-understandable insights can hardly be drawn from their predictions. Although a number of existing research have been devoted to investigating the interpretability of graph neural networks (GNNs), achieving the interpretability of DyGNNs is pivotally challenging due to the complex spatial-temporal correlations in dynamic graphs. To this end, we propose an innovative causality-inspired generative model based on structural causal model (SCM), which explores the underlying philosophies of DyGNN predictions by identifying the trivial, static, and dynamic causal relationships. To reach this goal, two critical tasks need to be accomplished including (1) disentangling the complex causal relationships, and (2) fitting the spatial-temporal explanations of DyGNNs in the SCM architecture. To tackle these challenges, the proposed method incorporates a contrastive learning module to disentangle trivial and causal relationships, and a dynamic correlating module to disentangle dynamic and static causal relationships, respectively. 
A dynamic VGAE-based framework is further developed, which generates causal-and-dynamic masks for spatial interpretability, and recognizes dynamic relationships along the time horizon through causal invention for temporal interpretability. Comprehensive experiments have been conducted on both synthetic and real-world datasets, where our approach yields substantial improvements, thereby demonstrating significant superiority.", "keywords": "Dynamic Graph;Graph Explanation;Graph Neural Network;Causal Inference", "primary_area": "learning on graphs and other geometries & topologies", "supplementary_material": "/attachment/6961ddeb5ee2ee2596341db5f50262266b74bb58.zip", "author": "Kesen Zhao;Liang Zhang", "authorids": "~Kesen_Zhao1;~Liang_Zhang17", "gender": "M;M", "homepage": ";https://sites.google.com/view/liangzhang1111/liang-zhang-hk-polyu", "dblp": "331/3303;", "google_scholar": ";MKlx5KsAAAAJ", "orcid": ";0000-0002-5805-7099", "linkedin": ";", "or_profile": "~Kesen_Zhao1;~Liang_Zhang17", "aff": "Nanyang Technological University;Shenzhen Research Institute of Big Data", "aff_domain": "ntu.edu.sg;sribd.cn", "position": "PhD student;Researcher", "bibtex": "@inproceedings{\nzhao2024causalityinspired,\ntitle={Causality-Inspired Spatial-Temporal Explanations for Dynamic Graph Neural Networks},\nauthor={Kesen Zhao and Liang Zhang},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=AJBkfwXh3u}\n}", "github": "", "project": "", "reviewers": "JCbW;fzQP;8sLV;U7py", "pdf_size": 563165, "rating": "5;5;6;8", "confidence": "4;2;4;2", "soundness": "2;2;3;3", "contribution": "2;2;3;3", "presentation": "3;1;3;3", "wc_summary": "51;44;94;61", "wc_strengths": "77;71;49;59", "wc_weaknesses": "263;102;322;9", "wc_questions": "141;50;2;12", "wc_review": "532;267;467;141", "wc_reply_reviewers": "29;0;56;0", "wc_reply_authors": "650;833;680;108", "reply_reviewers": "1;0;1;0", "reply_authors": "1;2;1;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.0, 1.0 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 62.5, 19.1637678967368 ], "wc_strengths_avg": [ 64.0, 10.816653826391969 ], "wc_weaknesses_avg": [ 174.0, 124.73371637211808 ], "wc_questions_avg": [ 51.25, 54.82415070021605 ], "wc_review_avg": [ 351.75, 156.02143282254525 ], "wc_reply_reviewers_avg": [ 21.25, 23.29565410114084 ], "wc_reply_authors_avg": [ 567.75, 274.3595952395323 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.40824829046386296, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14135559314020236466&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "openreview": "https://openreview.net/forum?id=AJBkfwXh3u", "pdf": "https://openreview.net/pdf?id=AJBkfwXh3u", "email": "ntu.edu.sg;sribd.cn", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Nanyang Technological University;Shenzhen Research Institute of Big Data", "aff_unique_dep": ";", "aff_unique_url": "https://www.ntu.edu.sg;http://www.sribd.cn", "aff_unique_abbr": "NTU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Singapore;China" }, { "id": "AJgVY0zOB0", "title": "Weakly-supervised Camera Localization by Ground-to-satellite Image Registration", "track": "main", "status": 
"Reject", "tldr": "", "abstract": "The ground-to-satellite image matching/retrieval was initially proposed for city-scale ground camera localization. Recently, more and more attention has been paid to increasing the camera pose accuracy by ground-to-satellite image matching, once a coarse location and orientation has been obtained from the city-scale retrieval. This paper addresses the same scenario. \nHowever, existing learning-based methods for solving this task require accurate GPS labels of ground images for network training. \nUnfortunately, obtaining such accurate GPS labels is not always possible, often requiring an expensive RTK setup and suffering from signal occlusion, multi-path signal disruptions, \\etc. \nTo address this issue, this paper proposes a weakly-supervised learning strategy for ground-to-satellite image registration. It does not require highly accurate ground truth (GT)\npose labels for ground images in the training dataset. Instead, a coarse location and orientation label, either derived from the city-scale retrieval or noisy sensors (GPS, compass, \\etc), is sufficient. \nSpecifically, we present a pseudo image pair creation strategy for cross-view rotation estimation network training, and a novel method that leverages deep metric learning for translation estimation between ground-and-satellite image pairs.\nExperimental results show that our weakly-supervised learning strategy achieves the best performance on cross-area evaluation, compared to the recent state-of-the-art methods that require accurate pose labels for supervision, and shows comparable performance on same-area evaluation.", "keywords": "Cross-view localization;ground-to-satellite image matching;cross-view image matching", "primary_area": "applications to robotics, autonomy, planning", "supplementary_material": "", "author": "Yujiao Shi;Hongdong Li;Akhil Perincherry;Ankit Vora", "authorids": "~Yujiao_Shi1;~Hongdong_Li1;~Akhil_Perincherry1;~Ankit_Vora1", "gender": "F;M;;M", "homepage": "https://shiyujiao.github.io/;http://users.cecs.anu.edu.au/~hongdong/;;https://ankitvora19.wixsite.com/portfolio", "dblp": "159/2546;59/4859.html;;242/8412", "google_scholar": "rVsRpZEAAAAJ;https://scholar.google.com.tw/citations?hl=en;;EUS0qnEAAAAJ", "orcid": "0000-0001-6028-9051;;;0000-0001-7976-8730", "linkedin": "yujiao-shi-053a12198/;;;https://linkedin.com/in/ankitvora1", "or_profile": "~Yujiao_Shi1;~Hongdong_Li1;~Akhil_Perincherry1;~Ankit_Vora1", "aff": "Australian National University;Australian National University;;Ford Motor Company", "aff_domain": "anu.edu.au;anu.edu.au;;ford.com", "position": "Postdoc;Full Professor;;Researcher", "bibtex": "@misc{\nshi2024weaklysupervised,\ntitle={Weakly-supervised Camera Localization by Ground-to-satellite Image Registration},\nauthor={Yujiao Shi and Hongdong Li and Akhil Perincherry and Ankit Vora},\nyear={2024},\nurl={https://openreview.net/forum?id=AJgVY0zOB0}\n}", "github": "", "project": "", "reviewers": "9PqU;db9o;mQu3", "site": "https://openreview.net/forum?id=AJgVY0zOB0", "pdf_size": 21753895, "rating": "3;6;6", "confidence": "5;4;4", "soundness": "3;3;3", "contribution": "2;3;3", "presentation": "3;3;3", "wc_summary": "118;80;110", "wc_strengths": "36;76;167", "wc_weaknesses": "200;148;197", "wc_questions": "18;36;91", "wc_review": "372;340;565", "wc_reply_reviewers": "0;20;146", "wc_reply_authors": "685;694;395", "reply_reviewers": "0;1;1", "reply_authors": "1;1;1", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 
], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 102.66666666666667, 16.35712552851373 ], "wc_strengths_avg": [ 93.0, 54.81483983983413 ], "wc_weaknesses_avg": [ 181.66666666666666, 23.837412238374835 ], "wc_questions_avg": [ 48.333333333333336, 31.05192783422991 ], "wc_review_avg": [ 425.6666666666667, 99.38589213543116 ], "wc_reply_reviewers_avg": [ 55.333333333333336, 64.62885492478486 ], "wc_reply_authors_avg": [ 591.3333333333334, 138.87724395626842 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14290918079063947141&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "aff_unique_index": "0;0;1", "aff_unique_norm": "Australian National University;Ford Motor Company", "aff_unique_dep": ";", "aff_unique_url": "https://www.anu.edu.au;https://www.ford.com", "aff_unique_abbr": "ANU;Ford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Australia;United States" }, { "id": "AKAlVyunxA", "title": "SHINE: Shielding Backdoors in Deep Reinforcement Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recent studies have discovered that similar to supervised classifiers, a deep reinforcement learning (DRL) policy is also vulnerable to backdoor attacks. Existing defenses against backdoor attacks either do not consider RL's unique mechanism or make unrealistic assumptions, resulting in limited defense efficacy, practicability, and generalizability. In this work, we propose SHINE, a novel backdoor shielding method for DRL. SHINE first leverages policy explanation techniques to identify the backdoor triggers and then designs a policy retraining algorithm to eliminate the negative impact of the triggers on backdoored agents. We theoretically prove that SHINE guarantees to improve a backdoored agent's performance in a poisoned environment while ensuring its performance difference in the clean environment before and after shielding is bounded. We further conduct extensive experiments that evaluate SHINE against three mainstream DRL backdoor attacks in various benchmark RL environments. 
Our results show that SHINE significantly outperforms existing defenses in mitigating these backdoor attacks.", "keywords": "deep reinforcement learning;trojan backdoor;explanation", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/658689c17d6bf5701683a010a94f12063ad683d2.zip", "author": "Wenbo Guo;Zhuowen Yuan;Jinyuan Jia;Bo Li;Dawn Song", "authorids": "~Wenbo_Guo1;~Zhuowen_Yuan1;~Jinyuan_Jia2;~Bo_Li19;~Dawn_Song1", "gender": "M;M;;F;F", "homepage": "https://henrygwb.github.io/;;https://jinyuan-jia.github.io/;http://boli.cs.illinois.edu/;", "dblp": "144/1238-2.html;304/3576;24/5124-1.html;50/3402-26;s/DXSong", "google_scholar": "KyPheRMAAAAJ;F-r0bYQAAAAJ;iyg4ytkAAAAJ;K8vJkTcAAAAJ;", "orcid": ";;0000-0002-9785-7769;;", "linkedin": ";;;;", "or_profile": "~Wenbo_Guo1;~Zhuowen_Yuan1;~Jinyuan_Jia2;~Bo_Li19;~Dawn_Song1", "aff": "University of California, Santa Barbara;University of Illinois Urbana-Champaign;Pennsylvania State University;University of Illinois, Urbana Champaign;University of California, Berkeley", "aff_domain": "ucsb.edu;illinois.edu;psu.edu;illinois.edu;berkeley.edu", "position": "Assistant Professor;PhD student;Assistant Professor;Assistant Professor;Full Professor", "bibtex": "@misc{\nguo2024shine,\ntitle={{SHINE}: Shielding Backdoors in Deep Reinforcement Learning},\nauthor={Wenbo Guo and Zhuowen Yuan and Jinyuan Jia and Bo Li and Dawn Song},\nyear={2024},\nurl={https://openreview.net/forum?id=AKAlVyunxA}\n}", "github": "", "project": "", "reviewers": "F9HD;2tnZ;5eAS;r1tp", "site": "https://openreview.net/forum?id=AKAlVyunxA", "pdf_size": 1157602, "rating": "5;6;6;6", "confidence": "5;4;2;3", "soundness": "2;2;3;3", "contribution": "3;3;3;3", "presentation": "3;3;3;2", "wc_summary": "90;173;66;74", "wc_strengths": "32;75;53;77", "wc_weaknesses": "171;195;31;130", "wc_questions": "162;235;1;32", "wc_review": "455;678;151;313", "wc_reply_reviewers": "120;198;22;0", "wc_reply_authors": "1077;1714;397;745", "reply_reviewers": "2;3;1;0", "reply_authors": "4;4;3;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 100.75, 42.5991490525339 ], "wc_strengths_avg": [ 59.25, 18.335416548308903 ], "wc_weaknesses_avg": [ 131.75, 62.63934466451577 ], "wc_questions_avg": [ 107.5, 95.22210877732125 ], "wc_review_avg": [ 399.25, 193.5695934283068 ], "wc_reply_reviewers_avg": [ 85.0, 79.3536388579629 ], "wc_reply_authors_avg": [ 983.25, 485.6018816891055 ], "reply_reviewers_avg": [ 1.5, 1.118033988749895 ], "reply_authors_avg": [ 3.25, 0.82915619758885 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.7745966692414834, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6033307709105802344&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;2;1;3", "aff_unique_norm": "University of California, Santa Barbara;University of Illinois Urbana-Champaign;Pennsylvania State University;University of California, Berkeley", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ucsb.edu;https://illinois.edu;https://www.psu.edu;https://www.berkeley.edu", "aff_unique_abbr": "UCSB;UIUC;PSU;UC Berkeley", "aff_campus_unique_index": "0;1;1;3", "aff_campus_unique": "Santa Barbara;Urbana-Champaign;;Berkeley", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United 
States" }, { "id": "AKJLnDgzkm", "title": "Welfare Diplomacy: Benchmarking Language Model Cooperation", "track": "main", "status": "Reject", "tldr": "", "abstract": "The growing capabilities and increasingly widespread deployment of AI systems necessitate robust benchmarks for measuring their cooperative capabilities. Unfortunately, most multi-agent benchmarks are either zero-sum or purely cooperative, providing limited opportunities for such measurements. We introduce a general-sum variant of the zero-sum board game Diplomacy\u2014called Welfare Diplomacy\u2014in which players must balance investing in military conquest and domestic welfare. We argue that Welfare Diplomacy facilitates both a clearer assessment of and stronger training incentives for cooperative capabilities. Our contributions are: (1) proposing the Welfare Diplomacy rules and implementing them via an open- source Diplomacy engine; (2) constructing baseline agents using zero-shot prompted language models; and (3) conducting experiments where we find that baselines using state-of-the-art models attain high social welfare but are exploitable. Our work aims to promote societal safety by aiding researchers in developing and assessing multi-agent AI systems. Code to evaluate Welfare Diplomacy and reproduce our experiments is available at https://anonymous.4open.science/r/welfare-diplomacy-72AC.", "keywords": "multiagent systems;cooperative AI;AI agents;language models", "primary_area": "datasets and benchmarks", "supplementary_material": "/attachment/9076a3d46cd1dfd13e87f73ee5ff9636ce6368be.pdf", "author": "Gabriel Mukobi;Hannah Erlebach;Niklas Lauffer;Lewis Hammond;Alan Chan;Jesse Clifton", "authorids": "~Gabriel_Mukobi1;~Hannah_Erlebach1;~Niklas_Lauffer1;~Lewis_Hammond1;~Alan_Chan2;~Jesse_Clifton1", "gender": "M;F;M;;M;M", "homepage": "https://gabrielmukobi.com/;;https://niklaslauffer.github.io/;https://www.lewishammond.com/;https://achan.ca;https://statistics.sciences.ncsu.edu/people/jclifto/", "dblp": ";;;228/6647;;", "google_scholar": ";;;8fYnp7UAAAAJ;lmQmYPgAAAAJ;", "orcid": ";;;0000-0003-1695-0871;;", "linkedin": "gabrielmukobi/;hannah-erlebach-625159195/;;lrhammond/;alan-chan-51858378/;", "or_profile": "~Gabriel_Mukobi1;~Hannah_Erlebach1;~Niklas_Lauffer1;~Lewis_Hammond1;~Alan_Chan2;~Jesse_Clifton1", "aff": "Computer Science Department, Stanford University;University College London, University of London;University of California, Berkeley;University of Oxford;University of Montreal;", "aff_domain": "cs.stanford.edu;ucl.ac.uk;berkeley.edu;ox.ac.uk;umontreal.ca;", "position": "MS student;MS student;PhD student;PhD student;PhD student;", "bibtex": "@misc{\nmukobi2024welfare,\ntitle={Welfare Diplomacy: Benchmarking Language Model Cooperation},\nauthor={Gabriel Mukobi and Hannah Erlebach and Niklas Lauffer and Lewis Hammond and Alan Chan and Jesse Clifton},\nyear={2024},\nurl={https://openreview.net/forum?id=AKJLnDgzkm}\n}", "github": "", "project": "", "reviewers": "zuKK;A4aM;R7WR", "site": "https://openreview.net/forum?id=AKJLnDgzkm", "pdf_size": 17615370, "rating": "5;6;8", "confidence": "4;3;4", "soundness": "3;3;3", "contribution": "2;3;3", "presentation": "2;3;3", "wc_summary": "39;74;43", "wc_strengths": "71;55;73", "wc_weaknesses": "209;61;60", "wc_questions": "103;64;266", "wc_review": "422;254;442", "wc_reply_reviewers": "67;0;0", "wc_reply_authors": "709;435;734", "reply_reviewers": "1;0;0", "reply_authors": "2;1;1", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 
0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 52.0, 15.641824275533422 ], "wc_strengths_avg": [ 66.33333333333333, 8.055363982396383 ], "wc_weaknesses_avg": [ 110.0, 70.00476174280338 ], "wc_questions_avg": [ 144.33333333333334, 87.49222187651249 ], "wc_review_avg": [ 372.6666666666667, 84.30631978419859 ], "wc_reply_reviewers_avg": [ 22.333333333333332, 31.584102892999123 ], "wc_reply_authors_avg": [ 626.0, 135.44248471829903 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.18898223650461363, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7008315424132990650&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Stanford University;University College London;University of California, Berkeley;University of Oxford;University of Montreal", "aff_unique_dep": "Computer Science Department;;;;", "aff_unique_url": "https://www.stanford.edu;https://www.ucl.ac.uk;https://www.berkeley.edu;https://www.ox.ac.uk;https://www.umontreal.ca", "aff_unique_abbr": "Stanford;UCL;UC Berkeley;Oxford;UM", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Stanford;;Berkeley", "aff_country_unique_index": "0;1;0;1;2", "aff_country_unique": "United States;United Kingdom;Canada" }, { "id": "AKZtQO81GQ", "title": "Evaluating model bias requires characterizing model mistakes", "track": "main", "status": "Reject", "tldr": "", "abstract": "The ability to properly benchmark model performance in the face of spurious correlation is important to both build better predictors and increase confidence that models are operating as intended. We demonstrate that characterizing (as opposed to simply quantifying) model mistakes across subgroups is pivotal to properly reflect model biases, which are ignored by standard metrics such as worst-group accuracy or accuracy gap. Inspired by the hypothesis testing framework, we introduce SkewSize, a flexible metric that captures bias from mistakes in a model\u2019s predictions. It can be used in multi-class settings or generalised to the open vocabulary setting of generative models. SkewSize is an aggregation of the effect size of the interaction between two categorical variables: the independent variable, representing the bias attribute (i.e. subgroup), and the dependent variable, representing the model\u2019s prediction. We demonstrate the utility of SkewSize in multiple settings including: standard vision models trained on synthetic data, vision models trained on ImageNet as well as the DomainNet distribution shift benchmark, and large scale vision-language models from the BLIP-2 family. 
In each case, the proposed SkewSize is able to highlight biases not captured by other metrics, while also providing insights on the impact of recently proposed techniques, such as instruction tuning.", "keywords": "model bias;performance disparity across subgroups;neural networks evaluation", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/d200a4ccf3be2e7b00c79399a1bacc8c9977b297.zip", "author": "Isabela Albuquerque;Jessica Schrouff;David Warde-Farley;Ali Taylan Cemgil;Sven Gowal;Olivia Wiles", "authorids": "~Isabela_Albuquerque1;~Jessica_Schrouff1;~David_Warde-Farley1;~Ali_Taylan_Cemgil2;~Sven_Gowal2;~Olivia_Wiles1", "gender": "F;F;M;;M;M", "homepage": ";;;;https://www.cmpe.boun.edu.tr/~cemgil/;", "dblp": "210/2719;96/9449;71/9421;194/3191;41/6613;75/8368", "google_scholar": ";https://scholar.google.co.uk/citations?user=2YWm2nMAAAAJ;https://scholar.google.ca/citations?user=MOgfm8oAAAAJ;https://scholar.google.co.uk/citations?user=XQzHJSgAAAAJ;X3ZFZ7AAAAAJ;", "orcid": ";0000-0003-4992-3183;;;http://orcid.org/0000-0003-4463-8455;", "linkedin": ";jessica-schrouff/;;;;", "or_profile": "~Isabela_Albuquerque1;~Jessica_Schrouff1;~David_Warde-Farley1;~Olivia_Wiles1;~ali_taylan_cemgil1;~Sven_Gowal1", "aff": "Google DeepMind;Google DeepMind;Google DeepMind;Google;Bogazici University;Google DeepMind", "aff_domain": "deepmind.com;google.com;google.com;google.com;boun.edu.tr;google.com", "position": "Researcher;Senior Researcher;Research Scientist;Researcher;Full Professor;Research Engineer", "bibtex": "@misc{\nalbuquerque2024evaluating,\ntitle={Evaluating model bias requires characterizing model mistakes},\nauthor={Isabela Albuquerque and Jessica Schrouff and David Warde-Farley and Ali Taylan Cemgil and Sven Gowal and Olivia Wiles},\nyear={2024},\nurl={https://openreview.net/forum?id=AKZtQO81GQ}\n}", "github": "", "project": "", "reviewers": "1DWK;6Zhw;kSV6;bd8T", "site": "https://openreview.net/forum?id=AKZtQO81GQ", "pdf_size": 1076041, "rating": "5;5;6;8", "confidence": "4;4;4;2", "soundness": "3;4;3;3", "contribution": "2;2;2;3", "presentation": "4;3;4;2", "wc_summary": "64;46;71;81", "wc_strengths": "68;56;81;62", "wc_weaknesses": "205;140;116;8", "wc_questions": "119;4;2;100", "wc_review": "456;246;270;251", "wc_reply_reviewers": "113;117;208;0", "wc_reply_authors": "2975;1143;1555;38", "reply_reviewers": "1;1;2;0", "reply_authors": "7;4;3;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 65.5, 12.776932339180638 ], "wc_strengths_avg": [ 66.75, 9.256754290786809 ], "wc_weaknesses_avg": [ 117.25, 70.98371292064117 ], "wc_questions_avg": [ 56.25, 53.67669419776147 ], "wc_review_avg": [ 305.75, 87.20772614854718 ], "wc_reply_reviewers_avg": [ 109.5, 73.7580504080741 ], "wc_reply_authors_avg": [ 1427.75, 1051.5063896619934 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.75, 2.165063509461097 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.9428090415820632, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:sBOFi0Nj4nAJ:scholar.google.com/&scioq=Evaluating+model+bias+requires+characterizing+model+mistakes&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Google;Bogazici University", 
"aff_unique_dep": "Google DeepMind;", "aff_unique_url": "https://deepmind.com;https://www.boun.edu.tr", "aff_unique_abbr": "DeepMind;BU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;1;2;0", "aff_country_unique": "United Kingdom;United States;T\u00fcrkiye" }, { "id": "AL1fq05o7H", "title": "Mamba: Linear-Time Sequence Modeling with Selective State Spaces", "track": "main", "status": "Reject", "tldr": "", "abstract": "Foundation models, now powering most of the exciting applications in deep learning, are almost universally based on the Transformer architecture and its core attention module. Many subquadratic-time architectures such as linear attention, gated convolution and recurrent models, and structured state space models (SSMs) have been developed to address Transformers' computational inefficiency on long sequences, but they have not performed as well as attention on important modalities such as language. We identify that a key weakness of such models is their inability to perform content-based reasoning, and make several improvements. First, simply letting the SSM parameters be functions of the input addresses their weakness with discrete modalities, allowing the model to *selectively* propagate or forget information along the sequence length dimension depending on the current token. Second, even though this change prevents the use of efficient convolutions, we design a hardware-aware parallel algorithm in recurrent mode. We integrate these selective SSMs into a simplified end-to-end neural network architecture without attention or even MLP blocks (**Mamba**). Mamba enjoys fast inference (5$\\times$ higher throughput than Transformers) and linear scaling in sequence length, and its performance improves on real data up to million-length sequences. As a general sequence model backbone, Mamba achieves state-of-the-art performance across several modalities such as language, audio, and genomics. 
On language modeling, our Mamba-1.4B model outperforms Transformers of the same size and matches Transformers twice its size, both in pretraining and downstream evaluation.", "keywords": "Sequence model;language model;state space model;RNN;SSM;S4;Mamba", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Albert Gu;Tri Dao", "authorids": "~Albert_Gu1;~Tri_Dao1", "gender": "M;", "homepage": ";https://tridao.me/", "dblp": "130/0612;206/7018", "google_scholar": "DVCHv1kAAAAJ;NQRw0bQAAAAJ", "orcid": "0000-0002-4946-6042;", "linkedin": ";", "or_profile": "~Albert_Gu1;~Tri_Dao1", "aff": "Carnegie Mellon University;Princeton University", "aff_domain": "cmu.edu;princeton.edu", "position": "Assistant Professor;Assistant Professor", "bibtex": "@misc{\ngu2024mamba,\ntitle={Mamba: Linear-Time Sequence Modeling with Selective State Spaces},\nauthor={Albert Gu and Tri Dao},\nyear={2024},\nurl={https://openreview.net/forum?id=AL1fq05o7H}\n}", "github": "", "project": "", "reviewers": "du8a;iEaX;sf96;5ZBk", "site": "https://openreview.net/forum?id=AL1fq05o7H", "pdf_size": 639407, "rating": "3;6;8;8", "confidence": "5;2;4;5", "soundness": "2;3;4;4", "contribution": "2;3;3;4", "presentation": "3;3;4;4", "wc_summary": "60;186;83;52", "wc_strengths": "71;60;69;20", "wc_weaknesses": "385;13;10;210", "wc_questions": "2;38;46;120", "wc_review": "518;297;208;402", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "2115;335;229;473", "reply_reviewers": "0;0;0;0", "reply_authors": "4;1;1;1", "rating_avg": [ 6.25, 2.0463381929681126 ], "confidence_avg": [ 4.0, 1.224744871391589 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "contribution_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 95.25, 53.6161123171011 ], "wc_strengths_avg": [ 55.0, 20.627651344736268 ], "wc_weaknesses_avg": [ 154.5, 155.81479390609866 ], "wc_questions_avg": [ 51.5, 42.88064831599448 ], "wc_review_avg": [ 356.25, 115.91456983485726 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 788.0, 771.0129700595185 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.09975093361076329, "gs_citation": 3100, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3513516483726437226&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff_unique_index": "0;1", "aff_unique_norm": "Carnegie Mellon University;Princeton University", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.princeton.edu", "aff_unique_abbr": "CMU;Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "AL4tS0HhJT", "title": "Post-prediction confidence training complements supervised learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Wrong prediction is bad. For users, having high confidence on a wrong prediction is even worse. Since even the best-trained class-label predictor will have some chance of making mistakes, users, especially in some AI application areas such as personalized medicine, may want to tell the high quality predictions from the low quality ones. In convolutional neural networks (CNN), confidence on a prediction is associated with the softmax output layer, which gives a probability distribution on the class-labels. 
But even a prediction with 95\\% probability concentrated on one class may still turn out wrong many times more often than the anticipated rate of 5\\%. There are at least three main sources of uncertainty that can cause a large anticipation gap. The first one is that some of the test samples may not belong to the same distribution as the training samples. The second one is the severe population heterogeneity within each class, causing the variation of prediction quality across some hidden subpopulations. The third one is the imperfectness of the prediction model. While most research is focused on the first source of prediction uncertainty, the other two receive much less attention. Here we take a different approach, termed post-prediction confidence training (PPCT), to guide users on how to discern the high-quality predictions from the low-quality ones. Distinctively different from other methods including conformal prediction, PPCT entertains all three sources of uncertainty by searching features to anchor the criticism of prediction quality. An enhancement to CNN configuration is required during network training. We propose a blueprint by coupling each logit node (T channel) in the layer feeding to softmax with an additional node (C channel) and using maxout to link the pair to the softmax layer. The C channel is introduced to counter the T channel as a contrastive feature against the feature of the target class. A high-quality prediction must follow a logically-lucid pattern between T and C for every class. Successful implementations of our methods on popular image datasets are reported.", "keywords": "supervised learning;prediction uncertainty;maxout;feature representation", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Yu-Cheng Li;Hao Ho;Ker-Chau Li", "authorids": "~Yu-Cheng_Li1;~Hao_Ho1;~Ker-Chau_Li1", "gender": "M;M;", "homepage": "https://www.stat.sinica.edu.tw/cht/index.php?act=researcher_manager&code=view&member=75;;http://statistics.ucla.edu/index.php/people1/all-faculty/7809-2/?smid=8818", "dblp": ";;", "google_scholar": ";;", "orcid": ";0000-0001-8911-7907;", "linkedin": ";;", "or_profile": "~Yu-Cheng_Li1;~Hao_Ho1;~Ker-Chau_Li1", "aff": "Academia Sinica;University of California, Los Angeles;UCLA, University of California, Los Angeles", "aff_domain": "sinica.edu.tw;ucla.edu;stat.ucla.edu", "position": "Postdoc;Assistant Professor;Full Professor", "bibtex": "@misc{\nli2024postprediction,\ntitle={Post-prediction confidence training complements supervised learning},\nauthor={Yu-Cheng Li and Hao Ho and Ker-Chau Li},\nyear={2024},\nurl={https://openreview.net/forum?id=AL4tS0HhJT}\n}", "github": "", "project": "", "reviewers": "2myu;c9j6;xsnZ;9DJD", "site": "https://openreview.net/forum?id=AL4tS0HhJT", "pdf_size": 4562090, "rating": "1;3;3;3", "confidence": "4;4;3;3", "soundness": "2;1;2;3", "contribution": "2;1;2;2", "presentation": "1;2;1;2", "wc_summary": "52;49;118;83", "wc_strengths": "20;13;81;54", "wc_weaknesses": "258;299;272;137", "wc_questions": "59;154;71;49", "wc_review": "389;515;542;323", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 2.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.0, 0.7071067811865476 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 1.5, 0.5 ], "wc_summary_avg": [ 75.5, 27.91504970441572 ], "wc_strengths_avg": [ 
42.0, 27.340446228984632 ], "wc_weaknesses_avg": [ 241.5, 62.10676291677099 ], "wc_questions_avg": [ 83.25, 41.583500333665995 ], "wc_review_avg": [ 442.25, 89.85926496472136 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:4zVtrlxgT0cJ:scholar.google.com/&scioq=Post-prediction+confidence+training+complements+supervised+learning&hl=en&as_sdt=0,14", "gs_version_total": 0, "aff_unique_index": "0;1;1", "aff_unique_norm": "Academia Sinica;University of California, Los Angeles", "aff_unique_dep": ";", "aff_unique_url": "https://www.sinica.edu.tw;https://www.ucla.edu", "aff_unique_abbr": "Academia Sinica;UCLA", "aff_campus_unique_index": "0;1;1", "aff_campus_unique": "Taiwan;Los Angeles", "aff_country_unique_index": "0;1;1", "aff_country_unique": "China;United States" }, { "title": "One Forward is Enough for Neural Network Training via Likelihood Ratio Method", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19266", "id": "ALGFFPXWSi", "author_site": "Jinyang Jiang, Zeliang Zhang, Chenliang Xu, Zhaofei Yu, Yijie Peng", "tldr": "", "abstract": "While backpropagation (BP) is the mainstream approach for gradient computation in neural network training, its heavy reliance on the chain rule of differentiation constrains the designing flexibility of network architecture and training pipelines. We avoid the recursive computation in BP and develop a unified likelihood ratio (ULR) method for gradient estimation with only one forward propagation. Not only can ULR be extended to train a wide variety of neural network architectures, but the computation flow in BP can also be rearranged by ULR for better device adaptation. Moreover, we propose several variance reduction techniques to further accelerate the training process. Our experiments offer numerical results across diverse aspects, including various neural network training scenarios, computation flow rearrangement, and fine-tuning of pre-trained models. 
All findings demonstrate that ULR effectively enhances the flexibility of neural network training by permitting localized module training without compromising the global objective and significantly boosts the network robustness.", "keywords": "stochastic optimization;gradient estimation", "primary_area": "optimization", "supplementary_material": "/attachment/31f1213f0c37cde70d7c20340d346e97cceb370d.zip", "author": "Jinyang Jiang;Zeliang Zhang;Chenliang Xu;Zhaofei Yu;Yijie Peng", "authorids": "~Jinyang_Jiang1;~Zeliang_Zhang1;~Chenliang_Xu1;~Zhaofei_Yu1;~Yijie_Peng1", "gender": "M;M;M;M;M", "homepage": ";https://github.com/ZhangAIPI;https://www.cs.rochester.edu/~cxu22/;https://yuzhaofei.github.io;https://www.gsm.pku.edu.cn/faculty/pengyijie/", "dblp": "209/2313-1;219/9383;117/4770;166/0573;", "google_scholar": ";7nLfsSgAAAAJ;https://scholar.google.com.tw/citations?user=54HfyDIAAAAJ;qaUgD50AAAAJ;", "orcid": "0009-0004-7145-6272;;;;", "linkedin": ";;;;", "or_profile": "~Jinyang_Jiang1;~Zeliang_Zhang1;~Chenliang_Xu1;~Zhaofei_Yu1;~Yijie_Peng1", "aff": "Peking University;Microsoft Research;University of Rochester;Peking University;", "aff_domain": "pku.edu.cn;research.microsoft.com;rochester.edu;pku.edu.cn;", "position": "PhD student;Intern;Associate Professor;Assistant Professor;", "bibtex": "@inproceedings{\njiang2024one,\ntitle={One Forward is Enough for Neural Network Training via Likelihood Ratio Method},\nauthor={Jinyang Jiang and Zeliang Zhang and Chenliang Xu and Zhaofei Yu and Yijie Peng},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=ALGFFPXWSi}\n}", "github": "", "project": "", "reviewers": "3HaG;s8CG;hjbZ;HFF5", "pdf_size": 971319, "rating": "6;6;8;8", "confidence": "2;4;3;3", "soundness": "2;3;4;4", "contribution": "2;3;3;3", "presentation": "2;3;4;3", "wc_summary": "141;179;87;123", "wc_strengths": "47;73;87;101", "wc_weaknesses": "97;129;69;188", "wc_questions": "159;285;21;137", "wc_review": "444;666;264;549", "wc_reply_reviewers": "392;73;0;394", "wc_reply_authors": "1506;1680;368;791", "reply_reviewers": "4;1;0;1", "reply_authors": "6;5;2;3", "rating_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 132.5, 33.14739808793444 ], "wc_strengths_avg": [ 77.0, 19.949937343260004 ], "wc_weaknesses_avg": [ 120.75, 44.25141240683737 ], "wc_questions_avg": [ 150.5, 93.69498385719483 ], "wc_review_avg": [ 480.75, 147.73857823872544 ], "wc_reply_reviewers_avg": [ 214.75, 180.11020931640715 ], "wc_reply_authors_avg": [ 1086.25, 531.9268629238422 ], "reply_reviewers_avg": [ 1.5, 1.5 ], "reply_authors_avg": [ 4.0, 1.5811388300841898 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6983145891460231622&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=ALGFFPXWSi", "pdf": "https://openreview.net/pdf?id=ALGFFPXWSi", "email": "pku.edu.cn;research.microsoft.com;rochester.edu;pku.edu.cn;", "author_num": 5, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Peking University;Microsoft;University of Rochester", "aff_unique_dep": ";Microsoft Research;", "aff_unique_url": "http://www.pku.edu.cn;https://www.microsoft.com/en-us/research;https://www.rochester.edu", 
"aff_unique_abbr": "Peking U;MSR;U of R", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "China;United States" }, { "title": "Coeditor: Leveraging Repo-level Diffs for Code Auto-editing", "status": "Spotlight", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19265", "id": "ALVwQjZRS8", "author_site": "Jiayi Wei, Greg Durrett, Isil Dillig", "tldr": "", "abstract": "Developers often dedicate significant time to maintaining and refactoring existing code. However, most prior work on generative models for code focuses solely on creating new code, overlooking the distinctive needs of editing existing code. In this work, we explore a multi-round code auto-editing setting, aiming to predict edits to a code region based on recent changes within the same codebase. Our model, Coeditor, is a fine-tuned language model specifically designed for code editing tasks. We represent code changes using a line diff format and employ static analysis to form large customized model contexts, ensuring the availability of appropriate information for prediction. We collect a code editing dataset from the commit histories of 1650 open-source Python projects for training and evaluation. In a simplified single-round, single-edit task, Coeditor significantly outperforms GPT-3.5 and SOTA open-source code completion models (bringing exact-match accuracy from 34.7 up to 60.4), demonstrating the benefits of incorporating editing history for code completion. In a multi-round, multi-edit setting, we observe substantial gains by iteratively conditioning on additional user edits. We have open-sourced our code, data, and model weights to encourage future research and have released a VSCode extension powered by our model for interactive IDE usage.", "keywords": "language model for code;editing;refactoring", "primary_area": "generative models", "supplementary_material": "/attachment/195f30db5399f6562d3051e183696313ace2ec05.zip", "author": "Jiayi Wei;Greg Durrett;Isil Dillig", "authorids": "~Jiayi_Wei2;~Greg_Durrett1;~Isil_Dillig1", "gender": "M;M;F", "homepage": "https://mrvplusone.github.io;http://www.cs.utexas.edu/~gdurrett/;https://www.cs.utexas.edu/~isil/", "dblp": ";69/7968;", "google_scholar": "fTJ8pY8AAAAJ;https://scholar.google.com.tw/citations?user=EpQ_sDEAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Jiayi_Wei2;~Greg_Durrett1;~Isil_Dillig1", "aff": "Augment Computing;University of Texas at Austin;University of Texas, Austin", "aff_domain": "augmentcode.com;utexas.edu;utexas.edu", "position": "Researcher;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nwei2024coeditor,\ntitle={Coeditor: Leveraging Repo-level Diffs for Code Auto-editing},\nauthor={Jiayi Wei and Greg Durrett and Isil Dillig},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=ALVwQjZRS8}\n}", "github": "", "project": "", "reviewers": "vqdF;sVPE;ueao;Kf1h", "pdf_size": 6469199, "rating": "5;6;6;8", "confidence": "5;3;4;4", "soundness": "2;3;3;3", "contribution": "2;4;3;3", "presentation": "4;4;3;3", "wc_summary": "105;162;142;107", "wc_strengths": "102;40;65;44", "wc_weaknesses": "557;111;116;87", "wc_questions": "80;44;139;104", "wc_review": "844;357;462;342", "wc_reply_reviewers": "378;120;25;0", "wc_reply_authors": "1046;294;1017;250", "reply_reviewers": "1;1;1;0", "reply_authors": "2;1;2;1", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 4.0, 
0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 129.0, 24.072806234421446 ], "wc_strengths_avg": [ 62.75, 24.57005290999594 ], "wc_weaknesses_avg": [ 217.75, 196.17259620038678 ], "wc_questions_avg": [ 91.75, 34.64372237505664 ], "wc_review_avg": [ 501.25, 203.21586429213642 ], "wc_reply_reviewers_avg": [ 130.75, 149.6051051936397 ], "wc_reply_authors_avg": [ 651.75, 380.2067694031762 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3244428422615251, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2219570809759591003&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=ALVwQjZRS8", "pdf": "https://openreview.net/pdf?id=ALVwQjZRS8", "email": "augmentcode.com;utexas.edu;utexas.edu", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Augment Computing;University of Texas at Austin", "aff_unique_dep": ";", "aff_unique_url": ";https://www.utexas.edu", "aff_unique_abbr": ";UT Austin", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Austin", "aff_country_unique_index": "1;1", "aff_country_unique": ";United States" }, { "id": "AMCaG2TAeg", "title": "Causal Influence-Aware Counterfactual Data Augmentation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Pre-recorded data and human-collected demonstrations are both valuable and practical resources for teaching robots complex behaviors.\nIdeally, learning agents should not be constrained by the scarcity of available demonstrations, but rather generalize to as many new situations as possible.\nHowever, the combinatorial nature of real-world scenarios typically requires a huge amount of data to prevent neural network policies from picking up on spurious and non-causal factors.\nWe propose CAIAC, a data augmentation method that can create feasible synthetic samples from a fixed dataset without the need to perform new environment interactions.\nMotivated by the fact that an agent may only modify the environment through its actions, we swap causally $\\textit{action}$-unaffected parts of the state-space from different observed trajectories in the dataset.\nIn high-dimensional benchmark environments, we observe an increase in generalization capabilities and sample efficiency.", "keywords": "deep reinforcement learning;data augmentation;learning from demonstrations;out-of-distribution generalization", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "N\u00faria Armengol Urp\u00ed;Georg Martius", "authorids": "~N\u00faria_Armengol_Urp\u00ed1;~Georg_Martius1", "gender": "F;M", "homepage": ";https://uni-tuebingen.de/de/264672", "dblp": ";47/2706", "google_scholar": "https://scholar.google.co.uk/citations?user=Cq6i6XwAAAAJ;https://scholar.google.de/citations?user=b-JF-UIAAAAJ", "orcid": ";", "linkedin": "nuriaarmengolurpi;", "or_profile": "~N\u00faria_Armengol_Urp\u00ed1;~Georg_Martius1", "aff": "ETHZ - ETH Zurich;Max Planck Institute for Intelligent Systems", "aff_domain": "ethz.ch;tuebingen.mpg.de", "position": "PhD student;Assistant Professor", "bibtex": "@misc{\nurp{\\'\\i}2024causal,\ntitle={Causal Influence-Aware Counterfactual Data Augmentation},\nauthor={N{\\'u}ria Armengol Urp{\\'\\i} and Georg 
Martius},\nyear={2024},\nurl={https://openreview.net/forum?id=AMCaG2TAeg}\n}", "github": "", "project": "", "reviewers": "LtoA;1iyX;GhVi", "site": "https://openreview.net/forum?id=AMCaG2TAeg", "pdf_size": 8146296, "rating": "3;5;5", "confidence": "4;3;3", "soundness": "2;3;2", "contribution": "2;1;2", "presentation": "3;2;3", "wc_summary": "149;64;71", "wc_strengths": "59;45;78", "wc_weaknesses": "480;544;168", "wc_questions": "263;22;53", "wc_review": "951;675;370", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "2405;1598;881", "reply_reviewers": "0;0;0", "reply_authors": "5;3;2", "rating_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 1.6666666666666667, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 94.66666666666667, 38.5256047612782 ], "wc_strengths_avg": [ 60.666666666666664, 13.523641850067197 ], "wc_weaknesses_avg": [ 397.3333333333333, 164.25454500730126 ], "wc_questions_avg": [ 112.66666666666667, 107.05242744665915 ], "wc_review_avg": [ 665.3333333333334, 237.29072651262393 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1628.0, 622.5319268921073 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 3.3333333333333335, 1.247219128924647 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.9999999999999998, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:zBA2vid52oUJ:scholar.google.com/&scioq=Causal+Influence-Aware+Counterfactual+Data+Augmentation&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "ETH Zurich;Max Planck Institute for Intelligent Systems", "aff_unique_dep": ";Intelligent Systems", "aff_unique_url": "https://www.ethz.ch;https://www.mpi-is.mpg.de", "aff_unique_abbr": "ETHZ;MPI-IS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Switzerland;Germany" }, { "id": "AMDKqZcZbi", "title": "Rapid Learning without Catastrophic Forgetting in the Morris Water Maze", "track": "main", "status": "Reject", "tldr": "", "abstract": "Machine learning models typically struggle to swiftly adapt to novel tasks while maintaining proficiency on previously trained tasks. This contrasts starkly with animals, which demonstrate these capabilities easily. The differences between ML models and animals must stem from particular neural architectures and representations for memory and memory-policy interactions. We propose a new task that requires rapid and continual learning, the sequential Morris Water Maze (sWM). Drawing inspiration from biology, we show that 1) a content-addressable heteroassociative memory based on the entorhinal-hippocampal circuit with grid cells that retain knowledge across diverse environments, and 2) a spatially invariant convolutional network architecture for rapid adaptation across unfamiliar environments together perform rapid learning, good generalization, and continual learning without forgetting. Our model simultaneously outperforms ANN baselines from both the continual and few-shot learning contexts. 
It retains knowledge of past environments while rapidly acquiring the skills to navigate new ones, thereby addressing the seemingly opposing challenges of quick knowledge transfer and sustaining proficiency in previously learned tasks.", "keywords": "neuroscience;cognitive science;water maze;continual learning;catastrophic forgetting", "primary_area": "applications to neuroscience & cognitive science", "supplementary_material": "", "author": "Raymond Wang;Jaedong Hwang;Akhilan Boopathy;Ila R Fiete", "authorids": "~Raymond_Wang1;~Jaedong_Hwang1;~Akhilan_Boopathy1;~Ila_R_Fiete1", "gender": ";M;M;F", "homepage": ";https://jd730.github.io/;;https://fietelab.mit.edu/", "dblp": ";239/1982;230/8358;", "google_scholar": ";https://scholar.google.co.kr/citations?user=bITgqEUAAAAJ;;uE-CihIAAAAJ", "orcid": ";;;0000-0003-4738-2539", "linkedin": "raymond-w2/;;;", "or_profile": "~Raymond_Wang1;~Jaedong_Hwang1;~Akhilan_Boopathy1;~Ila_R_Fiete1", "aff": "University of California, Berkeley;Massachusetts Institute of Technology;Amazon;Massachusetts Institute of Technology", "aff_domain": "berkeley.edu;mit.edu;amazon.com;mit.edu", "position": "Undergrad student;PhD student;Intern;Professor", "bibtex": "@misc{\nwang2024rapid,\ntitle={Rapid Learning without Catastrophic Forgetting in the Morris Water Maze},\nauthor={Raymond Wang and Jaedong Hwang and Akhilan Boopathy and Ila R Fiete},\nyear={2024},\nurl={https://openreview.net/forum?id=AMDKqZcZbi}\n}", "github": "", "project": "", "reviewers": "KBVt;3WUm;ndgr;h9UH", "site": "https://openreview.net/forum?id=AMDKqZcZbi", "pdf_size": 5572537, "rating": "3;5;6;6", "confidence": "3;2;3;4", "soundness": "2;2;3;3", "contribution": "1;2;2;3", "presentation": "2;1;4;3", "wc_summary": "75;75;72;55", "wc_strengths": "9;72;66;36", "wc_weaknesses": "37;280;133;160", "wc_questions": "122;106;93;2", "wc_review": "243;533;364;253", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "357;770;905;541", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;2;1", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 69.25, 8.317902379807062 ], "wc_strengths_avg": [ 45.75, 25.222757581200355 ], "wc_weaknesses_avg": [ 152.5, 86.65015868421708 ], "wc_questions_avg": [ 80.75, 46.61209606958263 ], "wc_review_avg": [ 348.25, 116.75910028772918 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 643.25, 210.340646333513 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.28867513459481287, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16521589547593330114&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "University of California, Berkeley;Massachusetts Institute of Technology;Amazon", "aff_unique_dep": ";;Amazon.com, Inc.", "aff_unique_url": "https://www.berkeley.edu;https://web.mit.edu;https://www.amazon.com", "aff_unique_abbr": "UC Berkeley;MIT;Amazon", "aff_campus_unique_index": "0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "AMivuI7Bnk", "title": "State-wise Constrained Policy Optimization", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Reinforcement Learning (RL) algorithms have shown 
tremendous success in simulation environments, but their application to real-world problems faces significant challenges, with safety being a major concern. In particular, enforcing state-wise constraints is essential for many challenging tasks such as autonomous driving and robot manipulation. However, existing safe RL algorithms under the framework of Constrained Markov Decision Process (CMDP) do not consider state-wise constraints. To address this gap, we propose State-wise Constrained Policy Optimization (SCPO), the first general-purpose policy search algorithm for state-wise constrained reinforcement learning. SCPO provides guarantees for state-wise constraint satisfaction in expectation. In particular, we introduce the framework of Maximum Markov Decision Process, and prove that the worst-case safety violation is bounded under SCPO. We demonstrate the effectiveness of our approach on training neural network policies for extensive robot locomotion tasks, where the agent must satisfy a variety of state-wise safety constraints. Our results show that SCPO significantly outperforms existing methods and can handle state-wise constraints in high-dimensional robotics tasks.", "keywords": "Safe Reinforcement Learning;State-wise Safety Guarantee;Trust Region Optimization", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/293250a7998d5139e445399979e677c4e2d85462.zip", "author": "Weiye Zhao;Rui Chen;Yifan Sun;Feihan Li;Tianhao Wei;Changliu Liu", "authorids": "~Weiye_Zhao1;~Rui_Chen11;~Yifan_Sun9;~Feihan_Li1;~Tianhao_Wei1;~Changliu_Liu1", "gender": "M;M;M;M;M;F", "homepage": "https://github.com/CaesarAndylaw;https://ruichen.pub/;https://yifansun98.github.io/;;;http://www.cs.cmu.edu/~cliu6/index.html", "dblp": "228/6863;;99/10261-11;;222/5386;166/3563", "google_scholar": "P-79KOcAAAAJ;XiUE0wMAAAAJ;DGhQSYUAAAAJ;;V22j1C0AAAAJ;", "orcid": "0000-0002-8426-5238;0000-0002-8671-8771;0009-0007-2073-7789;0000-0003-1770-4664;;", "linkedin": ";;yifansun1/;;;", "or_profile": "~Weiye_Zhao1;~Rui_Chen11;~Yifan_Sun9;~Feihan_Li1;~Tianhao_Wei1;~Changliu_Liu1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Tsinghua University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "andrew.cmu.edu;andrew.cmu.edu;andrew.cmu.edu;tsinghua.edu.cn;andrew.cmu.edu;cmu.edu", "position": "PhD student;PhD student;PhD student;Undergrad student;PhD student;Assistant Professor", "bibtex": "@misc{\nzhao2024statewise,\ntitle={State-wise Constrained Policy Optimization},\nauthor={Weiye Zhao and Rui Chen and Yifan Sun and Feihan Li and Tianhao Wei and Changliu Liu},\nyear={2024},\nurl={https://openreview.net/forum?id=AMivuI7Bnk}\n}", "github": "", "project": "", "reviewers": "VGHE;bUD6;A4Kd", "site": "https://openreview.net/forum?id=AMivuI7Bnk", "pdf_size": 11387510, "rating": "3;3;8", "confidence": "4;3;3", "soundness": "3;2;3", "contribution": "2;1;2", "presentation": "2;2;3", "wc_summary": "51;122;79", "wc_strengths": "70;180;30", "wc_weaknesses": "158;201;31", "wc_questions": "36;363;36", "wc_review": "315;866;176", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 4.666666666666667, 2.357022603955158 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 1.6666666666666667, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], 
"wc_summary_avg": [ 84.0, 29.20045661743437 ], "wc_strengths_avg": [ 93.33333333333333, 63.42099196813483 ], "wc_weaknesses_avg": [ 130.0, 72.17109301283075 ], "wc_questions_avg": [ 145.0, 154.14927829866735 ], "wc_review_avg": [ 452.3333333333333, 297.96010172877544 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 3, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5000000000000001, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1975768402285703586&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "Carnegie Mellon University;Tsinghua University", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.tsinghua.edu.cn", "aff_unique_abbr": "CMU;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "United States;China" }, { "id": "AN5uo4ByWH", "title": "Curve Your Attention: Mixed-Curvature Transformers for Graph Representation Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Real-world graphs naturally exhibit hierarchical trees and cyclic structures that are unfit for the typical Euclidean space. While there exist graph neural networks that utilize hyperbolic or spherical spaces towards embedding such structures more accurately, these methods are confined under the message-passing paradigm, making them vulnerable against side-effects such as oversmoothing and oversquashing. More recent work have proposed global attention-based graph Transformers that can alleviate such drawbacks and easily model long-range interactions, but their extensions towards non-Euclidean geometry are yet unexplored. To bridge this gap, we propose Fully Product-Stereographic Transformer, a generalization of Transformers towards operating entirely on the product of constant curvature spaces. When combined with tokenized graph Transformers, our model can learn the curvature appropriate for the input graph in an end-to-end fashion, without any additional tuning on different curvature initializations. We also provide a kernelized approach to non-Euclidean attention, which enables our model to run with computational cost linear to the number of nodes and edges while respecting the underlying geometry. 
Experiments on graph reconstruction and node classification demonstrate the benefits of generalizing Transformers to the non-Euclidean domain.", "keywords": "Non-Euclidean Geometry;Product-Stereographic Space;Transformers", "primary_area": "learning on graphs and other geometries & topologies", "supplementary_material": "/attachment/ad32c9985e6772b7ce2864b91399ef9420469d8d.zip", "author": "Sungjun Cho;Seunghyuk Cho;Sungwoo Park;Hankook Lee;Honglak Lee;Moontae Lee", "authorids": "~Sungjun_Cho1;~Seunghyuk_Cho1;~Sungwoo_Park3;~Hankook_Lee1;~Honglak_Lee2;~Moontae_Lee1", "gender": "M;M;M;;M;M", "homepage": "https://sc782.github.io/;https://seunghyukcho.github.io;https://hankook.github.io;https://moontae.people.uic.edu;http://web.eecs.umich.edu/~honglak;", "dblp": "254/8021;284/8079;223/4393;132/1761;58/2562;92/6585", "google_scholar": "https://scholar.google.com/citations?hl=en;4OOM9_cAAAAJ;CgqswXUAAAAJ;BMvYy9cAAAAJ;fmSHtE8AAAAJ;B1xpjO8AAAAJ", "orcid": ";0000-0002-9124-2712;;0000-0001-5542-3463;;", "linkedin": "sungjun-cho-46982411a/;4stack/;;moontae-lee-975248123/;;", "or_profile": "~Sungjun_Cho1;~Seunghyuk_Cho1;~Hankook_Lee1;~Moontae_Lee1;~Honglak_Lee1;~Sung_Woo_Park2", "aff": "LG AI Research;Pohang University of Science and Technology;LG AI Research;University of Illinois, Chicago;LG AI Research;University of California, Berkeley", "aff_domain": "lgresearch.ai;postech.ac.kr;lgresearch.ai;uic.edu;lgresearch.ai;berkeley.edu", "position": "Researcher;PhD student;Researcher;Assistant Professor;Chief Scientist;Postdoc", "bibtex": "@misc{\ncho2024curve,\ntitle={Curve Your Attention: Mixed-Curvature Transformers for Graph Representation Learning},\nauthor={Sungjun Cho and Seunghyuk Cho and Sungwoo Park and Hankook Lee and Honglak Lee and Moontae Lee},\nyear={2024},\nurl={https://openreview.net/forum?id=AN5uo4ByWH}\n}", "github": "", "project": "", "reviewers": "g83M;c5LD;Wzei", "site": "https://openreview.net/forum?id=AN5uo4ByWH", "pdf_size": 1349155, "rating": "1;5;5", "confidence": "5;4;4", "soundness": "2;3;2", "contribution": "1;3;2", "presentation": "2;3;3", "wc_summary": "48;88;122", "wc_strengths": "27;28;77", "wc_weaknesses": "220;154;537", "wc_questions": "50;88;29", "wc_review": "345;358;765", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 3.6666666666666665, 1.8856180831641267 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.0, 0.816496580927726 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 86.0, 30.243456592570013 ], "wc_strengths_avg": [ 44.0, 23.338094752285727 ], "wc_weaknesses_avg": [ 303.6666666666667, 167.17721801197143 ], "wc_questions_avg": [ 55.666666666666664, 24.417662095749915 ], "wc_review_avg": [ 489.3333333333333, 194.99800568780756 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.9999999999999998, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15592352316861997658&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;0;2;0;3", "aff_unique_norm": "LG;Pohang University of Science and Technology;University of Illinois at Chicago;University of California, Berkeley", "aff_unique_dep": "LG AI Research;;;", "aff_unique_url": 
"https://www.lgaires.com;https://www.postech.ac.kr;https://www.uic.edu;https://www.berkeley.edu", "aff_unique_abbr": "LG AI;POSTECH;UIC;UC Berkeley", "aff_campus_unique_index": "1;2;3", "aff_campus_unique": ";Pohang;Chicago;Berkeley", "aff_country_unique_index": "0;0;0;1;0;1", "aff_country_unique": "South Korea;United States" }, { "id": "ANJxbH4eQQ", "title": "Beyond the training set: an intuitive method for detecting distribution shift in model-based optimization", "track": "main", "status": "Reject", "tldr": "", "abstract": "Model-based optimization (MBO) is increasingly applied to design problems in science and engineering. A common scenario involves using a fixed training set to train models, with the goal of designing new samples that outperform those present in the training data. A major challenge in this setting is distribution shift, where the distributions of training and designed samples are different. While some shift is expected, as the goal is to create better designs, this change can negatively affect model accuracy and subsequently, design quality. Despite the widespread nature of this problem, addressing it demands deep domain knowledge and artful application. To tackle this issue, we propose a straightforward method for design practitioners that detects distribution shifts. This method trains a binary classifier using knowledge of the unlabeled design distribution to separate the training data from the design data. The classifier\u2019s logit scores are then used as a proxy measure of distribution shift. We validate our method in a real-world application by running offline MBO and evaluate the effect of distribution shift on design quality. We find the intensity of the shift in the design distribution varies based on the number of steps taken by the optimization algorithm, and our simple approach identifies these shifts. 
This enables users to constrain their search to regions where the model's predictions are reliable, thereby increasing the quality of designs.", "keywords": "protein engineering;sequence design;model-based optimization;distribution shift", "primary_area": "applications to physical sciences (physics, chemistry, biology, etc.)", "supplementary_material": "", "author": "Farhan Damani;David H Brookes;Theodore Sternlieb;Cameron Webster;Stephen Malina;Rishi Jajoo;Kathy Lin;Sam Sinai", "authorids": "~Farhan_Damani1;~David_H_Brookes1;~Theodore_Sternlieb1;~Cameron_Webster1;~Stephen_Malina1;~Rishi_Jajoo1;~Kathy_Lin1;~Sam_Sinai1", "gender": ";;M;M;;M;;M", "homepage": "http://fdamani.com;;;;https://stephenmalina.com/;;;", "dblp": ";;;;;;;", "google_scholar": ";;;;https://scholar.google.com/citations?hl=en;;;4k0EcsIAAAAJ", "orcid": ";;;;;;;", "linkedin": ";;theodore-sternlieb/;cameron-webster-25a5955a/;;rishi-jajoo-3166166;kathy-lin-05093254/;sam-sinai-710a0221/", "or_profile": "~Farhan_Damani1;~David_H_Brookes1;~Theodore_Sternlieb1;~Cameron_Webster1;~Stephen_Malina1;~Rishi_Jajoo1;~Kathy_Lin1;~Sam_Sinai1", "aff": "Dyno Therapeutics;;;Dyno Therapeutics;;;Dyno Therapeutics;Dyno Therapeutics", "aff_domain": "dynotx.com;;;dynotx.com;;;dynotx.com;dynotx.com", "position": "Researcher;;;Researcher;;;Researcher;Principal Researcher", "bibtex": "@misc{\ndamani2024beyond,\ntitle={Beyond the training set: an intuitive method for detecting distribution shift in model-based optimization},\nauthor={Farhan Damani and David H Brookes and Theodore Sternlieb and Cameron Webster and Stephen Malina and Rishi Jajoo and Kathy Lin and Sam Sinai},\nyear={2024},\nurl={https://openreview.net/forum?id=ANJxbH4eQQ}\n}", "github": "", "project": "", "reviewers": "a8tv;6Sc3;nQCc;jVj2;SeaM", "site": "https://openreview.net/forum?id=ANJxbH4eQQ", "pdf_size": 7382108, "rating": "3;5;6;6;6", "confidence": "5;3;4;3;3", "soundness": "2;2;3;3;4", "contribution": "2;2;2;2;3", "presentation": "2;3;3;3;4", "wc_summary": "104;62;179;88;123", "wc_strengths": "43;116;25;17;74", "wc_weaknesses": "263;123;180;2;61", "wc_questions": "290;55;24;79;86", "wc_review": "700;356;408;186;344", "wc_reply_reviewers": "585;0;279;0;0", "wc_reply_authors": "2125;1181;953;603;1253", "reply_reviewers": "2;0;3;0;0", "reply_authors": "5;3;3;2;3", "rating_avg": [ 5.2, 1.16619037896906 ], "confidence_avg": [ 3.6, 0.8 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "contribution_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 111.2, 39.36191052273759 ], "wc_strengths_avg": [ 55.0, 36.24913792078372 ], "wc_weaknesses_avg": [ 125.8, 90.87882041487994 ], "wc_questions_avg": [ 106.8, 94.13479696690271 ], "wc_review_avg": [ 398.8, 167.8837693167508 ], "wc_reply_reviewers_avg": [ 172.8, 232.70874500112797 ], "wc_reply_authors_avg": [ 1223.0, 504.6915889927234 ], "reply_reviewers_avg": [ 1.0, 1.2649110640673518 ], "reply_authors_avg": [ 3.2, 0.9797958971132712 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.7717436331412899, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10987622068086027041&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Dyno Therapeutics", "aff_unique_dep": "", "aff_unique_url": "https://www.dynotherapeutics.com", "aff_unique_abbr": "", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { 
"id": "ANK10b0sp9", "title": "Generalization error bounds for iterative learning algorithms with bounded updates", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "This paper explores the generalization characteristics of iterative learning algorithms with bounded updates for non-convex loss functions, employing information-theoretic techniques. Our key contribution is a novel bound for the generalization error of these algorithms with bounded updates. Our approach introduces two main novelties: 1) we reformulate the mutual information as the uncertainty of updates, providing a new perspective, and 2) instead of using the chaining rule of mutual information, we employ a variance decomposition technique to decompose information across iterations, allowing for a simpler surrogate process. We analyze our generalization bound under various settings and demonstrate improved bounds. To bridge the gap between theory and practice, we also examine the previously observed scaling behavior in large language models. Ultimately, our work takes a further step for developing practical generalization theories.", "keywords": "Information theory; generalization bounds; learning algorithm", "primary_area": "learning theory", "supplementary_material": "", "author": "Jingwen Fu;Nanning Zheng", "authorids": "~Jingwen_Fu1;~Nanning_Zheng1", "gender": "M;M", "homepage": "https://www.jw-fu.cn/;", "dblp": "247/5290;07/256-1", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN", "orcid": ";", "linkedin": ";", "or_profile": "~Jingwen_Fu1;~Nanning_Zheng1", "aff": "Microsoft;Xi'an Jiaotong University", "aff_domain": "microsoft.com;xjtu.edu.cn", "position": "Intern;Full Professor", "bibtex": "@misc{\nfu2024generalization,\ntitle={Generalization error bounds for iterative learning algorithms with bounded updates},\nauthor={Jingwen Fu and Nanning Zheng},\nyear={2024},\nurl={https://openreview.net/forum?id=ANK10b0sp9}\n}", "github": "", "project": "", "reviewers": "j93d;UUy1;QX9v;BS2q", "site": "https://openreview.net/forum?id=ANK10b0sp9", "pdf_size": 404840, "rating": "3;3;3;6", "confidence": "4;3;5;4", "soundness": "2;2;3;3", "contribution": "1;2;2;2", "presentation": "3;2;2;3", "wc_summary": "93;20;51;112", "wc_strengths": "28;20;18;68", "wc_weaknesses": "216;77;245;29", "wc_questions": "119;14;106;78", "wc_review": "456;131;420;287", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 69.0, 35.88175023601831 ], "wc_strengths_avg": [ 33.5, 20.266968199511243 ], "wc_weaknesses_avg": [ 141.75, 90.93782216437779 ], "wc_questions_avg": [ 79.25, 40.48070528041724 ], "wc_review_avg": [ 323.5, 127.72724846327819 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14415402670218393217&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Microsoft;Xi'an Jiao Tong University", "aff_unique_dep": "Microsoft Corporation;", "aff_unique_url": "https://www.microsoft.com;https://www.xjtu.edu.cn", "aff_unique_abbr": 
"Microsoft;XJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;China" }, { "title": "Generalization in diffusion models arises from geometry-adaptive harmonic representations", "status": "Oral", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19264", "id": "ANvmVS2Yr0", "author_site": "Zahra Kadkhodaie, Florentin Guth, Eero Simoncelli, St\u00e9phane Mallat", "tldr": "", "abstract": "Deep neural networks (DNNs) trained for image denoising are able to generate high-quality samples with score-based reverse diffusion algorithms. These impressive capabilities seem to imply an escape from the curse of dimensionality, but recent reports of memorization of the training set raise the question of whether these networks are learning the \"true\" continuous density of the data. Here, we show that two DNNs trained on non-overlapping subsets of a dataset learn nearly the same score function, and thus the same density, when the number of training images is large enough. In this regime of strong generalization, diffusion-generated images are distinct from the training set, and are of high visual quality, suggesting that the inductive biases of the DNNs are well-aligned with the data density. We analyze the learned denoising functions and show that the inductive biases give rise to a shrinkage operation in a basis adapted to the underlying image. Examination of these bases reveals oscillating harmonic structures along contours and in homogeneous regions. We demonstrate that trained denoisers are inductively biased towards these geometry-adaptive harmonic bases since they arise not only when the network is trained on photographic images, but also when it is trained on image classes supported on low-dimensional manifolds for which the harmonic basis is suboptimal. 
Finally, we show that when trained on regular image classes for which the optimal basis is known to be geometry-adaptive and harmonic, the denoising performance of the networks is near-optimal.", "keywords": "diffusion models;memorization;generalization;inductive bias;curse of dimensionality;denoising;geometry-adaptive harmonic basis", "primary_area": "generative models", "supplementary_material": "", "author": "Zahra Kadkhodaie;Florentin Guth;Eero P Simoncelli;St\u00e9phane Mallat", "authorids": "~Zahra_Kadkhodaie1;~Florentin_Guth1;~Eero_P_Simoncelli1;~St\u00e9phane_Mallat1", "gender": "F;;M;M", "homepage": ";;https://www.di.ens.fr/~mallat/;https://www.cns.nyu.edu/~eero/", "dblp": "243/3303;223/6081;61/3978;30/5604", "google_scholar": "_b5JdjYAAAAJ;opC_fpQAAAAJ;https://scholar.google.com.tw/citations?user=g_YTmSgAAAAJ;MplR7_cAAAAJ", "orcid": ";;;0000-0002-1206-527X", "linkedin": ";;;eero-simoncelli-445782123", "or_profile": "~Zahra_Kadkhodaie1;~Florentin_Guth1;~St\u00e9phane_Mallat1;~Eero_Peter_Simoncelli1", "aff": "Flatiron Institute;Simons Foundation;;New York University", "aff_domain": "flatironinstitute.org;simonsfoundation.org;;nyu.edu", "position": "Postdoc;Postdoc;;Full Professor", "bibtex": "@inproceedings{\nkadkhodaie2024generalization,\ntitle={Generalization in diffusion models arises from geometry-adaptive harmonic representations},\nauthor={Zahra Kadkhodaie and Florentin Guth and Eero P Simoncelli and St{\\'e}phane Mallat},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=ANvmVS2Yr0}\n}", "github": "", "project": "", "reviewers": "89Fo;SQqg;rH4m;4Rws", "pdf_size": 6910937, "rating": "8;8;8;10", "confidence": "4;4;3;4", "soundness": "3;3;4;4", "contribution": "3;3;4;4", "presentation": "3;4;4;4", "wc_summary": "94;250;238;106", "wc_strengths": "74;169;183;54", "wc_weaknesses": "31;47;85;9", "wc_questions": "1;15;239;178", "wc_review": "200;481;745;347", "wc_reply_reviewers": "0;0;48;4", "wc_reply_authors": "59;61;435;75", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 8.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "contribution_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 172.0, 72.24956747275377 ], "wc_strengths_avg": [ 120.0, 56.66127425323225 ], "wc_weaknesses_avg": [ 43.0, 27.748873851023216 ], "wc_questions_avg": [ 108.25, 102.66298018273189 ], "wc_review_avg": [ 443.25, 200.56965747590039 ], "wc_reply_reviewers_avg": [ 13.0, 20.273134932713294 ], "wc_reply_authors_avg": [ 157.5, 160.3332467082233 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 96, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11172769844580195677&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "openreview": "https://openreview.net/forum?id=ANvmVS2Yr0", "pdf": "https://openreview.net/pdf?id=ANvmVS2Yr0", "email": "flatironinstitute.org;simonsfoundation.org;;nyu.edu", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Flatiron Institute;Simons Foundation;New York University", "aff_unique_dep": ";;", "aff_unique_url": "https://flatironinstitute.org;https://www.simonsfoundation.org;https://www.nyu.edu", "aff_unique_abbr": "Flatiron;Simons Foundation;NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "OpenChat: Advancing Open-source Language Models with Mixed-Quality Data", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19263", "id": "AOJyfhWYHf", "author_site": "Guan Wang, Sijie Cheng, Xianyuan Zhan, Xiangang Li, Sen Song, Yang Liu", "tldr": "", "abstract": "Nowadays, open-source large language models like LLaMA have emerged. Recent developments have incorporated supervised fine-tuning (SFT) and reinforcement learning fine-tuning (RLFT) to align these models with human goals. However, SFT methods treat all training data with mixed quality equally, while RLFT methods require high-quality pairwise or ranking-based preference data. In this study, we present a novel framework, named OpenChat, to advance open-source language models with mixed-quality data. Specifically, we consider the general SFT training data, consisting of a small amount of expert data mixed with a large proportion of sub-optimal data, without any preference labels. We propose the C(onditioned)-RLFT, which regards different data sources as coarse-grained reward labels and learns a class-conditioned policy to leverage complementary data quality information. Interestingly, the optimal policy in C-RLFT can be easily solved through single-stage, RL-free supervised learning, which is lightweight and avoids costly human preference labeling.\nThrough extensive experiments on three standard benchmarks, our openchat-13b fine-tuned with C-RLFT achieves the highest average performance among all 13b open-source language models. Moreover, we use AGIEval to validate the model generalization performance, in which only openchat-13b surpasses the base model. Finally, we conduct a series of analyses to shed light on the effectiveness and robustness of OpenChat. 
Our code, data, and models are publicly available at https://github.com/imoneoi/openchat and https://huggingface.co/openchat.", "keywords": "Open-source Language Models;Fine-tuning;Mixed-quality Data", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/ba82bf0464fce3f6226fc5890f5b6949478ddae3.zip", "author": "Guan Wang;Sijie Cheng;Xianyuan Zhan;Xiangang Li;Sen Song;Yang Liu", "authorids": "~Guan_Wang3;~Sijie_Cheng1;~Xianyuan_Zhan1;~Xiangang_Li1;~Sen_Song1;~Yang_Liu19", "gender": ";F;M;M;M;M", "homepage": ";https://adacheng.github.io/;http://zhanxianyuan.xyz/;;https://brain.tsinghua.edu.cn/en/info/1010/1012.htm;http://nlp.csai.tsinghua.edu.cn/~ly/", "dblp": ";160/7320;181/5081;124/9046;33/3456;51/3710-5", "google_scholar": ";pruwctkAAAAJ;pDMnGloAAAAJ;;cYgtRP4AAAAJ;https://scholar.google.com.hk/citations?user=lVhoKNcAAAAJ", "orcid": ";;0000-0002-3683-0554;;0000-0001-5587-0730;0000-0002-3087-242X", "linkedin": ";;;;;", "or_profile": "~Guan_Wang3;~Sijie_Cheng1;~Xianyuan_Zhan1;~Xiangang_Li1;~Sen_Song1;~Yang_Liu19", "aff": ";Tsinghua University;Tsinghua University;;;Tsinghua University", "aff_domain": ";mails.tsinghua.edu.cn;tsinghua.edu.cn;;;tsinghua.edu.cn", "position": ";PhD student;Associate Professor;;;Professor", "bibtex": "@inproceedings{\nwang2024openchat,\ntitle={OpenChat: Advancing Open-source Language Models with Mixed-Quality Data},\nauthor={Guan Wang and Sijie Cheng and Xianyuan Zhan and Xiangang Li and Sen Song and Yang Liu},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=AOJyfhWYHf}\n}", "github": "", "project": "", "reviewers": "g6Xd;yFnS;U8fK;QA7s", "pdf_size": 1454850, "rating": "6;6;6;6", "confidence": "3;3;3;4", "soundness": "3;2;3;4", "contribution": "3;3;2;3", "presentation": "3;3;4;3", "wc_summary": "51;85;91;73", "wc_strengths": "75;54;161;42", "wc_weaknesses": "111;232;94;23", "wc_questions": "76;209;31;52", "wc_review": "313;580;377;190", "wc_reply_reviewers": "15;240;0;0", "wc_reply_authors": "764;1767;874;803", "reply_reviewers": "1;2;0;0", "reply_authors": "2;7;2;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 75.0, 15.297058540778355 ], "wc_strengths_avg": [ 83.0, 46.55641738793912 ], "wc_weaknesses_avg": [ 115.0, 75.18310980532794 ], "wc_questions_avg": [ 92.0, 69.40100863820352 ], "wc_review_avg": [ 365.0, 141.1541710329525 ], "wc_reply_reviewers_avg": [ 63.75, 101.9420791430114 ], "wc_reply_authors_avg": [ 1052.0, 414.6848200742342 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 3.25, 2.165063509461097 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 232, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16578751109969287542&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=AOJyfhWYHf", "pdf": "https://openreview.net/pdf?id=AOJyfhWYHf", "email": ";mails.tsinghua.edu.cn;tsinghua.edu.cn;;;tsinghua.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "AOSsLRKQrX", "title": "DisFormer: Disentangled Object Representations for Learning Visual Dynamics Via Transformers", "track": "main", "status": "Reject", "tldr": "", "abstract": "We focus on the task of visual dynamics prediction. Recent work has shown that object-centric representations can greatly help improve the accuracy of learning such dynamics in an unsupervised way. Building on top of this work, we ask the question: would it help to learn disentangled object representations, possibly separating the attributes which contribute to the motion dynamics vs which don\u2019t? Though there is some prior work which aims to achieve this, we argue in this paper either it is limiting in their setting, or does not use the learned representation explicitly for predicting visual dynamics, making them sub-optimal. In response, we propose DisFormer, an approach for learning disentangled object representation and use them for predicting visual dynamics. Our architecture extends the notion of slots Locatello et al. (2020) to taking attention over individual objectrepresentations: each slot learns the representation for a block by attending over different parts of an object, and each block is expressed as a linear combination\nover a small set of learned concepts. We perform an iterative refinement over\nthese slots to extract a disentangled representation, which is then fed to a trans-\nformer architecture to predict the next set of latent object representations. Since\nour loss is unsupervised, we need to align the output object masks with those ex-\ntracted from the ground truth image, and we design a novel permutation module\nto achieve this alignment by learning a canonical ordering. We perform a series\nof experiments demonstrating that our learned representations help predict future\ndynamics in the standard setting, where we test on the same environment as train-\ning, and in the setting of transfer, where certain object combinations are never\nseen before. Our method outperforms existing baselines in terms of\npixel prediction and deciphering the dynamics, especially in the zero-shot transfer\nsetting where existing approaches fail miserably. 
Further analysis reveals that our\nlearned representations indeed help with significantly better disentanglement of\nobjects compared to existing techniques.", "keywords": "Unsupervised Visual dynamics prediction;object centric representation;disentangled representation", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Sanket Sanjaykumar Gandhi;Vishal Sharma;Rushil Gupta;Arnab Kumar Mondal;Samanyu Mahajan;Parag Singla", "authorids": "~Sanket_Sanjaykumar_Gandhi1;~Vishal_Sharma1;~Rushil_Gupta1;~Arnab_Kumar_Mondal2;mahajansamanyu@gmail.com;~Parag_Singla1", "gender": "M;M;M;M;;M", "homepage": "https://github.com/sanky29;https://www.cse.iitd.ac.in/~vsharma/;;;;http://www.cse.iitd.ac.in/~parags", "dblp": "382/4942;;;;;14/167", "google_scholar": "https://scholar.google.de/citations?hl=en;HBxIco0AAAAJ;EjrTb2wAAAAJ;MZ8N49AAAAAJ;;https://scholar.google.co.in/citations?user=V49BsgMAAAAJ", "orcid": ";;0009-0006-1402-0426;0000-0001-7297-374X;;", "linkedin": "sanketgandhi29/;;rusgupta/;arnab-mondal-a4448a18/;;", "or_profile": "~Sanket_Sanjaykumar_Gandhi1;~Vishal_Sharma1;~Rushil_Gupta1;~Arnab_Kumar_Mondal2;mahajansamanyu@gmail.com;~Parag_Singla1", "aff": "Indian Institute of Technology, Delhi;Indian Institute of Technology Delhi;Universit\u00e9 de Montr\u00e9al;Fujitsu Research and Development Center Co. Ltm.;;Indian Institute of Technology, Delhi", "aff_domain": "iitd.ac.in;iitd.ac.in;umontreal.ca;fujitsu.com;;iitd.ac.in", "position": "PhD student;PhD student;MS student;Researcher;;Full Professor", "bibtex": "@misc{\ngandhi2024disformer,\ntitle={DisFormer: Disentangled Object Representations for Learning Visual Dynamics Via Transformers},\nauthor={Sanket Sanjaykumar Gandhi and Vishal Sharma and Rushil Gupta and Arnab Kumar Mondal and Samanyu Mahajan and Parag Singla},\nyear={2024},\nurl={https://openreview.net/forum?id=AOSsLRKQrX}\n}", "github": "", "project": "", "reviewers": "M2hR;gVGK;987R;HX9L", "site": "https://openreview.net/forum?id=AOSsLRKQrX", "pdf_size": 1414108, "rating": "3;3;3;5", "confidence": "3;4;3;4", "soundness": "2;3;2;3", "contribution": "2;2;2;3", "presentation": "3;2;2;2", "wc_summary": "88;99;251;112", "wc_strengths": "54;47;39;100", "wc_weaknesses": "184;186;701;133", "wc_questions": "9;145;37;717", "wc_review": "335;477;1028;1062", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "58;379;551;295", "reply_reviewers": "0;0;0;0", "reply_authors": "1;2;3;3", "rating_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 137.5, 66.07760588883347 ], "wc_strengths_avg": [ 60.0, 23.695991222145572 ], "wc_weaknesses_avg": [ 301.0, 231.91485506538817 ], "wc_questions_avg": [ 227.0, 287.42303317584 ], "wc_review_avg": [ 725.5, 323.64370842023175 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 320.75, 177.55896907788127 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8213910590279793768&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Indian Institute of Technology Delhi;Universit\u00e9 de Montr\u00e9al;Fujitsu Research and Development Center", 
"aff_unique_dep": ";;Research and Development", "aff_unique_url": "https://www.iitdelhi.ac.in;https://www.umontreal.ca;https://www.fujitsu.com/global/", "aff_unique_abbr": "IIT Delhi;UdeM;Fujitsu R&D", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Delhi;", "aff_country_unique_index": "0;0;1;2;0", "aff_country_unique": "India;Canada;Japan" }, { "id": "AOpJ3vPNu8", "title": "A Game Theoretic Approach to Meta-Learning: Nash Model-Agnostic Meta-Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Meta-learning, or learning to learn, aims to develop algorithms that can quickly adapt to new tasks and environments. Model-agnostic meta-learning (MAML), proposed as a bi-level optimization problem, is widely used as a baseline for gradient-based meta-learning algorithms that learn meta-parameters. In MAML, task-specific parameters are adapted independently in the inner-loop. After learning the task-specific parameters, the meta-parameters are learned in the outer-loop by minimizing the average task loss. After MAML, some gradient-based meta-learning research has explored objectives beyond average task losses, such as minimizing worst-case task losses for risk management and improving zero-shot performance in unadaptable environments. However, if the purpose of learning meta-parameters changes, the inner-loop formulation must change accordingly. Therefore, we propose a novel gradient-based meta-learning framework that imposes joint strategy sets and utility functions among tasks, making each task affected by other tasks. To solve this complex problem, we first show the proposed framework can be formulated as a generalized Stackelberg game. After that, we propose the NashMAML algorithm to compute the generalized Stackelberg equilibrium of this model and theoretically prove its convergence. We validate our approach on sinusoidal regression and few-shot image classification tasks. 
The results demonstrate that our approach outperforms previous methods in handling few-shot learning problems.", "keywords": "Meta learning;Game Theory;Generalized Stackelberg Equilibrium", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "/attachment/9129609b581da5b697389ab759cd0efb9cc84657.zip", "author": "Jihwan Yu;Jaeyeon Jo;Taeyoung Yun;Jinkyoo Park", "authorids": "~Jihwan_Yu1;~Jaeyeon_Jo1;~Taeyoung_Yun1;~Jinkyoo_Park1", "gender": "M;M;M;M", "homepage": ";;https://dbsxodud-11.github.io;http://silab.kaist.ac.kr/", "dblp": ";273/2679;358/5797.html;156/7535", "google_scholar": ";;_51PhLQAAAAJ;sH2a0nkAAAAJ", "orcid": "0000-0003-0505-3956;0000-0002-6058-1594;0009-0001-4602-6367;0000-0003-2620-1479", "linkedin": ";;;", "or_profile": "~Jihwan_Yu1;~Jaeyeon_Jo1;~Taeyoung_Yun1;~Jinkyoo_Park1", "aff": ";Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": ";kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "position": ";PhD student;MS student;Associate Professor", "bibtex": "@misc{\nyu2024a,\ntitle={A Game Theoretic Approach to Meta-Learning: Nash Model-Agnostic Meta-Learning},\nauthor={Jihwan Yu and Jaeyeon Jo and Taeyoung Yun and Jinkyoo Park},\nyear={2024},\nurl={https://openreview.net/forum?id=AOpJ3vPNu8}\n}", "github": "", "project": "", "reviewers": "i9rv;azMB;XAaQ;SENj", "site": "https://openreview.net/forum?id=AOpJ3vPNu8", "pdf_size": 750119, "rating": "1;5;5;5", "confidence": "4;4;3;3", "soundness": "2;2;3;3", "contribution": "1;2;2;2", "presentation": "3;3;2;3", "wc_summary": "56;70;53;106", "wc_strengths": "44;34;27;41", "wc_weaknesses": "120;217;266;365", "wc_questions": "4;4;40;4", "wc_review": "224;325;386;516", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.0, 1.7320508075688772 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 71.25, 21.063890903629368 ], "wc_strengths_avg": [ 36.5, 6.576473218982953 ], "wc_weaknesses_avg": [ 242.0, 88.33742128905507 ], "wc_questions_avg": [ 13.0, 15.588457268119896 ], "wc_review_avg": [ 362.75, 105.71512427273592 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:aE9e3quQ1LsJ:scholar.google.com/&scioq=A+Game+Theoretic+Approach+to+Meta-Learning:+Nash+Model-Agnostic+Meta-Learning&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "id": "AP779Zy70y", "title": "GATE: How to Keep Out Intrusive Neighbors", "track": "main", "status": "Reject", "tldr": "", "abstract": "Graph Attention Networks (GATs) are designed to provide flexible neighborhood aggregation that assigns weights to neighbors according to their importance. 
In practice, however, GATs are often unable to switch off task-irrelevant neighborhood aggregation, as we show experimentally and analytically. To address this challenge, we propose GATE, a GAT extension that holds three major advantages: i) It alleviates over-smoothing by addressing its root cause of unnecessary neighborhood aggregation. ii) Similarly to perceptrons, it benefits from higher depth as it can still utilize additional layers for (non-)linear feature transformations in case of (nearly) switched-off neighborhood aggregation. iii) By down-weighting connections to unrelated neighbors, it often outperforms GATs on real-world heterophilic datasets. To further validate our claims, we construct a synthetic test bed to analyze a model's ability to utilize the appropriate amount of neighborhood aggregation, which could be of independent interest.", "keywords": "graph attention networks;GNN architecture;neighborhood aggregation", "primary_area": "learning on graphs and other geometries & topologies", "supplementary_material": "/attachment/52a22da891dff12b66675a2383ee60f86966d6ae.zip", "author": "Nimrah Mustafa;Rebekka Burkholz", "authorids": "~Nimrah_Mustafa1;~Rebekka_Burkholz1", "gender": "F;F", "homepage": "https://cispa.de/en/people/c01nimu;https://sites.google.com/view/rebekkaburkholz/startseite", "dblp": ";194/3172", "google_scholar": ";https://scholar.google.ch/citations?user=vkWBb2wAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Nimrah_Mustafa1;~Rebekka_Burkholz1", "aff": "CISPA, saarland university, saarland informatics campus;Helmholtz Center CISPA for Information Security", "aff_domain": "cispa.saarland;cispa.saarland", "position": "PhD student;Associate Professor", "bibtex": "@misc{\nmustafa2024gate,\ntitle={{GATE}: How to Keep Out Intrusive Neighbors},\nauthor={Nimrah Mustafa and Rebekka Burkholz},\nyear={2024},\nurl={https://openreview.net/forum?id=AP779Zy70y}\n}", "github": "", "project": "", "reviewers": "8362;nehk;NiY6;J31R;sCet", "site": "https://openreview.net/forum?id=AP779Zy70y", "pdf_size": 2198281, "rating": "5;5;5;6;6", "confidence": "3;4;3;3;4", "soundness": "3;2;2;3;2", "contribution": "3;3;2;3;2", "presentation": "3;4;2;3;3", "wc_summary": "56;63;101;53;36", "wc_strengths": "38;42;22;18;51", "wc_weaknesses": "102;406;217;35;119", "wc_questions": "15;2;6;83;2", "wc_review": "211;513;346;189;208", "wc_reply_reviewers": "27;339;0;12;0", "wc_reply_authors": "589;1011;426;360;458", "reply_reviewers": "1;2;0;1;0", "reply_authors": "2;2;1;1;1", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "contribution_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 61.8, 21.51650529244933 ], "wc_strengths_avg": [ 34.2, 12.4 ], "wc_weaknesses_avg": [ 175.8, 128.9858907012701 ], "wc_questions_avg": [ 21.6, 31.065092950126513 ], "wc_review_avg": [ 293.4, 123.2665404722628 ], "wc_reply_reviewers_avg": [ 75.6, 132.07361583601775 ], "wc_reply_authors_avg": [ 568.8, 233.33015235926968 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.16666666666666666, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14484222735322731887&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff_unique_index": "0;1", "aff_unique_norm": "Saarland University;Helmholtz Center CISPA", 
"aff_unique_dep": "CISPA;Information Security", "aff_unique_url": "https://www.uni-saarland.de;https://www.cispa.de/", "aff_unique_abbr": "Saarland U;CISPA", "aff_campus_unique_index": "0", "aff_campus_unique": "Saarland Informatics Campus;", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "id": "ARFRZh6pzI", "title": "Tuning-Free Accountable Intervention for LLM Deployment - A Metacognitive Approach", "track": "main", "status": "Reject", "tldr": "", "abstract": "Large Language Models (LLMs) have catalyzed transformative advances across a spectrum of natural language processing tasks through few-shot or zero-shot prompting, bypassing the need for parameter tuning. While convenient, this modus operandi aggravates \"hallucination\" concerns, particularly given the enigmatic \"black-box\" nature behind their gigantic model sizes. Such concerns are exacerbated in high-stakes applications (e.g., healthcare), where unaccountable decision errors can lead to devastating consequences. \nIn contrast, human decision-making relies on nuanced cognitive processes, such as the ability to sense and adaptively correct misjudgments through conceptual understanding. Drawing inspiration from human cognition, we propose an innovative *metacognitive* approach, dubbed **CLEAR**, to equip LLMs with capabilities for self-aware error identification and correction. Our framework facilitates the construction of concept-specific sparse subnetworks that illuminate transparent decision pathways. This provides a novel interface for model *intervention* after deployment. Our intervention offers compelling advantages:\n(*i*) at deployment or inference time, our metacognitive LLMs can self-consciously identify potential mispredictions with minimum human involvement, (*ii*) the model has the capability to self-correct its errors efficiently, obviating the need for additional tuning, and (*iii*) the rectification procedure is not only self-explanatory but also user-friendly, enhancing the interpretability and accessibility of the model. 
By integrating these metacognitive features, our approach pioneers a new path toward engendering greater trustworthiness and accountability in the deployment of LLMs.", "keywords": "Large Language Models;Trustworthiness;Interpretability", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "", "author": "Zhen Tan;Jie Peng;Tianlong Chen;huan liu", "authorids": "~Zhen_Tan2;~Jie_Peng4;~Tianlong_Chen1;~huan_liu1", "gender": "M;M;M;", "homepage": "https://zhen-tan-dmml.github.io/;http://home.ustc.edu.cn/~pengjieb/;https://tianlong-chen.github.io;", "dblp": "13/10345-1.html;;;", "google_scholar": "yMV7JtIAAAAJ;;LE3ctn0AAAAJ;", "orcid": "0009-0006-9548-2330;;0000-0001-7774-8197;", "linkedin": ";;tianlong-chen-783862167/;", "or_profile": "~Zhen_Tan2;~Jie_Peng4;~Tianlong_Chen1;~huan_liu1", "aff": "Amazon;University of Science and Technology of China;Harvard University;", "aff_domain": "amazon.com;ustc.edu.cn;harvard.edu;", "position": "Intern;PhD student;Postdoc;", "bibtex": "@misc{\ntan2024tuningfree,\ntitle={Tuning-Free Accountable Intervention for {LLM} Deployment - A Metacognitive Approach},\nauthor={Zhen Tan and Jie Peng and Tianlong Chen and huan liu},\nyear={2024},\nurl={https://openreview.net/forum?id=ARFRZh6pzI}\n}", "github": "", "project": "", "reviewers": "4fqZ;movx;7QQj;DFsm", "site": "https://openreview.net/forum?id=ARFRZh6pzI", "pdf_size": 1763932, "rating": "5;5;6;8", "confidence": "4;3;2;3", "soundness": "3;3;2;4", "contribution": "3;2;2;3", "presentation": "2;4;3;4", "wc_summary": "135;106;93;91", "wc_strengths": "70;11;52;77", "wc_weaknesses": "52;197;167;80", "wc_questions": "43;7;125;16", "wc_review": "300;321;437;264", "wc_reply_reviewers": "0;16;0;62", "wc_reply_authors": "789;1348;2072;740", "reply_reviewers": "0;1;0;1", "reply_authors": "4;5;5;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 106.25, 17.5695048308141 ], "wc_strengths_avg": [ 52.5, 25.636887486588538 ], "wc_weaknesses_avg": [ 124.0, 59.787122359250574 ], "wc_questions_avg": [ 47.75, 46.5262022950509 ], "wc_review_avg": [ 330.5, 64.77846864506755 ], "wc_reply_reviewers_avg": [ 19.5, 25.391927851189244 ], "wc_reply_authors_avg": [ 1237.25, 537.8798076708216 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 4.0, 1.224744871391589 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.28867513459481287, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14447195481248609480&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Amazon;University of Science and Technology of China;Harvard University", "aff_unique_dep": "Amazon.com, Inc.;;", "aff_unique_url": "https://www.amazon.com;http://www.ustc.edu.cn;https://www.harvard.edu", "aff_unique_abbr": "Amazon;USTC;Harvard", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;China" }, { "id": "ARP0xaE6od", "title": "Adapting Large Language Models for Content Moderation: Pitfalls in Data Engineering and Supervised Fine-tuning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Nowadays, billions of people engage in communication and express their opinions on the internet daily. 
Unfortunately, not all of these expressions are friendly or compliant, making content moderation an indispensable task. With the successful development of Large Language Models (LLMs) in recent years, LLM-based methods have become a feasible solution for handling tasks in various domains. However, in the field of content moderation, there is still a lack of detailed work that systematically introduces implementation details. In this paper, we introduce how to fine-tune an LLM model that can be privately deployed for content moderation. Specifically, we discuss whether incorporating reasons during the fine-tuning process would be better or if it should be treated as a classification task directly. We also explore the benefits of utilizing reasons generated by more powerful LLMs for fine-tuning privately deployed models and the impact of different processing approaches when the answers generated by the more powerful LLMs are incorrect. We report the entire research process and the key findings in this paper, hoping to provide valuable experience for researchers who are fine-tuning privately deployed models in their domain-specific research.", "keywords": "large language models;content moderation;fine-tuning", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Huan Ma;Changqing Zhang;Huazhu Fu;Peilin Zhao;Bingzhe Wu", "authorids": "~Huan_Ma1;~Changqing_Zhang1;~Huazhu_Fu4;~Peilin_Zhao2;~Bingzhe_Wu1", "gender": "M;M;M;;M", "homepage": "https://github.com/MaHuanAAA;http://cic.tju.edu.cn/faculty/zhangchangqing/index.html;https://hzfu.github.io;;", "dblp": ";78/2668;63/7767;84/8411;207/4843", "google_scholar": ";yJGhdykAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?user=HPeX_YcAAAAJ;_3hgtf8AAAAJ", "orcid": "0009-0000-4448-9897;;0000-0002-9702-5524;0000-0001-8543-3953;", "linkedin": "huan-ma-037711276/;;;;", "or_profile": "~Huan_Ma1;~Changqing_Zhang1;~Huazhu_Fu4;~Peilin_Zhao2;~Bingzhe_Wu1", "aff": "Tencent AI Lab;Tianjin University;Institute of High Performance Computing, Singapore, A*STAR;Tencent;Tencent AI Lab", "aff_domain": "tencent.com;tju.edu.cn;ihpc.a-star.edu.sg;tencent.com;tencent.com", "position": "Intern;Associate Professor;Principal Scientist;Researcher;Researcher", "bibtex": "@misc{\nma2024adapting,\ntitle={Adapting Large Language Models for Content Moderation: Pitfalls in Data Engineering and Supervised Fine-tuning},\nauthor={Huan Ma and Changqing Zhang and Huazhu Fu and Peilin Zhao and Bingzhe Wu},\nyear={2024},\nurl={https://openreview.net/forum?id=ARP0xaE6od}\n}", "github": "", "project": "", "reviewers": "65ce;7bgL;8Sfa", "site": "https://openreview.net/forum?id=ARP0xaE6od", "pdf_size": 733690, "rating": "3;3;6", "confidence": "3;4;3", "soundness": "2;2;3", "contribution": "2;2;3", "presentation": "2;2;2", "wc_summary": "107;43;68", "wc_strengths": "51;20;129", "wc_weaknesses": "264;317;127", "wc_questions": "76;3;3", "wc_review": "498;383;327", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 4.0, 1.4142135623730951 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 72.66666666666667, 26.335442953471574 ], "wc_strengths_avg": [ 66.66666666666667, 45.85726647859518 ], "wc_weaknesses_avg": [ 236.0, 
80.05414834139869 ], "wc_questions_avg": [ 27.333333333333332, 34.41253001774532 ], "wc_review_avg": [ 402.6666666666667, 71.1820826394458 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 3, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9620926999652194412&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "Tencent;Tianjin University;Institute of High Performance Computing", "aff_unique_dep": "Tencent AI Lab;;", "aff_unique_url": "https://ai.tencent.com;http://www.tju.edu.cn;https://www.ihpc.a-star.edu.sg", "aff_unique_abbr": "Tencent AI Lab;TJU;IHPC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "China;Singapore" }, { "title": "On the hardness of learning under symmetries", "status": "Spotlight", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19262", "id": "ARPrtuzAnQ", "author_site": "Bobak Kiani, Thien Le, Hannah Lawrence, Stefanie Jegelka, Melanie Weber", "tldr": "", "abstract": "We study the problem of learning equivariant neural networks via gradient descent. The incorporation of known symmetries (\"equivariance\") into neural nets has empirically improved the performance of learning pipelines, in domains ranging from biology to computer vision. However, a rich yet separate line of learning theoretic research has demonstrated that actually learning shallow, fully-connected (i.e. non-symmetric) networks has exponential complexity in the correlational statistical query (CSQ) model, a framework encompassing gradient descent. In this work, we ask: are known problem symmetries sufficient to alleviate the fundamental hardness of learning neural nets with gradient descent? We answer this question in the negative. In particular, we give lower bounds for shallow graph neural networks, convolutional networks, invariant polynomials, and frame-averaged networks for permutation subgroups, which all scale either superpolynomially or exponentially in the relevant input dimension. 
Therefore, in spite of the significant inductive bias imparted via symmetry, actually learning the complete classes of functions represented by equivariant neural networks via gradient descent remains hard.", "keywords": "Equivariance;statistical query;lower bound;computational hardness;invariance;symmetry;neural networks", "primary_area": "learning theory", "supplementary_material": "/attachment/31c55803ff649b8bb0d1d92205dad839c65e06f8.pdf", "author": "Bobak Kiani;Thien Le;Hannah Lawrence;Stefanie Jegelka;Melanie Weber", "authorids": "~Bobak_Kiani1;~Thien_Le1;~Hannah_Lawrence1;~Stefanie_Jegelka3;~Melanie_Weber1", "gender": ";M;F;F;", "homepage": ";https://steven-le-thien.github.io;https://hannahlawrence.github.io/;http://people.csail.mit.edu/stefje/;", "dblp": "232/4086;194/5549;251/5474;38/7003;", "google_scholar": ";WhFGh74AAAAJ;;gTWUZlsAAAAJ;", "orcid": ";0000-0001-5476-8451;;;", "linkedin": "bobak-kiani;;hannah-lawrence-417b5a130/;;", "or_profile": "~Bobak_Kiani1;~Thien_Le1;~Hannah_Lawrence1;~Stefanie_Jegelka3;~Melanie_Weber1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;", "aff_domain": "mit.edu;mit.edu;mit.edu;mit.edu;", "position": "PhD student;PhD student;PhD student;Associate Professor;", "bibtex": "@inproceedings{\nkiani2024on,\ntitle={On the hardness of learning under symmetries},\nauthor={Bobak Kiani and Thien Le and Hannah Lawrence and Stefanie Jegelka and Melanie Weber},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=ARPrtuzAnQ}\n}", "github": "", "project": "", "reviewers": "Fgas;kFXh;aiE7", "pdf_size": 845675, "rating": "6;8;8", "confidence": "5;3;3", "soundness": "4;4;4", "contribution": "3;4;3", "presentation": "4;4;3", "wc_summary": "401;86;73", "wc_strengths": "111;80;124", "wc_weaknesses": "39;30;106", "wc_questions": "45;62;1", "wc_review": "596;258;304", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "317;511;112", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 4.0, 0.0 ], "contribution_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 186.66666666666666, 151.64944957221425 ], "wc_strengths_avg": [ 105.0, 18.457157599876172 ], "wc_weaknesses_avg": [ 58.333333333333336, 33.9050963065371 ], "wc_questions_avg": [ 36.0, 25.703436864876 ], "wc_review_avg": [ 386.0, 149.67520391389706 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 313.3333333333333, 162.91170069157778 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18055256462617413365&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "openreview": "https://openreview.net/forum?id=ARPrtuzAnQ", "pdf": "https://openreview.net/pdf?id=ARPrtuzAnQ", "email": "mit.edu;mit.edu;mit.edu;mit.edu;", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", 
"aff_country_unique": "United States" }, { "id": "ASppt1L3hx", "title": "Cooperative Minibatching in Graph Neural Networks", "track": "main", "status": "Reject", "tldr": "", "abstract": "Significant computational resources are required to train Graph Neural Networks (GNNs) at a large scale,\nand the process is highly data-intensive.\nOne of the most effective ways to reduce resource requirements is minibatch training \ncoupled with graph sampling.\nGNNs have the unique property that items in a minibatch have overlapping data. \nHowever, the commonly implemented Independent Minibatching approach assigns each Processing \nElement (PE) its own minibatch to process, leading to duplicated computations and input data access across PEs. \nThis amplifies the Neighborhood Explosion Phenomenon (NEP), which is the main bottleneck limiting scaling. \nTo reduce the effects of NEP in the multi-PE setting,\nwe propose a new approach called Cooperative Minibatching. \nOur approach capitalizes on the fact that the size of the sampled subgraph is a concave function of the batch size, leading to \nsignificant reductions in the amount of work per seed vertex as batch sizes increase. Hence, it is favorable for \nprocessors to work on a large minibatch together as a single larger processor, instead of working on separate smaller \nminibatches, even though global batch size is identical.\nWe also show how to take advantage of the same phenomenon in serial execution by generating dependent consecutive minibatches. \nOur experimental evaluations show up to 4x bandwidth savings for fetching vertex embeddings, by simply increasing \nthis dependency without harming model convergence. Combining our proposed approaches, we achieve up to 64\\% \nspeedup over Independent Minibatching on single-node multi-GPU systems and show \nthat load balancing is not an issue despite the use of lock-step communication.", "keywords": "graph learning;graph neural networks;gnn;multigpu training", "primary_area": "learning on graphs and other geometries & topologies", "supplementary_material": "/attachment/6dfd498f249f8487991ed90b471e5f8be5df2b12.zip", "author": "Muhammed Fatih Balin;Dominique LaSalle;Umit Catalyurek", "authorids": "~Muhammed_Fatih_Balin1;~Dominique_LaSalle1;~Umit_Catalyurek1", "gender": "M;;M", "homepage": "http://mfbal.in;;https://www.cc.gatech.edu/~umit/", "dblp": "234/8533;;https://dblp.uni-trier.de/pid/c/UmitVCatalyurek.html", "google_scholar": "https://scholar.google.com.tr/citations?user=xfzbywYAAAAJ;;OLDMURQAAAAJ", "orcid": "0000-0001-9935-2687;;", "linkedin": "mfbalin/;;catalyurek/", "or_profile": "~Muhammed_Fatih_Balin1;~Dominique_LaSalle1;~Umit_Catalyurek1", "aff": "Georgia Institute of Technology;;Georgia Institute of Technology", "aff_domain": "gatech.edu;;gatech.edu", "position": "PhD student;;Full Professor", "bibtex": "@misc{\nbalin2024cooperative,\ntitle={Cooperative Minibatching in Graph Neural Networks},\nauthor={Muhammed Fatih Balin and Dominique LaSalle and Umit Catalyurek},\nyear={2024},\nurl={https://openreview.net/forum?id=ASppt1L3hx}\n}", "github": "", "project": "", "reviewers": "LvyT;m5Sh;GNH8", "site": "https://openreview.net/forum?id=ASppt1L3hx", "pdf_size": 2004684, "rating": "1;6;6", "confidence": "5;3;3", "soundness": "1;3;2", "contribution": "1;3;2", "presentation": "1;3;3", "wc_summary": "28;109;60", "wc_strengths": "10;48;67", "wc_weaknesses": "212;40;205", "wc_questions": "80;216;71", "wc_review": "330;413;403", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "695;676;873", 
"reply_reviewers": "0;0;0", "reply_authors": "1;2;2", "rating_avg": [ 4.333333333333333, 2.357022603955158 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 2.0, 0.816496580927726 ], "contribution_avg": [ 2.0, 0.816496580927726 ], "presentation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 65.66666666666667, 33.30999182761166 ], "wc_strengths_avg": [ 41.666666666666664, 23.697163449568293 ], "wc_weaknesses_avg": [ 152.33333333333334, 79.48305200201835 ], "wc_questions_avg": [ 122.33333333333333, 66.33417084898419 ], "wc_review_avg": [ 382.0, 36.995495221265344 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 748.0, 88.72804892854721 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18112592828352537730&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "GAIA: Zero-shot Talking Avatar Generation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19261", "id": "ATEawsFUj4", "author_site": "Tianyu He, Junliang Guo, Runyi Yu, Yuchi Wang, jialiang zhu, Kaikai An, Leyi Li, Xu Tan, Chunyu Wang, Han Hu, HsiangTao Wu, sheng zhao, Jiang Bian", "tldr": "", "abstract": "Zero-shot talking avatar generation aims at synthesizing natural talking videos from speech and a single portrait image. Previous methods have relied on domain-specific heuristics such as warping-based motion representation and 3D Morphable Models, which limit the naturalness and diversity of the generated avatars. In this work, we introduce GAIA (Generative AI for Avatar), which eliminates the domain priors in talking avatar generation. In light of the observation that the speech only drives the motion of the avatar while the appearance of the avatar and the background typically remain the same throughout the entire video, we divide our approach into two stages: 1) disentangling each frame into motion and appearance representations; 2) generating motion sequences conditioned on the speech and reference portrait image. We collect a large-scale high-quality talking avatar dataset and train the model on it with different scales (up to 2B parameters). 
Experimental results verify the superiority, scalability, and flexibility of GAIA as 1) the resulting model beats previous baseline models in terms of naturalness, diversity, lip-sync quality, and visual quality; 2) the framework is scalable since larger models yield better results; 3) it is general and enables different applications like controllable talking avatar generation and text-instructed avatar generation.", "keywords": "Talking Avatar Generation;Video Generation;Disentanglement;Diffusion Models", "primary_area": "generative models", "supplementary_material": "/attachment/5e232f403fc3dd96d63591c6871d42425e9a6f46.zip", "author": "Tianyu He;Junliang Guo;Runyi Yu;Yuchi Wang;jialiang zhu;Kaikai An;Leyi Li;Xu Tan;Chunyu Wang;Han Hu;HsiangTao Wu;sheng zhao;Jiang Bian", "authorids": "~Tianyu_He1;~Junliang_Guo1;~Runyi_Yu1;~Yuchi_Wang1;~jialiang_zhu2;~Kaikai_An1;~Leyi_Li1;~Xu_Tan1;~Chunyu_Wang1;~Han_Hu1;~HsiangTao_Wu1;~sheng_zhao1;~Jiang_Bian1", "gender": "M;M;F;M;M;M;;M;M;M;M;M;M", "homepage": "https://www.microsoft.com/en-us/research/people/tianyuhe/;https://leoguojl.me/;https://ingrid789.github.io/IngridYu/;https://wangyuchi369.github.io/;https://github.com/jialiang-zhu;https://github.com/kkk-an;https://xunmeibuyue.github.io/;https://tan-xu.github.io/;https://www.chunyuwang.org/;https://ancientmooner.github.io/;https://www.microsoft.com/en-us/research/people/musclewu/;https://www.aaai.org/ojs/index.php/AAAI/article/view/4642;https://sites.google.com/view/jiangbian", "dblp": "198/4010;209/9674;94/6433-2;358/5700;;;;96/10484-3;63/7235;;;;09/851-2.html", "google_scholar": "P08KU1YAAAAJ;https://scholar.google.com.sg/citations?user=S88C9ewAAAAJ;https://scholar.google.com.hk/citations?user=jUSqsWkAAAAJ;RxuU_0YAAAAJ;;6TrBRiEAAAAJ;;tob-U1oAAAAJ;https://scholar.google.co.jp/citations?user=VXQV5xwAAAAJ;Jkss014AAAAJ;;689bIIwAAAAJ;pZBEnY8AAAAJ", "orcid": "0000-0002-4828-3228;0000-0001-8360-5483;;;;;;0000-0001-5631-0639;;;;;0000-0002-9472-600X", "linkedin": ";;;;;;;;;;;;jbian/", "or_profile": "~Tianyu_He1;~Junliang_Guo1;~Runyi_Yu1;~Yuchi_Wang1;~jialiang_zhu2;~Kaikai_An1;~Leyi_Li1;~Xu_Tan1;~Chunyu_Wang1;~Han_Hu1;~HsiangTao_Wu1;~sheng_zhao1;~Jiang_Bian1", "aff": "Microsoft Research Asia;Microsoft;Peking University;Peking University;Southeast University;Peking University;Zhejiang University;Microsoft;Microsoft Research Asia;Microsft Research Asia;Microsoft;Microsoft;Microsoft", "aff_domain": "microsoft.com;microsoft.com;pku.edu.cn;pku.edu.cn;seu.edu.cn;pku.edu.cn;zju.edu.cn;microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com", "position": "Researcher;Researcher;MS student;MS student;PhD student;MS student;MS student;Principal Researcher;Researcher;Researcher;Principal Researcher;Researcher;Partner Research Manager", "bibtex": "@inproceedings{\nhe2024gaia,\ntitle={{GAIA}: Zero-shot Talking Avatar Generation},\nauthor={Tianyu He and Junliang Guo and Runyi Yu and Yuchi Wang and jialiang zhu and Kaikai An and Leyi Li and Xu Tan and Chunyu Wang and Han Hu and HsiangTao Wu and sheng zhao and Jiang Bian},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=ATEawsFUj4}\n}", "github": "", "project": "", "reviewers": "tNS3;Lkke;ft55;1S6P", "pdf_size": 12849431, "rating": "6;6;6;8", "confidence": "4;3;5;4", "soundness": "3;3;3;3", "contribution": "3;3;2;3", "presentation": "3;3;3;3", "wc_summary": "60;33;57;109", "wc_strengths": "82;71;41;34", "wc_weaknesses": "49;40;97;190", "wc_questions": 
"23;44;222;26", "wc_review": "214;188;417;359", "wc_reply_reviewers": "0;25;46;54", "wc_reply_authors": "363;590;1326;1096", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;4;4", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 64.75, 27.60774347895894 ], "wc_strengths_avg": [ 57.0, 20.03746490951388 ], "wc_weaknesses_avg": [ 94.0, 59.510503274632114 ], "wc_questions_avg": [ 78.75, 83.09444927310128 ], "wc_review_avg": [ 294.5, 96.16262267638086 ], "wc_reply_reviewers_avg": [ 31.25, 20.92098229051399 ], "wc_reply_authors_avg": [ 843.75, 384.6117360403866 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 1.0 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11354634788031823120&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=ATEawsFUj4", "pdf": "https://openreview.net/pdf?id=ATEawsFUj4", "email": "microsoft.com;microsoft.com;pku.edu.cn;pku.edu.cn;seu.edu.cn;pku.edu.cn;zju.edu.cn;microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com", "author_num": 13, "aff_unique_index": "0;0;1;1;2;1;3;0;0;0;0;0;0", "aff_unique_norm": "Microsoft;Peking University;Southeast University;Zhejiang University", "aff_unique_dep": "Research;;;", "aff_unique_url": "https://www.microsoft.com/en-us/research/group/asia;http://www.pku.edu.cn;https://www.seu.edu.cn/;https://www.zju.edu.cn", "aff_unique_abbr": "MSR Asia;Peking U;SEU;ZJU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Asia;", "aff_country_unique_index": "0;1;0;0;0;0;0;1;0;0;1;1;1", "aff_country_unique": "China;United States" }, { "id": "ATFPZbSZia", "title": "Grouplane: End-to-End 3D Lane Detection with Channel-Wise Grouping", "track": "main", "status": "Reject", "tldr": "", "abstract": "Efficiency is quite important for 3D lane detection while previous detectors are either computationally expensive or difficult for optimization. To bridge this gap, we propose a fully convolutional detector named GroupLane, which is simple, fast, and still maintains high detection precision. Specifically, we first propose to split extracted feature into multiple groups along the channel dimension and employ every group to represent a prediction. In this way, GroupLane realizes end-to-end detection like DETR based on pure convolutional neural network. Then, we propose to represent lanes by performing row-wise classification in bird\u2019s eye view and devise a set of corresponding detection heads. Compared with existing row-wise classification implementations that only support recognizing vertical lanes, ours can detect both vertical and horizontal ones. Additionally, a matching algorithm named single-win one-to-one matching is developed to associate prediction with labels during training. Evaluated on 3 benchmarks, OpenLane, Once-3DLanes, and OpenLane-Huawei, GroupLane adopting ConvNext-Base as the backbone outperforms the published state-of-the-art PersFormer by 13.6% F1 score in the OpenLane validation set. 
Besides, GroupLane with ResNet18 still surpasses PersFormer by 4.9% F1 score, while the inference speed is 7$\\times$ faster.", "keywords": "3D lane detection;end-to-end;row-wise classification;fully convolutional", "primary_area": "applications to robotics, autonomy, planning", "supplementary_material": "/attachment/4f580cb11e17a27feb9056536ca7021a480fb213.zip", "author": "Zhuoling Li;chunrui han;Zheng Ge;Jinrong Yang;En Yu;Haoqian Wang;Hengshuang Zhao;Xiangyu Zhang", "authorids": "~Zhuoling_Li1;~chunrui_han1;~Zheng_Ge1;~Jinrong_Yang1;~En_Yu1;~Haoqian_Wang1;~Hengshuang_Zhao2;~Xiangyu_Zhang1", "gender": "M;F;M;M;M;M;M;M", "homepage": "https://lizhuoling.github.io/;https://github.com/hanchunrui;;https://yancie-yjr.github.io/;https://www.zhihu.com/people/yu-en-47-48;;https://hszhao.github.io;", "dblp": "243/1499;;231/1007;286/5463;213/4929;;185/7848;95/3760-5.html", "google_scholar": "2r6ejykAAAAJ;;hJ-VrrIAAAAJ;8Of_NYQAAAAJ;https://scholar.google.com.hk/citations?user=rWCQMNgAAAAJ;;4uE10I0AAAAJ;yuB-cfoAAAAJ", "orcid": ";;;;;0000-0003-2792-8469;0000-0001-8277-2706;0000-0003-2138-4608", "linkedin": ";;;;;;hengshuang-zhao-347b8391/?originalSubdomain=hk;", "or_profile": "~Zhuoling_Li1;~chunrui_han1;~Zheng_Ge1;~Jinrong_Yang1;~En_Yu1;~Haoqian_Wang1;~Hengshuang_Zhao2;~Xiangyu_Zhang1", "aff": "University of Hong Kong;Megvii Technology Inc.;Megvii Technology Inc.;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Tsinghua University;The University of Hong Kong;MEGVII Technology", "aff_domain": "hku.hk;megvii.com;megvii.com;hust.edu.cn;hust.edu;tsinghua.edu.cn;hku.hk;megvii.com", "position": "PhD student;Researcher;Researcher;PhD student;PhD student;Full Professor;Assistant Professor;Principal Researcher", "bibtex": "@misc{\nli2024grouplane,\ntitle={Grouplane: End-to-End 3D Lane Detection with Channel-Wise Grouping},\nauthor={Zhuoling Li and chunrui han and Zheng Ge and Jinrong Yang and En Yu and Haoqian Wang and Hengshuang Zhao and Xiangyu Zhang},\nyear={2024},\nurl={https://openreview.net/forum?id=ATFPZbSZia}\n}", "github": "", "project": "", "reviewers": "5Kts;RkdN;dGoq", "site": "https://openreview.net/forum?id=ATFPZbSZia", "pdf_size": 3193343, "rating": "5;5;6", "confidence": "4;5;3", "soundness": "3;3;2", "contribution": "2;2;3", "presentation": "3;2;3", "wc_summary": "87;63;62", "wc_strengths": "37;11;84", "wc_weaknesses": "37;75;151", "wc_questions": "82;1;35", "wc_review": "243;150;332", "wc_reply_reviewers": "0;12;125", "wc_reply_authors": "648;796;897", "reply_reviewers": "0;1;1", "reply_authors": "2;4;3", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 70.66666666666667, 11.55662388223981 ], "wc_strengths_avg": [ 44.0, 30.21037349432586 ], "wc_weaknesses_avg": [ 87.66666666666667, 47.39432689913659 ], "wc_questions_avg": [ 39.333333333333336, 33.209770985191824 ], "wc_review_avg": [ 241.66666666666666, 74.3071702835975 ], "wc_reply_reviewers_avg": [ 45.666666666666664, 56.31064631922536 ], "wc_reply_authors_avg": [ 780.3333333333334, 102.25567085605678 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 3.0, 0.816496580927726 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 13, 
"gs_cited_by_link": "https://scholar.google.com/scholar?cites=11111051036937699351&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;1;2;2;3;0;1", "aff_unique_norm": "University of Hong Kong;Megvii Technology;Huazhong University of Science and Technology;Tsinghua University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.hku.hk;https://www.megvii.com;http://www.hust.edu.cn;https://www.tsinghua.edu.cn", "aff_unique_abbr": "HKU;Megvii;HUST;THU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "ATQSDgYwqA", "title": "Diffusion Random Feature Model", "track": "main", "status": "Reject", "tldr": "", "abstract": "Diffusion probabilistic models have been successfully used to generate data from noise. However, most diffusion models are computationally expensive and difficult to interpret with a lack of theoretical justification. Random feature models (RFMs) on the other hand have gained popularity due to their interpretability but their application to complex machine learning tasks remains limited. In this work, we present a diffusion model-inspired deep random feature model that is interpretable and gives comparable numerical results to a fully connected neural network having the same number of trainable parameters. Specifically, we extend existing results for random features and derive generalization bounds between the distribution of sampled data and the true distribution using properties of score matching. We validate our findings by generating samples on the fashion MNIST dataset and instrumental audio data.", "keywords": "Diffusion Models;Deep Random Feature Models;Generalization Bounds", "primary_area": "learning theory", "supplementary_material": "/attachment/90d99ba01a4642577c4724dcead598157e441d63.pdf", "author": "Esha Saha;Giang Tran", "authorids": "~Esha_Saha1;~Giang_Tran2", "gender": "F;", "homepage": ";https://uwaterloo.ca/applied-mathematics/people-profiles/giang-tran", "dblp": "254/3655;", "google_scholar": "https://scholar.google.com/citations?hl=en;-zCxA1AAAAAJ", "orcid": "0000-0002-1548-5712;0000-0002-3518-153X", "linkedin": "esha-saha-bb86171b6/?originalSubdomain=ca;", "or_profile": "~Esha_Saha1;~Giang_Tran2", "aff": "University of Waterloo;University of Waterloo", "aff_domain": "uwaterloo.ca;uwaterloo.ca", "position": "PhD student;Assistant Professor", "bibtex": "@misc{\nsaha2024diffusion,\ntitle={Diffusion Random Feature Model},\nauthor={Esha Saha and Giang Tran},\nyear={2024},\nurl={https://openreview.net/forum?id=ATQSDgYwqA}\n}", "github": "", "project": "", "reviewers": "2YaV;3pnu;vFu7;crxL;58mK;2JVx", "site": "https://openreview.net/forum?id=ATQSDgYwqA", "pdf_size": 966474, "rating": "3;3;3;5;5;6", "confidence": "5;4;4;4;3;3", "soundness": "1;3;1;3;3;3", "contribution": "1;3;1;2;3;3", "presentation": "3;4;3;2;2;3", "wc_summary": "77;54;70;92;100;46", "wc_strengths": "42;44;33;76;79;28", "wc_weaknesses": "172;281;147;415;222;68", "wc_questions": "93;3;20;9;305;2", "wc_review": "384;382;270;592;706;144", "wc_reply_reviewers": "57;26;22;92;0;0", "wc_reply_authors": "539;1217;725;1325;1460;405", "reply_reviewers": "1;1;1;1;0;0", "reply_authors": "2;3;1;4;3;1", "rating_avg": [ 4.166666666666667, 1.2133516482134197 ], "confidence_avg": [ 3.8333333333333335, 0.6871842709362768 ], "soundness_avg": [ 2.3333333333333335, 0.9428090415820634 ], "contribution_avg": [ 2.1666666666666665, 0.8975274678557507 ], "presentation_avg": [ 
2.8333333333333335, 0.6871842709362768 ], "wc_summary_avg": [ 73.16666666666667, 19.169564998252365 ], "wc_strengths_avg": [ 50.333333333333336, 19.955506062794353 ], "wc_weaknesses_avg": [ 217.5, 109.88592569872934 ], "wc_questions_avg": [ 72.0, 108.83014288330233 ], "wc_review_avg": [ 413.0, 188.194048790072 ], "wc_reply_reviewers_avg": [ 32.833333333333336, 32.672193410027155 ], "wc_reply_authors_avg": [ 945.1666666666666, 405.882748531587 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 1.1055415967851334 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.766241095350685, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:cZDvD6V_pBkJ:scholar.google.com/&scioq=Diffusion+Random+Feature+Model&hl=en&as_sdt=0,5", "gs_version_total": 3, "aff_unique_index": "0;0", "aff_unique_norm": "University of Waterloo", "aff_unique_dep": "", "aff_unique_url": "https://uwaterloo.ca", "aff_unique_abbr": "UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "id": "ATaE46G1eJ", "title": "CosPGD: an efficient white-box adversarial attack for pixel-wise prediction tasks", "track": "main", "status": "Reject", "tldr": "", "abstract": "While neural networks allow highly accurate predictions in many tasks, their lack of robustness towards even slight input perturbations hampers their deployment in many real-world applications.\nWhite-box adversarial attacks such as the seminal projected gradient descent (PGD) offer an effective means to evaluate the model robustness and dedicated solutions have been proposed for example for attacks on semantic segmentation or on optical flow. \nTo streamline the evaluation process, we propose an efficient white-box adversarial attack, termed CosPGD, that can be applied to any pixel-wise prediction task in a unified setting.\nTo this end, CosPGD employs a simple loss scaling based on the cosine similarity between the distributions over the predictions and ground truth (or target, for targeted attacks).\nThis leads to efficient evaluations of a model's robustness for pixelwise classification as well as regression models, providing new insights into their performance at earlier attack stages.\nWe outperform the SotA on semantic segmentation attacks in our experiments on PASCAL VOC2012 and CityScapes.\nFurther, we showcase CosPGD's versatility by evaluating optical flow as well as image restoration models. 
\nWe provide code for the CosPGD algorithm and example usage at https://anonymous.4open.science/r/cospgd-iclr2024-909/.", "keywords": "adversarial attacks;pgd;fgsm;cospgd;cosine similarity;semantic segmentation;optical flow;benchmarking tool;benchmark adversarial attack;lp norm;l-inf norm;l-2 norm", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Shashank Agnihotri;Steffen Jung;Margret Keuper", "authorids": "~Shashank_Agnihotri1;~Steffen_Jung1;~Margret_Keuper1", "gender": "M;M;F", "homepage": "https://www.uni-mannheim.de/dws/people/researchers/phd-students/shashank/;http://jung.vision;https://www.vc.informatik.uni-siegen.de/en/keuper-margret", "dblp": ";252/0087-1;95/7589", "google_scholar": "vhm_xu8AAAAJ;x5ovaJcAAAAJ;https://scholar.google.de/citations?user=KMqMQAcAAAAJ", "orcid": "0000-0001-6097-8551;0000-0001-8021-791X;0000-0002-8437-7993", "linkedin": "shashank-agnihotri/;jung-vision/;", "or_profile": "~Shashank_Agnihotri1;~Steffen_Jung1;~Margret_Keuper1", "aff": "Universit\u00e4t Siegen;Saarland Informatics Campus, Max-Planck Institute;Max Planck Institute for Informatics", "aff_domain": "uni-siegen.de;mpi-inf.mpg.de;mpi-inf.mpg", "position": "PhD student;PhD student;Researcher", "bibtex": "@misc{\nagnihotri2024cospgd,\ntitle={Cos{PGD}: an efficient white-box adversarial attack for pixel-wise prediction tasks},\nauthor={Shashank Agnihotri and Steffen Jung and Margret Keuper},\nyear={2024},\nurl={https://openreview.net/forum?id=ATaE46G1eJ}\n}", "github": "", "project": "", "reviewers": "3VUb;E2Ss;kcAq;YMdT", "site": "https://openreview.net/forum?id=ATaE46G1eJ", "pdf_size": 11646064, "rating": "5;5;5;8", "confidence": "4;4;4;5", "soundness": "3;2;3;3", "contribution": "2;2;3;3", "presentation": "3;3;2;4", "wc_summary": "61;64;52;123", "wc_strengths": "37;34;65;167", "wc_weaknesses": "91;367;90;176", "wc_questions": "2;4;151;3", "wc_review": "191;469;358;469", "wc_reply_reviewers": "74;0;0;11", "wc_reply_authors": "1037;1328;751;1697", "reply_reviewers": "1;0;0;1", "reply_authors": "4;3;3;4", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 75.0, 28.062430400804562 ], "wc_strengths_avg": [ 75.75, 54.05263638343647 ], "wc_weaknesses_avg": [ 181.0, 112.91811192187018 ], "wc_questions_avg": [ 40.0, 64.08978077665736 ], "wc_review_avg": [ 371.75, 113.77032785397078 ], "wc_reply_reviewers_avg": [ 21.25, 30.784533454317607 ], "wc_reply_authors_avg": [ 1203.25, 350.54270424585934 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.5, 0.5 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2686630512059451311&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Siegen;Max-Planck Institute;Max Planck Institute for Informatics", "aff_unique_dep": ";Informatics;", "aff_unique_url": "https://www.uni-siegen.de;https://www.mpi-sws.org;https://mpi-inf.mpg.de", "aff_unique_abbr": "Uni Siegen;MPI-SWS;MPII", "aff_campus_unique_index": "1", "aff_campus_unique": ";Saarland", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "id": "ATuX4zRnJo", "title": "NoiseOut: Learning to Gate Improves Robustness in Deep Neural Networks", "track": "main", 
"status": "Withdraw", "tldr": "", "abstract": "Deep Neural Networks (DNNs) achieve impressive performance, when trained on datasets of similar distributions. However, they struggle to generalize to novel data, such as image perturbations, when they differ from the training distribution. Using the Integrated Gradients method, we visualize several perturbed features contributing to the higher classification errors. To filter out such distractor features, we take inspiration from the thalamus, which is a biological gating mechanism that improves the signal fidelity of novel stimuli for task completion. Similarly, we propose a novel method called NoiseOut which is a lightweight modular gating mechanism that can be easily integrated with existing DNNs to enhance its robustness to novel image perturbations. When training on the clean datasets, we randomly replaced a subset of the hidden states with normally-sampled values and, augmented the Integrated Gradients analysis method into an additional objective function. With these processes, NoiseOut gradually learned suitable dynamic gating policies to filter out distractor signals and pass task relevant information to the classifier. When evaluating on perturbed datasets, NoiseOut uses the prior learned gating policies to filter out features that negatively influence classification. We demonstrate that our modular NoiseOut mechanism improves existing DNN's robustness to novel perturbations by achieving strong results on the MNIST-C and ImageNet-C benchmarks.", "keywords": "robust classifiers; bionic algorithms", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Leon Guertler;M Ganesh Kumar;Cheston Tan", "authorids": "~Leon_Guertler1;~M_Ganesh_Kumar1;~Cheston_Tan1", "gender": "M;M;M", "homepage": ";https://mgkumar138.github.io/;", "dblp": ";230/0379;136/9366", "google_scholar": ";sFfy1q4AAAAJ;Up0UYEYAAAAJ", "orcid": ";0000-0001-5559-6428;", "linkedin": "leon-gurtler-6b3847165/;m-ganesh-kumar-28682792/;cheston-tan/", "or_profile": "~Leon_Guertler1;~M_Ganesh_Kumar1;~Cheston_Tan1", "aff": "Nanyang Technological University;Harvard University;Singapore University of Technology and Design", "aff_domain": "ntu.edu.sg;harvard.edu;sutd.edu.sg", "position": "Undergrad student;Postdoc;Assistant Professor", "bibtex": "@misc{\nguertler2024noiseout,\ntitle={NoiseOut: Learning to Gate Improves Robustness in Deep Neural Networks},\nauthor={Leon Guertler and M Ganesh Kumar and Cheston Tan},\nyear={2024},\nurl={https://openreview.net/forum?id=ATuX4zRnJo}\n}", "github": "", "project": "", "reviewers": "ZBE7;zW13;G3rD;oCeg", "site": "https://openreview.net/forum?id=ATuX4zRnJo", "pdf_size": 2996681, "rating": "3;3;3;3", "confidence": "3;5;4;3", "soundness": "2;2;2;2", "contribution": "1;2;2;2", "presentation": "3;3;2;3", "wc_summary": "100;98;47;71", "wc_strengths": "38;131;36;48", "wc_weaknesses": "302;262;67;250", "wc_questions": "4;2;39;44", "wc_review": "444;493;189;413", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 3.0, 0.0 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.0, 0.0 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 79.0, 21.737065119284157 ], "wc_strengths_avg": [ 63.25, 39.37876966082104 ], "wc_weaknesses_avg": [ 220.25, 90.54936498949068 ], "wc_questions_avg": [ 22.25, 19.343926695477318 ], "wc_review_avg": 
[ 384.75, 116.55980224760164 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:7xX8jFi8Fd8J:scholar.google.com/&scioq=NoiseOut:+Learning+to+Gate+Improves+Robustness+in+Deep+Neural+Networks&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "Nanyang Technological University;Harvard University;Singapore University of Technology and Design", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ntu.edu.sg;https://www.harvard.edu;https://www.sutd.edu.sg", "aff_unique_abbr": "NTU;Harvard;SUTD", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Singapore;United States" }, { "title": "A differentiable brain simulator bridging brain simulation and brain-inspired computing", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19260", "id": "AU2gS9ut61", "author_site": "Chaoming Wang, Tianqiu Zhang, Sichao He, Hongyaoxing Gu, Shangyang Li, Si Wu", "tldr": "", "abstract": "Brain simulation builds dynamical models to mimic the structure and functions of the brain, while brain-inspired computing (BIC) develops intelligent systems by learning from the structure and functions of the brain. The two fields are intertwined and should share a common programming framework to facilitate each other's development. However, none of the existing software in the fields can achieve this goal, because traditional brain simulators lack differentiability for training, while existing deep learning (DL) frameworks fail to capture the biophysical realism and complexity of brain dynamics. In this paper, we introduce BrainPy, a differentiable brain simulator developed using JAX and XLA, with the aim of bridging the gap between brain simulation and BIC. BrainPy expands upon the functionalities of JAX, a powerful AI framework, by introducing complete capabilities for flexible, efficient, and scalable brain simulation. It offers a range of sparse and event-driven operators for efficient and scalable brain simulation, an abstraction for managing the intricacies of synaptic computations, a modular and flexible interface for constructing multi-scale brain models, and an object-oriented just-in-time compilation approach to handle the memory-intensive nature of brain dynamics. 
We showcase the efficiency and scalability of BrainPy on benchmark tasks, and highlight its differentiable simulation for biologically plausible spiking models.", "keywords": "brain simulator;brain simulation;computational neuroscience;brain-inspired computing", "primary_area": "applications to neuroscience & cognitive science", "supplementary_material": "/attachment/943a8eb1c80560e7df5cc2d12a0bb25667d6e630.zip", "author": "Chaoming Wang;Tianqiu Zhang;Sichao He;Hongyaoxing Gu;Shangyang Li;Si Wu", "authorids": "~Chaoming_Wang1;~Tianqiu_Zhang1;~Sichao_He1;~Hongyaoxing_Gu1;~Shangyang_Li1;~Si_Wu1", "gender": "M;M;M;M;M;M", "homepage": "https://brainpy.tech/;https://ztqakita.github.io/;https://routhleck.com;;https://mgv.pku.edu.cn/english/people/lbd/soeeace/267528.htm;https://gitee.com/guhongyaoxing", "dblp": ";356/8708;;274/8227;25/437-1;360/4816.html", "google_scholar": ";https://scholar.google.co.jp/citations?user=q-7tMTwAAAAJ;;g77hKJcAAAAJ;;", "orcid": ";0009-0000-6418-1961;;;;", "linkedin": ";;;;;", "or_profile": "~Chaoming_Wang1;~Tianqiu_Zhang1;~Sichao_He1;~Shangyang_Li1;~Si_Wu1;~Gu_Hongyaoxing1", "aff": ";Peking University;Beijing Jiaotong University;Peking University;Peking University;University of Chinese Academy of Sciences", "aff_domain": ";pku.edu.cn;bjtu.edu.cn;pku.edu.cn;pku.edu.cn;ucas.edu.cn", "position": ";PhD student;Undergrad student;PhD student;Full Professor;MS student", "bibtex": "@inproceedings{\nwang2024a,\ntitle={A differentiable brain simulator bridging brain simulation and brain-inspired computing},\nauthor={Chaoming Wang and Tianqiu Zhang and Sichao He and Hongyaoxing Gu and Shangyang Li and Si Wu},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=AU2gS9ut61}\n}", "github": "", "project": "", "reviewers": "zrYr;g6hc;kvGz;wQtY;afG7", "pdf_size": 13405406, "rating": "6;6;6;8;10", "confidence": "3;3;4;4;4", "soundness": "2;2;3;3;4", "contribution": "3;2;2;3;4", "presentation": "3;3;3;3;4", "wc_summary": "66;64;34;40;49", "wc_strengths": "56;40;18;33;27", "wc_weaknesses": "117;68;108;182;36", "wc_questions": "127;40;4;2;90", "wc_review": "366;212;164;257;202", "wc_reply_reviewers": "0;0;143;0;4", "wc_reply_authors": "682;680;614;681;299", "reply_reviewers": "0;0;1;0;1", "reply_authors": "1;1;2;1;1", "rating_avg": [ 7.2, 1.6 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "contribution_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 50.6, 12.705904139414873 ], "wc_strengths_avg": [ 34.8, 12.82809416865966 ], "wc_weaknesses_avg": [ 102.2, 49.34126062434968 ], "wc_questions_avg": [ 52.6, 49.02081190678098 ], "wc_review_avg": [ 240.2, 69.52524721279313 ], "wc_reply_reviewers_avg": [ 29.4, 56.82112283297471 ], "wc_reply_authors_avg": [ 591.2, 148.38787012421196 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.6123724356957946, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17450486356494803188&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=AU2gS9ut61", "pdf": "https://openreview.net/pdf?id=AU2gS9ut61", "email": ";pku.edu.cn;bjtu.edu.cn;pku.edu.cn;pku.edu.cn;ucas.edu.cn", "author_num": 6, "aff_unique_index": "0;1;0;0;2", "aff_unique_norm": "Peking University;Beijing Jiao Tong 
University;University of Chinese Academy of Sciences", "aff_unique_dep": ";;", "aff_unique_url": "http://www.pku.edu.cn;http://www.njtu.edu.cn/en;http://www.ucas.ac.cn", "aff_unique_abbr": "Peking U;BJTU;UCAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "AVBw2Ul4X9", "title": "Towards Precise Prediction Uncertainty in GNNs: Refining GNNs with Topology-grouping Strategy", "track": "main", "status": "Reject", "tldr": "", "abstract": "The calibration of model predictions has recently gained increasing attention in the domain of graph neural networks (GNNs), with a particular emphasis on the underconfidence exhibited by these networks. Among the critical factors identified to be associated with GNN calibration, the concept of neighborhood prediction similarity has been recognized as a pivotal component. Building upon this insight, modern GNN calibration techniques adapt GNNs by smoothing the confidence of individual nodes with those of adjacent nodes. However, these approaches often engage in superficial learning across varying affinity levels, thereby failing to effectively accommodate diverse local topologies. Through an in-depth analysis, we unveil that calibrated logits from preceding research significantly contradict their foundational assumption of nearby affinity, necessitating a re-evaluation of the existing GNN-founded calibration strategies. To address this, we introduce Simi-Mailbox, which categorizes nodes based on both neighborhood representational similarity and their own confidence, irrespective of proximity or connectivity. Our method effectively mitigates miscalibration for nodes exhibiting analogous similarity levels by adjusting their predictions with group-specific temperatures. This encourages a more sophisticated calibration, where each group-wise temperature is tailored to address affiliated nodes with similar topology. 
Extensive experiments demonstrate the effectiveness of Simi-Mailbox across diverse datasets on different GNN architectures.", "keywords": "Graph Neural Networks;Post-hoc Calibration", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/532ff41721a330d7a6ff13286c6cc462d3ac895f.zip", "author": "Hyunjin Seo;Kyusung Seo;Joonhyung Park;Eunho Yang", "authorids": "~Hyunjin_Seo2;~Kyusung_Seo1;~Joonhyung_Park1;~Eunho_Yang1", "gender": "F;M;M;M", "homepage": "https://github.com/hyunjin72;;;https://sites.google.com/site/hleehome2/", "dblp": ";;306/1374;96/2621", "google_scholar": "MFDOhRUAAAAJ;;https://scholar.google.com/citations?hl=ko;", "orcid": ";;;", "linkedin": "hyunjin-seo-97525629a/?originalSubdomain=kr;kyusung-seo-513137172/;joonhyung-park-495527145/;", "or_profile": "~Hyunjin_Seo2;~Kyusung_Seo1;~Joonhyung_Park1;~Eunho_Yang1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "position": "MS student;MS student;PhD student;Associate Professor", "bibtex": "@misc{\nseo2024towards,\ntitle={Towards Precise Prediction Uncertainty in {GNN}s: Refining {GNN}s with Topology-grouping Strategy},\nauthor={Hyunjin Seo and Kyusung Seo and Joonhyung Park and Eunho Yang},\nyear={2024},\nurl={https://openreview.net/forum?id=AVBw2Ul4X9}\n}", "github": "", "project": "", "reviewers": "75Vm;jzwr;3Tgz", "site": "https://openreview.net/forum?id=AVBw2Ul4X9", "pdf_size": 16214180, "rating": "6;6;6", "confidence": "5;3;2", "soundness": "3;3;3", "contribution": "2;2;2", "presentation": "3;3;3", "wc_summary": "74;88;56", "wc_strengths": "67;38;54", "wc_weaknesses": "244;200;60", "wc_questions": "8;31;73", "wc_review": "393;357;243", "wc_reply_reviewers": "142;82;57", "wc_reply_authors": "1427;2097;1193", "reply_reviewers": "1;2;1", "reply_authors": "4;8;5", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 1.247219128924647 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 72.66666666666667, 13.097921802925667 ], "wc_strengths_avg": [ 53.0, 11.86029791643813 ], "wc_weaknesses_avg": [ 168.0, 78.45168364456347 ], "wc_questions_avg": [ 37.333333333333336, 26.911377189252544 ], "wc_review_avg": [ 331.0, 63.93746945258312 ], "wc_reply_reviewers_avg": [ 93.66666666666667, 35.6682242650545 ], "wc_reply_authors_avg": [ 1572.3333333333333, 383.09731864835015 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 5.666666666666667, 1.699673171197595 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:YQssGAScMasJ:scholar.google.com/&scioq=Towards+Precise+Prediction+Uncertainty+in+GNNs:+Refining+GNNs+with+Topology-grouping+Strategy&hl=en&as_sdt=0,5", "gs_version_total": 3, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "M3C: A Framework towards Convergent, Flexible, and Unsupervised Learning of Mixture Graph 
Matching and Clustering", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19259", "id": "AXC9KydyZq", "author_site": "Jiaxin Lu, Zetian Jiang, Tianzhe Wang, Junchi Yan", "tldr": "", "abstract": "Existing graph matching methods typically assume that there are similar structures between graphs and they are matchable. This work addresses a more realistic scenario where graphs exhibit diverse modes, requiring graph grouping before or along with matching, a task termed mixture graph matching and clustering. Specifically, we introduce Minorize-Maximization Matching and Clustering (M3C), a learning-free algorithm that guarantees theoretical convergence through the Minorize-Maximization framework and offers enhanced flexibility via relaxed clustering. Building on M3C, we further develop UM3C, an unsupervised model that incorporates novel edge-wise affinity learning and pseudo label selection. Extensive experimental results on public benchmarks demonstrate that our method outperforms state-of-the-art graph matching and mixture graph matching and clustering approaches in both accuracy and efficiency.", "keywords": "Graph Matching; Joint Optimization; Unsupervised Learning", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Jiaxin Lu;Zetian Jiang;Tianzhe Wang;Junchi Yan", "authorids": "~Jiaxin_Lu1;~Zetian_Jiang1;~Tianzhe_Wang1;~Junchi_Yan2", "gender": "F;M;M;M", "homepage": "https://jiaxin-lu.github.io/;http://thinklab.sjtu.edu.cn/member.html;https://sites.google.com/view/tianzhe-wang/home;http://thinklab.sjtu.edu.cn/", "dblp": ";;243/6770;60/7949.html", "google_scholar": "VWTpWhEAAAAJ;;;ga230VoAAAAJ", "orcid": "0009-0004-4485-9615;;;0000-0001-9639-7679", "linkedin": "jiaxin-lu-9a422127a/;;;", "or_profile": "~Jiaxin_Lu1;~Zetian_Jiang1;~Tianzhe_Wang1;~Junchi_Yan1", "aff": "University of Texas at Austin;Shanghai Jiaotong University;Georgia Institute of Technology;Shanghai Jiaotong University", "aff_domain": "utexas.edu;sjtu.edu.cn;gatech.edu;sjtu.edu.cn", "position": "PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nlu2024mc,\ntitle={M3C: A Framework towards Convergent, Flexible, and Unsupervised Learning of Mixture Graph Matching and Clustering},\nauthor={Jiaxin Lu and Zetian Jiang and Tianzhe Wang and Junchi Yan},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=AXC9KydyZq}\n}", "github": "", "project": "", "reviewers": "euAu;uZV9;ra71;sBU3", "pdf_size": 11516658, "rating": "6;6;8;8", "confidence": "4;3;5;3", "soundness": "2;3;3;3", "contribution": "2;3;3;3", "presentation": "1;2;4;3", "wc_summary": "47;145;48;91", "wc_strengths": "28;72;54;49", "wc_weaknesses": "49;265;192;31", "wc_questions": "310;183;127;70", "wc_review": "434;665;421;241", "wc_reply_reviewers": "47;48;70;0", "wc_reply_authors": "1291;930;1123;685", "reply_reviewers": "1;1;1;0", "reply_authors": "3;2;2;1", "rating_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 82.75, 40.08974307725107 ], "wc_strengths_avg": [ 50.75, 15.674421839417235 ], "wc_weaknesses_avg": [ 134.25, 97.926949814645 ], "wc_questions_avg": [ 172.5, 88.87209910877542 ], "wc_review_avg": [ 440.25, 150.51806370000912 ], "wc_reply_reviewers_avg": [ 
41.25, 25.52817071393875 ], "wc_reply_authors_avg": [ 1007.25, 225.67939095096833 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6076462582348668008&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=AXC9KydyZq", "pdf": "https://openreview.net/pdf?id=AXC9KydyZq", "email": "utexas.edu;sjtu.edu.cn;gatech.edu;sjtu.edu.cn", "author_num": 4, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "University of Texas at Austin;Shanghai Jiao Tong University;Georgia Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.utexas.edu;https://www.sjtu.edu.cn;https://www.gatech.edu", "aff_unique_abbr": "UT Austin;SJTU;Georgia Tech", "aff_campus_unique_index": "0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "United States;China" }, { "title": "Protein-ligand binding representation learning from fine-grained interactions", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19258", "id": "AXbN2qMNiW", "author_site": "Shikun Feng, Minghao Li, Yinjun JIA, Wei-Ying Ma, Yanyan Lan", "tldr": "", "abstract": "The binding between proteins and ligands plays a crucial role in the realm of drug discovery. Previous deep learning approaches have shown promising results over traditional computationally intensive methods, but resulting in poor generalization due to limited supervised data. In this paper, we propose to learn protein-ligand binding representation in a self-supervised learning manner. Different from existing pre-training approaches which treat proteins and ligands individually, we emphasize to discern the intricate binding patterns from fine-grained interactions. Specifically, this self-supervised learning problem is formulated as a prediction of the conclusive binding complex structure given a pocket and ligand with a Transformer based interaction module, which naturally emulates the binding process. To ensure the representation of rich binding information, we introduce two pre-training tasks, i.e. atomic pairwise distance map prediction and mask ligand reconstruction, which comprehensively model the fine-grained interactions from both structure and feature space. 
Extensive experiments have demonstrated the superiority of our method across various binding tasks, including protein-ligand affinity prediction, virtual screening and protein-ligand docking.", "keywords": "Protein-ligand binding;representation learning;self-supervised", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Shikun Feng;Minghao Li;Yinjun Jia;Wei-Ying Ma;Yanyan Lan", "authorids": "~Shikun_Feng3;~Minghao_Li8;~Yinjun_Jia1;~Wei-Ying_Ma2;~Yanyan_Lan2", "gender": "M;M;M;;M", "homepage": "https://fengshikun.github.io;https://github.com/limh1317;https://air.tsinghua.edu.cn/en/info/1046/1189.htm;;https://github.com/EBGU", "dblp": ";;m/WYMa.html;00/6040.html;", "google_scholar": ";;SToCbu8AAAAJ;;", "orcid": ";;;;", "linkedin": ";;wei-ying-ma-16a0171/;;", "or_profile": "~Shikun_Feng3;~Minghao_Li8;~Wei-Ying_Ma2;~Yanyan_Lan2;~Yinjun_Harold_Jia1", "aff": "Tsinghua University;University of Chinese Academy of Sciences;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;ucas.ac.cn;tsinghua.edu.cn;tsinghua.edu.cn;mails.tsinghua.edu.cn", "position": "PhD student;MS student;Full Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nfeng2024proteinligand,\ntitle={Protein-ligand binding representation learning from fine-grained interactions},\nauthor={Shikun Feng and Minghao Li and Yinjun Jia and Wei-Ying Ma and Yanyan Lan},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=AXbN2qMNiW}\n}", "github": "", "project": "", "reviewers": "kw5e;7LCi;PEGf", "pdf_size": 3069624, "rating": "5;6;6", "confidence": "4;5;3", "soundness": "2;3;2", "contribution": "2;3;2", "presentation": "2;4;1", "wc_summary": "110;190;41", "wc_strengths": "24;96;21", "wc_weaknesses": "136;187;120", "wc_questions": "80;86;39", "wc_review": "350;559;221", "wc_reply_reviewers": "109;92;14", "wc_reply_authors": "2186;1882;2277", "reply_reviewers": "1;1;1", "reply_authors": "8;7;7", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 1.247219128924647 ], "wc_summary_avg": [ 113.66666666666667, 60.88422528774501 ], "wc_strengths_avg": [ 47.0, 34.66987164671943 ], "wc_weaknesses_avg": [ 147.66666666666666, 28.56960311628816 ], "wc_questions_avg": [ 68.33333333333333, 20.885933597094056 ], "wc_review_avg": [ 376.6666666666667, 139.27032067968474 ], "wc_reply_reviewers_avg": [ 71.66666666666667, 41.362892656206824 ], "wc_reply_authors_avg": [ 2115.0, 168.89247072225177 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 7.333333333333333, 0.4714045207910317 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10146790950636761999&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=AXbN2qMNiW", "pdf": "https://openreview.net/pdf?id=AXbN2qMNiW", "email": "tsinghua.edu.cn;ucas.ac.cn;tsinghua.edu.cn;tsinghua.edu.cn;mails.tsinghua.edu.cn", "author_num": 5, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Tsinghua University;University of Chinese Academy of Sciences", "aff_unique_dep": ";", "aff_unique_url": 
"https://www.tsinghua.edu.cn;http://www.ucas.ac.cn", "aff_unique_abbr": "THU;UCAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Unleashing the Power of Pre-trained Language Models for Offline Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19257", "id": "AY6aM13gGF", "author_site": "Ruizhe Shi, Yuyao Liu, Yanjie Ze, Simon Du, Huazhe Xu", "tldr": "", "abstract": "Offline reinforcement learning (RL) aims to find a near-optimal policy using pre-collected datasets. Given recent advances in Large Language Models (LLMs) and their few-shot learning prowess, this paper introduces $\\textbf{La}$nguage Models for $\\textbf{Mo}$tion Control ($\\textbf{LaMo}$), a general framework based on Decision Transformers to effectively use pre-trained Language Models (LMs) for offline RL. Our framework highlights four crucial components: (1) Initializing Decision Transformers with sequentially pre-trained LMs, (2) employing the LoRA fine-tuning method, in contrast to full-weight fine-tuning, to combine the pre-trained knowledge from LMs and in-domain knowledge effectively, (3) using the non-linear MLP transformation instead of linear projections, to generate embeddings, and (4) integrating an auxiliary language prediction loss during fine-tuning to stabilize the LMs and retain their original abilities on languages. Empirical results indicate $\\textbf{LaMo}$ achieves state-of-the-art performance in sparse-reward tasks and closes the gap between value-based offline RL methods and decision transformers in dense-reward tasks. In particular, our method demonstrates superior performance in scenarios with limited data samples.", "keywords": "Offline Reinforcement Learning;Decision Transformer;Motion Control", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/a32db095adf9241993ade6a962055977b99c0bcb.pdf", "author": "Ruizhe Shi;Yuyao Liu;Yanjie Ze;Simon Shaolei Du;Huazhe Xu", "authorids": "~Ruizhe_Shi1;~Yuyao_Liu1;~Yanjie_Ze1;~Simon_Shaolei_Du1;~Huazhe_Xu1", "gender": "M;M;M;M;M", "homepage": "http://srzer.github.io;;http://yanjieze.com;http://simonshaoleidu.com;http://hxu.rocks", "dblp": "304/0634.html;;312/5407;176/5602;164/9006", "google_scholar": "0tlXSPkAAAAJ;https://scholar.google.com/citations?hl=en;BO_b2O8AAAAJ;OttawxUAAAAJ;t9HPFawAAAAJ", "orcid": ";;;;", "linkedin": ";;yanjie-ze-a71a0a247/;;", "or_profile": "~Ruizhe_Shi1;~Yuyao_Liu1;~Yanjie_Ze1;~Simon_Shaolei_Du1;~Huazhe_Xu1", "aff": "University of Washington;Massachusetts Institute of Technology;Stanford University;University of Washington;Tsinghua University", "aff_domain": "uw.edu;mit.edu;stanford.edu;washington.edu;tsinghua.edu.cn", "position": "Intern;Visiting Student;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nshi2024unleashing,\ntitle={Unleashing the Power of Pre-trained Language Models for Offline Reinforcement Learning},\nauthor={Ruizhe Shi and Yuyao Liu and Yanjie Ze and Simon Shaolei Du and Huazhe Xu},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=AY6aM13gGF}\n}", "github": "", "project": "", "reviewers": "mYMk;ei7j;1Dwk;GHKm", "pdf_size": 1915431, "rating": "5;6;6;8", "confidence": "4;4;4;4", "soundness": "3;3;3;3", "contribution": "2;2;2;3", "presentation": "4;3;3;4", "wc_summary": "96;101;88;107", "wc_strengths": "142;113;58;205", 
"wc_weaknesses": "128;43;212;560", "wc_questions": "16;104;42;199", "wc_review": "382;361;400;1071", "wc_reply_reviewers": "0;17;62;420", "wc_reply_authors": "1065;520;862;2283", "reply_reviewers": "0;1;1;2", "reply_authors": "3;1;2;6", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 98.0, 6.96419413859206 ], "wc_strengths_avg": [ 129.5, 53.011791141216875 ], "wc_weaknesses_avg": [ 235.75, 196.51001882855743 ], "wc_questions_avg": [ 90.25, 70.45699042678449 ], "wc_review_avg": [ 553.5, 299.0973921651608 ], "wc_reply_reviewers_avg": [ 124.75, 171.96129651755945 ], "wc_reply_authors_avg": [ 1182.5, 664.5549262476353 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.0, 1.8708286933869707 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8269125061704282950&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=AY6aM13gGF", "pdf": "https://openreview.net/pdf?id=AY6aM13gGF", "email": "uw.edu;mit.edu;stanford.edu;washington.edu;tsinghua.edu.cn", "author_num": 5, "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "University of Washington;Massachusetts Institute of Technology;Stanford University;Tsinghua University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.washington.edu;https://web.mit.edu;https://www.stanford.edu;https://www.tsinghua.edu.cn", "aff_unique_abbr": "UW;MIT;Stanford;THU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "United States;China" }, { "title": "Adaptive Regret for Bandits Made Possible: Two Queries Suffice", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19256", "id": "AY9KyTGcnk", "author_site": "Zhou Lu, Qiuyi (Richard) Zhang, Xinyi Chen, Fred Zhang, David Woodruff, Elad Hazan", "tldr": "", "abstract": "Fast changing states or volatile environments pose a significant challenge to online optimization, which needs to perform rapid adaptation under limited observation. In this paper, we give query and regret optimal bandit algorithms under the strict notion of strongly adaptive regret, which measures the maximum regret over any contiguous interval $I$. Due to its worst-case nature, there is an almost-linear $\\Omega(|I|^{1-\\epsilon})$ regret lower bound, when only one query per round is allowed [Daniely el al, ICML 2015]. Surprisingly, with just two queries per round, we give Strongly Adaptive Bandit Learner (StABL) that achieves $\\widetilde{O}(\\sqrt{n|I|})$ adaptive regret for multi-armed bandits with $n$ arms. The bound is tight and cannot be improved in general. Our algorithm leverages a multiplicative update scheme of varying stepsizes and a carefully chosen observation distribution to control the variance. Furthermore, we extend our results and provide optimal algorithms in the bandit convex optimization setting. 
Finally, we empirically demonstrate the superior performance of our algorithms under volatile environments and for downstream tasks, such as algorithm selection for hyperparameter optimization.", "keywords": "adaptive regret;multi arm bandit", "primary_area": "learning theory", "supplementary_material": "/attachment/3056c93ece1d7c6cad8ccff9309bc32f2935e068.pdf", "author": "Zhou Lu;Qiuyi Zhang;Xinyi Chen;Fred Zhang;David Woodruff;Elad Hazan", "authorids": "~Zhou_Lu1;~Qiuyi_Zhang1;~Xinyi_Chen1;~Fred_Zhang1;~David_Woodruff1;~Elad_Hazan1", "gender": ";M;F;M;M;M", "homepage": "https://leozoroaster.github.io/;https://qiuyiz.github.io;;http://fredzhang.me/;http://www.cs.cmu.edu/~dwoodruf/;https://www.ehazan.com", "dblp": "68/11524;133/8559;84/6214;232/9071;w/DPWoodruff;72/739", "google_scholar": "17_nX_kAAAAJ;mE11hO8AAAAJ;;guJ_kBQAAAAJ;https://scholar.google.com.tw/citations?user=0G2t-6sAAAAJ;LnhCGNMAAAAJ", "orcid": ";;;;;", "linkedin": ";;;fred-zhang-0/;;", "or_profile": "~Zhou_Lu1;~Qiuyi_Zhang1;~Xinyi_Chen1;~Fred_Zhang1;~David_Woodruff1;~Elad_Hazan1", "aff": ";Google;Google DeepMind;University of California, Berkeley;Carnegie Mellon University;Princeton University", "aff_domain": ";google.com;google.com;berkeley.edu;cmu.edu;princeton.edu", "position": ";Researcher;Researcher;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nlu2024adaptive,\ntitle={Adaptive Regret for Bandits Made Possible: Two Queries Suffice},\nauthor={Zhou Lu and Qiuyi Zhang and Xinyi Chen and Fred Zhang and David Woodruff and Elad Hazan},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=AY9KyTGcnk}\n}", "github": "", "project": "", "reviewers": "gNKg;EXm4;kZqs;wKMs", "pdf_size": 723821, "rating": "5;8;8;8", "confidence": "5;4;3;3", "soundness": "3;3;3;3", "contribution": "2;3;3;3", "presentation": "3;2;4;3", "wc_summary": "91;143;73;357", "wc_strengths": "90;173;24;180", "wc_weaknesses": "172;339;229;103", "wc_questions": "63;1;2;139", "wc_review": "416;656;328;779", "wc_reply_reviewers": "11;0;4;0", "wc_reply_authors": "406;38;376;233", "reply_reviewers": "1;0;1;0", "reply_authors": "1;1;1;2", "rating_avg": [ 7.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 166.0, 113.22985472038724 ], "wc_strengths_avg": [ 116.75, 64.19258134706845 ], "wc_weaknesses_avg": [ 210.75, 86.447599735331 ], "wc_questions_avg": [ 51.25, 56.543677807514435 ], "wc_review_avg": [ 544.75, 180.83607908821736 ], "wc_reply_reviewers_avg": [ 3.75, 4.493050188902857 ], "wc_reply_authors_avg": [ 263.25, 145.55304016062323 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.8703882797784892, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:qF043ycUjHEJ:scholar.google.com/&scioq=Adaptive+Regret+for+Bandits+Made+Possible:+Two+Queries+Suffice&hl=en&as_sdt=0,33", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=AY9KyTGcnk", "pdf": "https://openreview.net/pdf?id=AY9KyTGcnk", "email": ";google.com;google.com;berkeley.edu;cmu.edu;princeton.edu", "author_num": 6, "aff_unique_index": "0;0;1;2;3", "aff_unique_norm": "Google;University of California, Berkeley;Carnegie Mellon University;Princeton University", 
"aff_unique_dep": "Google;;;", "aff_unique_url": "https://www.google.com;https://www.berkeley.edu;https://www.cmu.edu;https://www.princeton.edu", "aff_unique_abbr": "Google;UC Berkeley;CMU;Princeton", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Mountain View;;Berkeley", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Towards Cross Domain Generalization of Hamiltonian Representation via Meta Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19255", "id": "AZGIwqCyYY", "author_site": "Yeongwoo Song, Hawoong Jeong", "tldr": "", "abstract": "Recent advances in deep learning for physics have focused on discovering shared representations of target systems by incorporating physics priors or inductive biases into neural networks. While effective, these methods are limited to the system domain, where the type of system remains consistent and thus cannot ensure the adaptation to new, or unseen physical systems governed by different laws. For instance, a neural network trained on a mass-spring system cannot guarantee accurate predictions for the behavior of a two-body system or any other system with different physical laws.\nIn this work, we take a significant leap forward by targeting cross domain generalization within the field of Hamiltonian dynamics. \nWe model our system with a graph neural network (GNN) and employ a meta learning algorithm to enable the model to gain experience over a distribution of systems and make it adapt to new physics. Our approach aims to learn a unified Hamiltonian representation that is generalizable across multiple system domains, thereby overcoming the limitations of system-specific models. \nWe demonstrate that the meta-trained model captures the generalized Hamiltonian representation that is consistent across different physical domains.\nOverall, through the use of meta learning, we offer a framework that achieves cross domain generalization, providing a step towards a unified model for understanding a wide array of dynamical systems via deep learning.", "keywords": "hamiltonian dynamics;cross domain generalization;learning physics;meta learning", "primary_area": "applications to physical sciences (physics, chemistry, biology, etc.)", "supplementary_material": "/attachment/13c5178c183aa265ef6bcfdad34ad2fea13afaa0.zip", "author": "Yeongwoo Song;Hawoong Jeong", "authorids": "~Yeongwoo_Song1;~Hawoong_Jeong1", "gender": "M;M", "homepage": "https://ywssng.github.io/;", "dblp": "335/1745;07/3681", "google_scholar": "https://scholar.google.com/citations?hl=ko;", "orcid": ";", "linkedin": ";", "or_profile": "~Yeongwoo_Song1;~Hawoong_Jeong1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.edu;kaist.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nsong2024towards,\ntitle={Towards Cross Domain Generalization of Hamiltonian Representation via Meta Learning},\nauthor={Yeongwoo Song and Hawoong Jeong},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=AZGIwqCyYY}\n}", "github": "", "project": "", "reviewers": "GtBu;xLig;jozM;hDu2", "pdf_size": 6274027, "rating": "5;6;6;6", "confidence": "3;2;3;4", "soundness": "2;3;2;3", "contribution": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "68;71;71;70", "wc_strengths": "118;61;34;70", "wc_weaknesses": "246;31;174;140", 
"wc_questions": "85;33;35;5", "wc_review": "517;196;314;285", "wc_reply_reviewers": "41;15;30;349", "wc_reply_authors": "725;368;414;1218", "reply_reviewers": "1;1;1;2", "reply_authors": "3;2;2;3", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 70.0, 1.224744871391589 ], "wc_strengths_avg": [ 70.75, 30.326349928733592 ], "wc_weaknesses_avg": [ 147.75, 77.51249899209805 ], "wc_questions_avg": [ 39.5, 28.822734082664677 ], "wc_review_avg": [ 328.0, 117.4627600561131 ], "wc_reply_reviewers_avg": [ 108.75, 139.01506213356882 ], "wc_reply_authors_avg": [ 681.25, 338.9552883493633 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11513760535408203006&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=AZGIwqCyYY", "pdf": "https://openreview.net/pdf?id=AZGIwqCyYY", "email": "kaist.edu;kaist.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "id": "AZVmYg3LvS", "title": "Improved Function Space Variational Inference with Informative Priors", "track": "main", "status": "Reject", "tldr": "", "abstract": "Function space variational inference allows Bayesian neural network (BNN) to introduce the prior distribution on the function space directly. Moreover, Recent linear approximation scheme for KL divergence between two random functions, has presented the tractable training objective and thus facilitates imposing the function space prior on BNNs. On the other hand, despite of its tractability, the existing inference suffers from the interpretability issue because the this function space prior is obtained by mapping the pre-defined weight-space prior to the function output via the complex neural network, and thus seems to be less interpretable. Alternatively, thought the uniform function space prior, that imposes a zero mean prior on the function space to encourage the model to be uncertain for out-of-training set, has been considered, this prior can introduce unnecessary uncertainty into the function outputs of the training datasets. Thus, this can cause the trade-off between the uncertainty estimation performances on the in-training and out-of-training sets.\n\n\nIn this work, we aim at refining the function space variational inference to handle the mentioned issue. To this end, we first reconsider the role of the function space prior in view of Bayesian Model prediction, and then build the function space prior to help improve the uncertainty estimation of the BNNs. 
Additionally, we propose a refined variational distribution on function space to encourage the useful predictive functions in sense of Bayesian model averaging, to be sampled, and thus improving the prediction of the BNNs.", "keywords": "Bayesian Neural Network;Function space variational inference", "primary_area": "probabilistic methods (Bayesian methods, variational inference, sampling, UQ, etc.)", "supplementary_material": "", "author": "Yohan Jung;Juho Lee", "authorids": "~Yohan_Jung1;~Juho_Lee2", "gender": "M;M", "homepage": "https://e2ee22.github.io/;https://juho.lee.github.io", "dblp": "256/1530;55/3410-1", "google_scholar": "https://scholar.google.co.kr/citations?user=DwAJS14AAAAJ;Py4URJUAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Yohan_Jung1;~Juho_Lee2", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr", "position": "Postdoc;Associate Professor", "bibtex": "@misc{\njung2024improved,\ntitle={Improved Function Space Variational Inference with Informative Priors},\nauthor={Yohan Jung and Juho Lee},\nyear={2024},\nurl={https://openreview.net/forum?id=AZVmYg3LvS}\n}", "github": "", "project": "", "reviewers": "hyeW;3j4p;QnS6;1KC1", "site": "https://openreview.net/forum?id=AZVmYg3LvS", "pdf_size": 1307371, "rating": "3;3;6;6", "confidence": "4;3;3;3", "soundness": "1;1;3;3", "contribution": "1;2;3;2", "presentation": "1;2;3;2", "wc_summary": "29;146;67;105", "wc_strengths": "32;73;23;117", "wc_weaknesses": "419;279;116;290", "wc_questions": "12;98;4;492", "wc_review": "492;596;210;1004", "wc_reply_reviewers": "0;38;100;269", "wc_reply_authors": "702;843;421;639", "reply_reviewers": "0;1;1;1", "reply_authors": "2;3;2;2", "rating_avg": [ 4.5, 1.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.0, 1.0 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 86.75, 43.49928160326329 ], "wc_strengths_avg": [ 61.25, 37.298625980054545 ], "wc_weaknesses_avg": [ 276.0, 107.53371564304844 ], "wc_questions_avg": [ 151.5, 200.01187464748187 ], "wc_review_avg": [ 575.5, 284.8661966608183 ], "wc_reply_reviewers_avg": [ 101.75, 102.94749875543359 ], "wc_reply_authors_avg": [ 651.25, 152.07625554306628 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5773502691896258, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:LrZndIaURf0J:scholar.google.com/&scioq=Improved+Function+Space+Variational+Inference+with+Informative+Priors&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "title": "Enhancing Instance-Level Image Classification with Set-Level Labels", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19254", "id": "AZW3qlCGTe", "author_site": "Renyu Zhang, Aly Khan, Yuxin Chen, Robert Grossman", "tldr": "", "abstract": "Instance-level image classification tasks have traditionally relied on single-instance labels to train models, e.g., few-shot learning and transfer learning. 
However, set-level coarse-grained labels that capture relationships among instances can provide richer information in real-world scenarios. In this paper, we present a novel approach to enhance instance-level image classification by leveraging set-level labels. We provide a theoretical analysis of the proposed method, including recognition conditions for fast excess risk rate, shedding light on the theoretical foundations of our approach. We conducted experiments on two distinct categories of datasets: natural image datasets and histopathology image datasets. Our experimental results demonstrate the effectiveness of our approach, showcasing improved classification performance compared to traditional single-instance label-based methods. Notably, our algorithm achieves 13\\% improvement in classification accuracy compared to the strongest baseline on the histopathology image classification benchmarks. Importantly, our experimental findings align with the theoretical analysis, reinforcing the robustness and reliability of our proposed method. This work bridges the gap between instance-level and set-level image classification, offering a promising avenue for advancing the capabilities of image classification models with set-level coarse-grained labels.", "keywords": "set-level labels;fast excess risk rate;representation learning;few-shot learning", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/6da5ddfffa12a56a47e07353ad6df009d1936ab5.pdf", "author": "Renyu Zhang;Aly A Khan;Yuxin Chen;Robert L. Grossman", "authorids": "~Renyu_Zhang2;~Aly_A_Khan1;~Yuxin_Chen1;~Robert_L._Grossman2", "gender": "M;;;M", "homepage": "https://zhangrenyuuchicago.github.io/;http://ttic.uchicago.edu/~aakhan/;http://yuxinchen.org/;https://rgrossman.com", "dblp": "152/4749;46/2390;11/5123-1;g/RobertLGrossman.html", "google_scholar": "-4Hr3l0AAAAJ;xIVBCnEAAAAJ;-k1N7HAAAAAJ;3KG8arsAAAAJ", "orcid": ";0000-0003-3933-8538;;0000-0003-3741-5739", "linkedin": ";;;robertgrossman/", "or_profile": "~Renyu_Zhang2;~Aly_A_Khan1;~Yuxin_Chen1;~Robert_L._Grossman2", "aff": "Department of Computer Science, University of Chicago;Toyota Technological Institute at Chicago;University of Chicago;University of Chicago", "aff_domain": "cs.uchicago.edu;ttic.edu;uchicago.edu;uchicago.edu", "position": "PhD student;Courtesy Faculty;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2024enhancing,\ntitle={Enhancing Instance-Level Image Classification with Set-Level Labels},\nauthor={Renyu Zhang and Aly A Khan and Yuxin Chen and Robert L. 
Grossman},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=AZW3qlCGTe}\n}", "github": "", "project": "", "reviewers": "jPxq;xN2u;4JEd", "pdf_size": 41441022, "rating": "5;6;6", "confidence": "4;3;3", "soundness": "3;3;3", "contribution": "2;3;3", "presentation": "3;2;2", "wc_summary": "87;70;76", "wc_strengths": "138;21;24", "wc_weaknesses": "223;92;49", "wc_questions": "69;35;154", "wc_review": "517;218;303", "wc_reply_reviewers": "0;77;0", "wc_reply_authors": "2347;1010;1979", "reply_reviewers": "0;1;0", "reply_authors": "5;3;4", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 77.66666666666667, 7.039570693980959 ], "wc_strengths_avg": [ 61.0, 54.46099521676041 ], "wc_weaknesses_avg": [ 121.33333333333333, 74.00150148626865 ], "wc_questions_avg": [ 86.0, 50.04664490919113 ], "wc_review_avg": [ 346.0, 125.79613136605857 ], "wc_reply_reviewers_avg": [ 25.666666666666668, 36.29814810090944 ], "wc_reply_authors_avg": [ 1778.6666666666667, 563.910355365894 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 4.0, 0.816496580927726 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9999999999999997, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:JmCxTlZUXiUJ:scholar.google.com/&scioq=Enhancing+Instance-Level+Image+Classification+with+Set-Level+Labels&hl=en&as_sdt=0,33", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=AZW3qlCGTe", "pdf": "https://openreview.net/pdf?id=AZW3qlCGTe", "email": "cs.uchicago.edu;ttic.edu;uchicago.edu;uchicago.edu", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Chicago;Toyota Technological Institute at Chicago", "aff_unique_dep": "Department of Computer Science;", "aff_unique_url": "https://www.uchicago.edu;https://www.tti-chicago.org", "aff_unique_abbr": "UChicago;TTI Chicago", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chicago", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "Aarj9MrG8Y", "title": "Towards the Universal Learning Principle for Graph Neural Networks", "track": "main", "status": "Reject", "tldr": "", "abstract": "Graph neural networks (GNNs) are currently highly regarded in graph representation learning tasks due to their significant performance. Although various propagation mechanisms and graph filters were proposed, few works have considered the convergence and stability of graph filters under infinite-depth scenarios. To address this problem, we elucidate the criterion for the graph filter formed by power series and further establish a scalable regularized learning principle, which can guide us on how to design infinite deep GNN. Following the framework, we develop Adaptive Power GNN (APGNN), a deep GNN that employs exponentially decaying weights to aggregate graph information of different orders so as to mine the deeper neighbor information. Different from existing GNNs, APGNN can be seamlessly extended to an infinite-depth network. Moreover, we analyze the generalization of the proposed learning framework via uniform convergence and present its upper bound in theory. 
Experimental results show that APGNN obtains superior performance against the state-of-the-art GNNs.", "keywords": "Graph Neural Network;Graph Filter;Learning Principle", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/024f072f7b6145a4bf4484f38b3e1a1ec9dac4aa.zip", "author": "Foping Chen;Junhong Zhang;Guangfei Liang;Richard Yi Da Xu;Zhihui Lai", "authorids": "~Foping_Chen1;~Junhong_Zhang1;~Guangfei_Liang1;~Richard_Yi_Da_Xu1;~Zhihui_Lai1", "gender": "M;M;M;M;M", "homepage": ";;;https://www.math.hkbu.edu.hk/people/xu-yi-da/;https://www.scholat.com/laizhihui.cn", "dblp": "https://dblp.org/;;368/5290;38/3064;61/7577-1", "google_scholar": ";ribcEAIAAAAJ;0Tz_-WQAAAAJ;ykOUWa4AAAAJ;CkK6ULsAAAAJ", "orcid": ";;0000-0002-5515-7414;0000-0003-2080-4762;0000-0002-4388-3080", "linkedin": ";;;richard-xu-0221a943/;", "or_profile": "~Foping_Chen1;~Junhong_Zhang1;~Guangfei_Liang1;~Richard_Yi_Da_Xu1;~Zhihui_Lai1", "aff": "Shenzhen University;Shenzhen University;Shenzhen University;Hong Kong Baptist University;Shenzhen University", "aff_domain": "szu.edu.cn;szu.edu.cn;szu.edu.cn;hkbu.edu.hk;szu.edu.cn", "position": "MS student;PhD student;MS student;Full Professor;Full Professor", "bibtex": "@misc{\nchen2024towards,\ntitle={Towards the Universal Learning Principle for Graph Neural Networks},\nauthor={Foping Chen and Junhong Zhang and Guangfei Liang and Richard Yi Da Xu and Zhihui Lai},\nyear={2024},\nurl={https://openreview.net/forum?id=Aarj9MrG8Y}\n}", "github": "", "project": "", "reviewers": "W4db;2jyF;X3Vf", "site": "https://openreview.net/forum?id=Aarj9MrG8Y", "pdf_size": 678064, "rating": "3;3;3", "confidence": "3;3;4", "soundness": "2;3;3", "contribution": "1;2;2", "presentation": "1;3;3", "wc_summary": "75;94;32", "wc_strengths": "44;51;53", "wc_weaknesses": "177;175;362", "wc_questions": "102;110;5", "wc_review": "398;430;452", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 3.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 1.6666666666666667, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 67.0, 25.93581821856921 ], "wc_strengths_avg": [ 49.333333333333336, 3.8586123009300755 ], "wc_weaknesses_avg": [ 238.0, 87.68504243408147 ], "wc_questions_avg": [ 72.33333333333333, 47.72374205314956 ], "wc_review_avg": [ 426.6666666666667, 22.17105219775452 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:VevL8YYxxNoJ:scholar.google.com/&scioq=Towards+the+Universal+Learning+Principle+for+Graph+Neural+Networks&hl=en&as_sdt=0,33", "gs_version_total": 2, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Shenzhen University;Hong Kong Baptist University", "aff_unique_dep": ";", "aff_unique_url": "https://www.szu.edu.cn;https://www.hkbu.edu.hk", "aff_unique_abbr": "SZU;HKBU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Implicit regularization of deep residual networks towards neural ODEs", "status": "Spotlight", 
"track": "main", "site": "https://iclr.cc/virtual/2024/poster/19253", "id": "AbXGwqb5Ht", "author_site": "Pierre Marion, Yu-Han Wu, Michael Sander, G\u00e9rard Biau", "tldr": "", "abstract": "Residual neural networks are state-of-the-art deep learning models. Their continuous-depth analog, neural ordinary differential equations (ODEs), are also widely used. Despite their success, the link between the discrete and continuous models still lacks a solid mathematical foundation. In this article, we take a step in this direction by establishing an implicit regularization of deep residual networks towards neural ODEs, for nonlinear networks trained with gradient flow. We prove that if the network is initialized as a discretization of a neural ODE, then such a discretization holds throughout training. Our results are valid for a finite training time, and also as the training time tends to infinity provided that the network satisfies a Polyak-\u0141ojasiewicz condition. Importantly, this condition holds for a family of residual networks where the residuals are two-layer perceptrons with an overparameterization in width that is only linear, and implies the convergence of gradient flow to a global minimum. Numerical experiments illustrate our results.", "keywords": "deep learning theory;residual networks;neural ODEs;optimization;implicit regularization;gradient flow", "primary_area": "learning theory", "supplementary_material": "/attachment/3420e969258ff49b5456cd02f8a157685dcbeb41.zip", "author": "Pierre Marion;Yu-Han Wu;Michael Eli Sander;G\u00e9rard Biau", "authorids": "~Pierre_Marion1;~Yu-Han_Wu1;~Michael_Eli_Sander1;~G\u00e9rard_Biau1", "gender": "M;M;M;M", "homepage": "https://pierremarion23.github.io/;https://github.com/pojoowu;https://michaelsdr.github.io/;https://perso.lpsm.paris/~biau", "dblp": "250/2318;;285/5131;", "google_scholar": "https://scholar.google.fr/citations?user=Q8H5LgIAAAAJ;;COqAqcMAAAAJ;WiW_7VkAAAAJ", "orcid": ";;;", "linkedin": "pierre-marion-816474130/;yu-han-wu-716a9715b/;;", "or_profile": "~Pierre_Marion1;~Yu-Han_Wu1;~Michael_Eli_Sander1;~G\u00e9rard_Biau1", "aff": "EPFL - EPF Lausanne;Ecole Normale Superieure Paris;Ecole Normale Sup\u00e9rieure de Paris;Sorbonne University", "aff_domain": "epfl.ch;ens.psl.eu;ens.fr;sorbonne-universite.fr", "position": "Postdoc;MS student;PhD student;Full Professor", "bibtex": "@inproceedings{\nmarion2024implicit,\ntitle={Implicit regularization of deep residual networks towards neural {ODE}s},\nauthor={Pierre Marion and Yu-Han Wu and Michael Eli Sander and G{\\'e}rard Biau},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=AbXGwqb5Ht}\n}", "github": "", "project": "", "reviewers": "Hmrt;NuSB;Htsq;6832", "pdf_size": 2284080, "rating": "6;6;8;8", "confidence": "2;2;4;4", "soundness": "3;3;4;4", "contribution": "3;3;4;3", "presentation": "3;3;4;4", "wc_summary": "67;67;28;155", "wc_strengths": "26;90;158;142", "wc_weaknesses": "23;94;27;54", "wc_questions": "1;17;212;43", "wc_review": "117;268;425;394", "wc_reply_reviewers": "0;0;33;0", "wc_reply_authors": "39;602;562;409", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;1;2", "rating_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.0, 1.0 ], "soundness_avg": [ 3.5, 0.5 ], "contribution_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 79.25, 46.54231945230061 ], "wc_strengths_avg": [ 104.0, 51.57518783291051 ], "wc_weaknesses_avg": [ 49.5, 28.324018076537094 ], 
"wc_questions_avg": [ 68.25, 84.3367505895265 ], "wc_review_avg": [ 301.0, 121.41869707750945 ], "wc_reply_reviewers_avg": [ 8.25, 14.289419162443238 ], "wc_reply_authors_avg": [ 403.0, 222.15647638545224 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2815881658153338920&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "openreview": "https://openreview.net/forum?id=AbXGwqb5Ht", "pdf": "https://openreview.net/pdf?id=AbXGwqb5Ht", "email": "epfl.ch;ens.psl.eu;ens.fr;sorbonne-universite.fr", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "EPFL;Ecole Normale Superieure;Ecole Normale Sup\u00e9rieure de Paris;Sorbonne University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.epfl.ch;https://www.ens.fr;https://www.ens.fr;https://www.sorbonne.universite.fr", "aff_unique_abbr": "EPFL;ENS;ENS Paris;Sorbonne", "aff_campus_unique_index": "0;1;1", "aff_campus_unique": "Lausanne;Paris;", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "Switzerland;France" }, { "title": "Forward Learning of Graph Neural Networks", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19252", "id": "Abr7dU98ME", "author_site": "Namyong Park, Xing Wang, Antoine Simoulin, Shuai Yang, Grey Yang, Ryan Rossi, Puja Trivedi, Nesreen Ahmed", "tldr": "", "abstract": "Graph neural networks (GNNs) have achieved remarkable success across a wide range of applications, such as recommendation, drug discovery, and question answering. Behind the success of GNNs lies the backpropagation (BP) algorithm, which is the de facto standard for training deep neural networks (NNs). However, despite its effectiveness, BP imposes several constraints, which are not only biologically implausible, but also limit the scalability, parallelism, and flexibility in learning NNs. Examples of such constraints include storage of neural activities computed in the forward pass for use in the subsequent backward pass, and the dependence of parameter updates on non-local signals. To address these limitations, the forward-forward algorithm (FF) was recently proposed as an alternative to BP in the image classification domain, which trains NNs by performing two forward passes over positive and negative data. Inspired by this advance, we propose ForwardGNN in this work, a new forward learning procedure for GNNs, which avoids the constraints imposed by BP via an effective layer-wise local forward training. ForwardGNN extends the original FF to deal with graph data and GNNs, and makes it possible to operate without generating negative inputs (hence no longer forward-forward). Further, ForwardGNN enables each layer to learn from both the bottom-up and top-down signals without relying on the backpropagation of errors. Extensive experiments on real-world datasets show the effectiveness and generality of the proposed forward graph learning framework. We release our code at https://github.com/facebookresearch/forwardgnn.", "keywords": "graph neural networks;forward learning;forward-forward algorithm", "primary_area": "learning on graphs and other geometries & topologies", "supplementary_material": "", "author": "Namyong Park;Xing Wang;Antoine Simoulin;Shuai Yang;Grey Yang;Ryan A. Rossi;Puja Trivedi;Nesreen K. 
Ahmed", "authorids": "~Namyong_Park1;~Xing_Wang8;~Antoine_Simoulin1;~Shuai_Yang9;~Grey_Yang1;~Ryan_A._Rossi2;~Puja_Trivedi1;~Nesreen_K._Ahmed2", "gender": ";M;M;;M;F;F;M", "homepage": "https://namyongpark.github.io/;https://scholar.google.com/citations?user=_nDiQQ0AAAAJ&hl=en;http://www.llf.cnrs.fr/fr/Gens/Simoulin;;;https://pujacomputes.github.io/;http://nesreenahmed.com;http://ryanrossi.com", "dblp": "116/9404;;211/7662;72/7503;;274/2080;33/11518;17/5085", "google_scholar": "YBTXGb8AAAAJ;;https://scholar.google.fr/citations?hl=en;ef7yzckAAAAJ;B11lRXUAAAAJ;1y9cR50AAAAJ;AFV0nLcAAAAJ;_Dc6lbQAAAAJ", "orcid": ";;0000-0001-8433-7919;;;0000-0003-1874-8992;;0000-0001-9758-0635", "linkedin": ";;antoine-simoulin;;;;nkahmed/;", "or_profile": "~Namyong_Park1;~Xing_Wang8;~Antoine_Simoulin1;~Shuai_Yang9;~Grey_Yang1;~Puja_Trivedi1;~Nesreen_Ahmed1;~Ryan_Rossi1", "aff": "Meta AI;;Meta AI;Meta;Meta Platforms, Inc;University of Michigan;Intel AI Research;Adobe Research", "aff_domain": "meta.com;;meta.com;meta.com;meta.com;umich.edu;intel.com;adobe.com", "position": "Researcher;;Researcher;Researcher;Researcher;PhD student;Principal Researcher;Senior Research Scientist", "bibtex": "@inproceedings{\npark2024forward,\ntitle={Forward Learning of Graph Neural Networks},\nauthor={Namyong Park and Xing Wang and Antoine Simoulin and Shuai Yang and Grey Yang and Ryan A. Rossi and Puja Trivedi and Nesreen K. Ahmed},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=Abr7dU98ME}\n}", "github": "", "project": "", "reviewers": "TBYv;BuBQ;ztUM;yhLp", "pdf_size": 792820, "rating": "6;6;6;8", "confidence": "3;4;2;4", "soundness": "4;3;3;3", "contribution": "3;2;3;3", "presentation": "2;2;3;3", "wc_summary": "118;91;120;74", "wc_strengths": "61;58;45;37", "wc_weaknesses": "85;426;79;12", "wc_questions": "280;190;15;168", "wc_review": "544;765;259;291", "wc_reply_reviewers": "0;0;0;9", "wc_reply_authors": "1685;1346;730;868", "reply_reviewers": "0;0;0;1", "reply_authors": "3;2;1;2", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 100.75, 19.22725929507375 ], "wc_strengths_avg": [ 50.25, 9.730750228014282 ], "wc_weaknesses_avg": [ 150.5, 161.62069793191714 ], "wc_questions_avg": [ 163.25, 95.32411814436051 ], "wc_review_avg": [ 464.75, 205.51931174466307 ], "wc_reply_reviewers_avg": [ 2.25, 3.897114317029974 ], "wc_reply_authors_avg": [ 1157.25, 380.9050898846063 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16316806855315001275&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=Abr7dU98ME", "pdf": "https://openreview.net/pdf?id=Abr7dU98ME", "email": "meta.com;;meta.com;meta.com;meta.com;umich.edu;intel.com;adobe.com", "author_num": 8, "aff_unique_index": "0;0;0;0;1;2;3", "aff_unique_norm": "Meta;University of Michigan;Intel;Adobe", "aff_unique_dep": "Meta AI;;Intel AI Research;Adobe Research", "aff_unique_url": "https://meta.com;https://www.umich.edu;https://www.intel.com/research;https://research.adobe.com", "aff_unique_abbr": "Meta;UM;Intel AI;Adobe", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "Abt6oSKkb4", "title": "Model2Scene: Learning 3D Scene Representation via Contrastive Language-CAD Models Pre-training", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Current successful methods of 3D scene perception rely on the large-scale annotated point cloud, which is tedious and expensive to acquire. In this paper, we propose Model2Scene, a novel paradigm that learns free 3D scene representation from Computer-Aided Design (CAD) models and languages. The main challenges are the domain gaps between the CAD models and the real scene's objects, including model-to-scene (from a single model to the scene) and synthetic-to-real (from synthetic model to real scene's object). To handle the above challenges, Model2Scene first simulates a crowded scene by mixing data-augmented CAD models. Next, we propose a novel feature regularization operation, termed Deep Convex-hull Regularization (DCR), to project point features into a unified convex hull space, reducing the domain gap. Ultimately, we impose contrastive loss on language embedding and the point features of CAD models to pre-train the 3D network. Extensive experiments verify the learned 3D scene representation is beneficial for various downstream tasks, including label-free 3D object salient detection, label-efficient 3D scene perception and zero-shot 3D semantic segmentation. Notably, Model2Scene yields impressive label-free 3D object salient detection with an average mAP of 46.08\\% and 55.49\\% on the ScanNet and S3DIS datasets, respectively. The code will be publicly available.", "keywords": "3D representation learning;segmentation", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/c4871e35d8266ca3a10d5e3e5e3f9ca02bb243c0.zip", "author": "Runnan Chen;Xinge ZHU;Nenglun Chen;Dawei Wang;Wei Li;Yuexin Ma;Ruigang Yang;Tongliang Liu;Wenping Wang", "authorids": "~Runnan_Chen1;~Xinge_ZHU2;~Nenglun_Chen1;~Dawei_Wang3;~Wei_Li28;~Yuexin_Ma2;~Ruigang_Yang1;~Tongliang_Liu1;~Wenping_Wang1", "gender": "M;M;M;M;F;M;M;M;M", "homepage": "https://scholar.google.com.hk/citations?hl=en&user=Uq2DuzkAAAAJ&view_op=list_works&sortby=pubdate;https://scholar.google.com/citations?user=UhjTC7AAAAAJ;https://dawei.site;;http://yuexinma.me/aboutme.html;https://www.engr.uky.edu/directory/yang-ruigang;https://tongliang-liu.github.io/;https://engineering.tamu.edu/cse/profiles/Wang-Wenping.html;https://xingezhu.me/aboutme.html", "dblp": "232/1849;230/7699.html;39/2537-6;64/6025-111;209/5925;08/5690;150/6667;;204/3002", "google_scholar": "https://scholar.google.com.hk/citations?hl=en;UhjTC7AAAAAJ;DsmzUgsAAAAJ;i8jP6q8AAAAJ;;https://scholar.google.com.tw/citations?user=yveq40QAAAAJ;https://scholar.google.com.au/citations?user=EiLdZ_YAAAAJ;28shvv0AAAAJ;https://scholar.google.com.hk/citations?user=yHAcRooAAAAJ", "orcid": ";;0000-0003-2440-220X;0000-0002-0059-3745;;;;0000-0002-2284-3952;", "linkedin": ";;;;;;;;", "or_profile": "~Runnan_Chen1;~Nenglun_Chen1;~Dawei_Wang3;~Wei_Li28;~Yuexin_Ma2;~Ruigang_Yang1;~Tongliang_Liu1;~Wenping_Wang1;~Xinge_Zhu3", "aff": "the University of Hong Kong, University of Hong Kong;Nanjing University of Information Science and Technology;University of Hong Kong;Inceptio;ShanghaiTech University;Inceptio ;Mohamed bin Zayed University of Artificial Intelligence;Texas A&M University - College 
Station;The Chinese University of Hong Kong", "aff_domain": "cs.hku.hk;nuist.edu.cn;hku.hk;inceptio.ai;shanghaitech.edu.cn;inceptio.ai;mbzuai.ac.ae;tamu.edu;cuhk.edu.hk", "position": "Postdoc;Lecturer;Postdoc;Researcher;Assistant Professor;Instructor;Affiliated Associate Professor;Full Professor;PhD student", "bibtex": "@misc{\nchen2024modelscene,\ntitle={Model2Scene: Learning 3D Scene Representation via Contrastive Language-{CAD} Models Pre-training},\nauthor={Runnan Chen and Xinge ZHU and Nenglun Chen and Dawei Wang and Wei Li and Yuexin Ma and Ruigang Yang and Tongliang Liu and Wenping Wang},\nyear={2024},\nurl={https://openreview.net/forum?id=Abt6oSKkb4}\n}", "github": "", "project": "", "reviewers": "xgfx;gV7n;yhuq;U8AX", "site": "https://openreview.net/forum?id=Abt6oSKkb4", "pdf_size": 4517451, "rating": "3;3;5;6", "confidence": "4;3;3;3", "soundness": "2;2;2;3", "contribution": "1;2;2;3", "presentation": "1;2;2;2", "wc_summary": "357;47;77;57", "wc_strengths": "22;25;23;53", "wc_weaknesses": "445;111;95;169", "wc_questions": "233;78;2;53", "wc_review": "1057;261;197;332", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_avg": [ 134.5, 128.91373084353737 ], "wc_strengths_avg": [ 30.75, 12.891373084353738 ], "wc_weaknesses_avg": [ 205.0, 141.27278577277366 ], "wc_questions_avg": [ 91.5, 86.16408764676848 ], "wc_review_avg": [ 461.75, 346.96928898679204 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.5555555555555555, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16656736670335244446&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;0;2;3;2;4;5;6", "aff_unique_norm": "University of Hong Kong;Nanjing University of Information Science and Technology;Inceptio;ShanghaiTech University;Mohamed bin Zayed University of Artificial Intelligence;Texas A&M University;Chinese University of Hong Kong", "aff_unique_dep": ";;;;;;", "aff_unique_url": "https://www.hku.hk;http://www.nuist.edu.cn;;https://www.shanghaitech.edu.cn;https://mbzuai.ac.ae;https://www.tamu.edu;https://www.cuhk.edu.hk", "aff_unique_abbr": "HKU;;;ShanghaiTech;MBZUAI;TAMU;CUHK", "aff_campus_unique_index": "0;0;2;0", "aff_campus_unique": "Hong Kong SAR;;College Station", "aff_country_unique_index": "0;0;0;0;2;3;0", "aff_country_unique": "China;;United Arab Emirates;United States" }, { "id": "Ac7f7xL4bU", "title": "Universal Clustering Bounds", "track": "main", "status": "Reject", "tldr": "", "abstract": "This paper seamlessly integrates several fundamental learning tasks under the umbrella of subspace clustering, namely orthogonal nonnegative matrix factorization, and K-means clustering. Within this framework, we unveil a unified, closed-form solution that elegantly addresses these tasks. Our main theoretical contribution establishes that our deterministic solution achieves perfect accuracy when the data exhibits sufficiently well-defined clusters. 
Furthermore, the immediate relaxation of our solution yields practical algorithms that not only rival but also surpass the current state-of-the-art in these complex problem domains. This achievement is corroborated by a comprehensive array of experiments conducted on synthetic datasets, as well as on a diverse set of five real-world datasets.", "keywords": "nonnegative matrix factorization;orthogonal;subspace;k-means;clustering", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/44e8c82e13d461285eac62500ab46c6906917dc6.zip", "author": "Daniel L. Pimentel-Alarc\u00f3n", "authorids": "~Daniel_L._Pimentel-Alarc\u00f3n1", "gender": "", "homepage": "https://danielpimentel.github.io/", "dblp": "150/6256", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "~Daniel_L._Pimentel-Alarc\u00f3n1", "aff": "University of Wisconsin, Madison", "aff_domain": "wisc.edu", "position": "Assistant Professor", "bibtex": "@misc{\npimentel-alarc{\\'o}n2024universal,\ntitle={Universal Clustering Bounds},\nauthor={Daniel L. Pimentel-Alarc{\\'o}n},\nyear={2024},\nurl={https://openreview.net/forum?id=Ac7f7xL4bU}\n}", "github": "", "project": "", "reviewers": "B34v;Eiut;Wvoi;qhZS", "site": "https://openreview.net/forum?id=Ac7f7xL4bU", "pdf_size": 1217525, "rating": "3;3;3;5", "confidence": "3;4;4;4", "soundness": "2;3;2;3", "contribution": "2;1;1;2", "presentation": "2;1;3;2", "wc_summary": "98;99;84;116", "wc_strengths": "19;6;113;70", "wc_weaknesses": "231;172;736;150", "wc_questions": "47;10;41;186", "wc_review": "395;287;974;522", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 1.5, 0.5 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 99.25, 11.344051304538427 ], "wc_strengths_avg": [ 52.0, 42.573465914816005 ], "wc_weaknesses_avg": [ 322.25, 240.70768060034976 ], "wc_questions_avg": [ 71.0, 67.8638342565464 ], "wc_review_avg": [ 544.5, 261.54970846858157 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:NVg3jXFFhTQJ:scholar.google.com/&scioq=Universal+Clustering+Bounds&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "University of Wisconsin", "aff_unique_dep": "", "aff_unique_url": "https://www.wisc.edu", "aff_unique_abbr": "UW", "aff_campus_unique_index": "0", "aff_campus_unique": "Madison", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "AcGUW5655J", "title": "Constraining Non-Negative Matrix Factorization to Improve Signature Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Collaborative filtering approaches are fundamental for learning meaningful low-dimensional representations when only association data is available. Among these methods, Non-negative Matrix Factorization (NMF) has gained prominence due to its capability to yield interpretable and meaningful low-dimensional representations. 
However, one significant challenge for NMF is the vast number of solutions for the same problem instance, making the selection of high-quality signatures a complex task. In response to this challenge, our work introduces a novel approach, Self-Matrix Factorization (SMF), which leverages NMF by incorporating constraints that preserve the relationships inherent in the original data. This is achieved by drawing inspiration from a distinct family of matrix decomposition methods, known as Self-Expressive Models (SEM).\nIn our experimental analyses, conducted on two diverse benchmark datasets, our findings present a compelling narrative. SMF consistently delivers competitive or even superior performance when compared to NMF in predictive tasks. However, what truly sets SMF apart, as validated by our empirical results, is its remarkable ability to consistently generate significantly more meaningful object representations.", "keywords": "Representation Learning;Colaborative Filtering (CF);Recommender Systems;Link Prediction", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/de9467f65ce858889ef8f7d3c372978f31a907e9.zip", "author": "Aldo Galeano;Suzana Santos;Ruben Jimenez;Alberto Paccanaro", "authorids": "~Aldo_Galeano1;suzana.santos@fgv.br;ruben.franco@fgv.br;~Alberto_Paccanaro1", "gender": "M;;;M", "homepage": ";;;https://paccanarolab.org/", "dblp": ";;;94/3076", "google_scholar": ";;;https://scholar.google.com/citations?hl=en", "orcid": ";;;0000-0001-8059-1346", "linkedin": "aldogaleanoalfonso18;;;", "or_profile": "~Aldo_Galeano1;suzana.santos@fgv.br;ruben.franco@fgv.br;~Alberto_Paccanaro1", "aff": "Funda\u00e7\u00e3o Getulio Vargas;;;Royal Holloway, University of London", "aff_domain": "fgv.br;;;rhul.ac.uk", "position": "PhD student;;;Full Professor", "bibtex": "@misc{\ngaleano2024constraining,\ntitle={Constraining Non-Negative Matrix Factorization to Improve Signature Learning},\nauthor={Aldo Galeano and Suzana Santos and Ruben Jimenez and Alberto Paccanaro},\nyear={2024},\nurl={https://openreview.net/forum?id=AcGUW5655J}\n}", "github": "", "project": "", "reviewers": "G6Gq;oagP;NTRu", "site": "https://openreview.net/forum?id=AcGUW5655J", "pdf_size": 856512, "rating": "3;3;6", "confidence": "5;4;3", "soundness": "2;2;2", "contribution": "2;1;2", "presentation": "2;3;2", "wc_summary": "38;30;105", "wc_strengths": "5;14;55", "wc_weaknesses": "97;134;30", "wc_questions": "4;9;61", "wc_review": "144;187;251", "wc_reply_reviewers": "0;0;12", "wc_reply_authors": "485;825;750", "reply_reviewers": "0;0;1", "reply_authors": "1;2;2", "rating_avg": [ 4.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 2.0, 0.0 ], "contribution_avg": [ 1.6666666666666667, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 57.666666666666664, 33.62869145371091 ], "wc_strengths_avg": [ 24.666666666666668, 21.761331658599286 ], "wc_weaknesses_avg": [ 87.0, 43.04261454264444 ], "wc_questions_avg": [ 24.666666666666668, 25.772509040103607 ], "wc_review_avg": [ 194.0, 43.96210489349511 ], "wc_reply_reviewers_avg": [ 4.0, 5.656854249492381 ], "wc_reply_authors_avg": [ 686.6666666666666, 145.84999904772786 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8660254037844387, 
"gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:HiH0204EvqEJ:scholar.google.com/&scioq=Constraining+Non-Negative+Matrix+Factorization+to+Improve+Signature+Learning&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Funda\u00e7\u00e3o Getulio Vargas;University of London", "aff_unique_dep": ";", "aff_unique_url": "https://www.fgv.br;https://www.royalholloway.ac.uk", "aff_unique_abbr": "FGV;RHUL", "aff_campus_unique_index": "1", "aff_campus_unique": ";Royal Holloway", "aff_country_unique_index": "0;1", "aff_country_unique": "Brazil;United Kingdom" }, { "title": "Rethinking Model Ensemble in Transfer-based Adversarial Attacks", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19251", "id": "AcJrSoArlh", "author_site": "Huanran Chen, Yichi Zhang, Yinpeng Dong, Xiao Yang, Hang Su, Jun Zhu", "tldr": "", "abstract": "It is widely recognized that deep learning models lack robustness to adversarial examples. An intriguing property of adversarial examples is that they can transfer across different models, which enables black-box attacks without any knowledge of the victim model. An effective strategy to improve the transferability is attacking an ensemble of models. However, previous works simply average the outputs of different models, lacking an in-depth analysis on how and why model ensemble methods can strongly improve the transferability. In this paper, we rethink the ensemble in adversarial attacks and define the common weakness of model ensemble with two properties: 1) the flatness of loss landscape; and 2) the closeness to the local optimum of each model. We empirically and theoretically show that both properties are strongly correlated with the transferability and propose a Common Weakness Attack (CWA) to generate more transferable adversarial examples by promoting these two properties. Experimental results on both image classification and object detection tasks validate the effectiveness of our approach to improving the adversarial transferability, especially when attacking adversarially trained models. We also successfully apply our method to attack a black-box large vision-language model -- Google's Bard, showing the practical effectiveness. 
Code is available at \\url{https://github.com/huanranchen/AdversarialAttacks}.", "keywords": "Adversarial attack;transferability;ensemble attack;robustness", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "", "author": "Huanran Chen;Yichi Zhang;Yinpeng Dong;Xiao Yang;Hang Su;Jun Zhu", "authorids": "~Huanran_Chen1;~Yichi_Zhang4;~Yinpeng_Dong2;~Xiao_Yang4;~Hang_Su3;~Jun_Zhu2", "gender": "M;M;M;M;M;M", "homepage": "https://huanranchen.github.io/;https://zycheiheihei.github.io;https://dongyp13.github.io;https://ml.cs.tsinghua.edu.cn/~xiaoyang/;http://ml.cs.tsinghua.edu.cn/~jun;", "dblp": "329/6558;;183/0980;57/33851;50/2644-1;26/5371-6", "google_scholar": "https://scholar.google.co.jp/citations?user=QYsKXccAAAAJ;HzgDakoAAAAJ;6_4ad84AAAAJ;bwkwp0MAAAAJ;axsP38wAAAAJ;dxN1_X0AAAAJ", "orcid": ";0000-0002-1894-3977;;0000-0001-9502-9962;;", "linkedin": ";;;;;", "or_profile": "~Huanran_Chen1;~Yichi_Zhang4;~Yinpeng_Dong2;~Xiao_Yang4;~Jun_Zhu2;~Hang_Su2", "aff": ";Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": ";tsinghua.edu.cn;tsinghua.edu.cn;mail.tsinghua.edu.cn;mail.tsinghua.edu.cn;tsinghua.edu.cn", "position": ";PhD student;Postdoc;Postdoc;Professor;Associate Professor", "bibtex": "@inproceedings{\nchen2024rethinking,\ntitle={Rethinking Model Ensemble in Transfer-based Adversarial Attacks},\nauthor={Huanran Chen and Yichi Zhang and Yinpeng Dong and Xiao Yang and Hang Su and Jun Zhu},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=AcJrSoArlh}\n}", "github": "", "project": "", "reviewers": "GTKq;JLTP;9Y8Q;s2Ct", "pdf_size": 2167419, "rating": "6;6;8;8", "confidence": "4;3;3;2", "soundness": "3;3;4;3", "contribution": "3;3;3;3", "presentation": "3;2;3;4", "wc_summary": "62;120;61;53", "wc_strengths": "42;84;74;42", "wc_weaknesses": "76;149;87;78", "wc_questions": "24;178;4;50", "wc_review": "204;531;226;223", "wc_reply_reviewers": "67;0;39;12", "wc_reply_authors": "1286;799;361;653", "reply_reviewers": "3;0;1;1", "reply_authors": "5;2;2;2", "rating_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 74.0, 26.78619047195775 ], "wc_strengths_avg": [ 60.5, 18.83480820183736 ], "wc_weaknesses_avg": [ 97.5, 30.02082610455615 ], "wc_questions_avg": [ 64.0, 67.80855403265875 ], "wc_review_avg": [ 296.0, 135.93932470039712 ], "wc_reply_reviewers_avg": [ 29.5, 25.85053190942113 ], "wc_reply_authors_avg": [ 774.75, 334.65532641809244 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.7071067811865475, "gs_citation": 58, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9671466880746340903&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=AcJrSoArlh", "pdf": "https://openreview.net/pdf?id=AcJrSoArlh", "email": ";tsinghua.edu.cn;tsinghua.edu.cn;mail.tsinghua.edu.cn;mail.tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Out-of-Distribution Detection by Leveraging Between-Layer Transformation Smoothness", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19250", "id": "AcRfzLS6se", "author_site": "Fran Jeleni\u0107, Josip Juki\u0107, Martin Tutek, Mate Puljiz, Jan Snajder", "tldr": "", "abstract": "Effective out-of-distribution (OOD) detection is crucial for reliable machine learning models, yet most current methods are limited in practical use due to requirements like access to training data or intervention in training. We present a novel method for detecting OOD data in Transformers based on transformation smoothness between intermediate layers of a network (BLOOD), which is applicable to pre-trained models without access to training data. BLOOD utilizes the tendency of between-layer representation transformations of in-distribution (ID) data to be smoother than the corresponding transformations of OOD data, a property that we also demonstrate empirically. We evaluate BLOOD on several text classification tasks with Transformer networks and demonstrate that it outperforms methods with comparable resource requirements. Our analysis also suggests that when learning simpler tasks, OOD data transformations maintain their original sharpness, whereas sharpness increases with more complex tasks.", "keywords": "out-of-distribution detection;deep neural networks;transformers;representation analysis;uncertainty quantification;text classification", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/9c4c6570df8fcd5ab6e2335f73d107f295739f7c.zip", "author": "Fran Jeleni\u0107;Josip Juki\u0107;Martin Tutek;Mate Puljiz;Jan Snajder", "authorids": "~Fran_Jeleni\u01071;~Josip_Juki\u01071;~Martin_Tutek1;~Mate_Puljiz1;~Jan_Snajder1", "gender": ";;M;M;M", "homepage": ";;;https://my-web-bbeba.firebaseapp.com/;http://www.zemris.fer.hr/~jan/", "dblp": "333/1019;333/0711;186/7079;198/6470.html;34/5404", "google_scholar": "6zH9ZZcAAAAJ;0NzyWBoAAAAJ;https://scholar.google.hr/citations?user=3MK-3e0AAAAJ;https://scholar.google.hr/citations?user=U5cL9tYAAAAJ;https://scholar.google.hr/citations?user=7h0lKgIAAAAJ", "orcid": ";;;0000-0003-0912-8345;", "linkedin": ";;mtutek/;;", "or_profile": "~Fran_Jeleni\u01071;~Josip_Juki\u01071;~Martin_Tutek1;~Mate_Puljiz1;~Jan_Snajder1", "aff": "University of Zagreb;Faculty of Electrical Engineering and Computing, University of Zagreb;Technion - Israel Institute of Technology, Technion;;UniZg-FER, University of Zagreb", "aff_domain": "fer.hr;fer.hr;technion.ac.il;;fer.unizg.hr", "position": "Researcher;PhD student;Postdoc;;Full Professor", "bibtex": "@inproceedings{\njeleni{\\'c}2024outofdistribution,\ntitle={Out-of-Distribution Detection by Leveraging Between-Layer Transformation Smoothness},\nauthor={Fran Jeleni{\\'c} and Josip Juki{\\'c} and Martin Tutek and Mate Puljiz and Jan Snajder},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=AcRfzLS6se}\n}", "github": "", "project": "", "reviewers": "Bnog;Txyj;ELpL;woCv", "pdf_size": 1559264, "rating": "5;5;6;6", "confidence": "4;3;2;4", "soundness": "2;3;3;2", "contribution": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "42;79;93;66", "wc_strengths": "57;67;51;68", "wc_weaknesses": "131;111;158;79", "wc_questions": "34;14;19;109", "wc_review": "264;271;321;322", "wc_reply_reviewers": 
"0;0;59;0", "wc_reply_authors": "273;352;314;902", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;1;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 70.0, 18.774983355518586 ], "wc_strengths_avg": [ 60.75, 7.084313657652377 ], "wc_weaknesses_avg": [ 119.75, 28.838992700855556 ], "wc_questions_avg": [ 44.0, 38.242646351945886 ], "wc_review_avg": [ 294.5, 27.115493725912497 ], "wc_reply_reviewers_avg": [ 14.75, 25.54774941164094 ], "wc_reply_authors_avg": [ 460.25, 256.5700440425577 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2061125423693659380&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=AcRfzLS6se", "pdf": "https://openreview.net/pdf?id=AcRfzLS6se", "email": "fer.hr;fer.hr;technion.ac.il;;fer.unizg.hr", "author_num": 5, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Zagreb;Technion - Israel Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.unizg.hr;https://www.technion.ac.il", "aff_unique_abbr": "UNIZG;Technion", "aff_campus_unique_index": "1", "aff_campus_unique": ";Zagreb", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Croatia;Israel" }, { "title": "Distinguished In Uniform: Self-Attention Vs. Virtual Nodes", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19249", "id": "AcSChDWL6V", "author_site": "Eran Rosenbluth, Jan T\u00f6nshoff, Martin Ritzert, Berke Kisin, Martin Grohe", "tldr": "", "abstract": "Graph Transformers (GTs) such as SAN and GPS are graph processing models that combine Message-Passing GNNs (MPGNNs) with global Self-Attention. They were shown to be universal function approximators, with two reservations: 1. The initial node features must be augmented with certain positional encodings. 2. The approximation is non-uniform: Graphs of different sizes may require a different approximating network.\n\nWe first clarify that this form of universality is not unique to GTs: Using the same positional encodings, also pure MPGNNs and even 2-layer MLPs are non-uniform universal approximators. We then consider uniform expressivity: The target function is to be approximated by a single network for graphs of all sizes. There, we compare GTs to the more efficient MPGNN + Virtual Node architecture. The essential difference between the two model definitions is in their global computation method: Self-Attention Vs Virtual Node. We prove that none of the models is a uniform-universal approximator, before proving our main result: Neither model\u2019s uniform expressivity subsumes the other\u2019s. We demonstrate the theory with experiments on synthetic data. 
We further augment our study with real-world datasets, observing mixed results which indicate no clear ranking in practice as well.", "keywords": "Graph Neural Networks;Message Passing;Graph Transformers;Virtual Nodes;Expressivity;Uniform Expressivity", "primary_area": "learning theory", "supplementary_material": "/attachment/115096b1e19ffa39a7d9ee9fd3e4a1dac92a30a1.zip", "author": "Eran Rosenbluth;Jan T\u00f6nshoff;Martin Ritzert;Berke Kisin;Martin Grohe", "authorids": "~Eran_Rosenbluth1;~Jan_T\u00f6nshoff1;~Martin_Ritzert1;~Berke_Kisin1;~Martin_Grohe1", "gender": ";M;M;;M", "homepage": ";https://www.lics.rwth-aachen.de/;;;http://www.lics.rwth-aachen.de/~grohe", "dblp": ";;194/2447;;g/MGrohe", "google_scholar": ";;https://scholar.google.de/citations?user=ZNioUNgAAAAJ;xqVvCc4AAAAJ;https://scholar.google.com.tw/citations?user=Sou5ih0AAAAJ", "orcid": ";;0000-0002-5322-3684;;0000-0002-0292-9142", "linkedin": ";;martin-ritzert/;;", "or_profile": "~Eran_Rosenbluth1;~Jan_T\u00f6nshoff1;~Martin_Ritzert1;~Berke_Kisin1;~Martin_Grohe1", "aff": ";RWTH Aachen University;Georg-August Universit\u00e4t G\u00f6ttingen;Rheinisch Westf\u00e4lische Technische Hochschule Aachen;RWTH Aachen University", "aff_domain": ";rwth-aachen.de;uni-goettingen.de;rwth-aachen.de;rwth-aachen.de", "position": ";PhD student;Postdoc;MS student;Full Professor", "bibtex": "@inproceedings{\nrosenbluth2024distinguished,\ntitle={Distinguished In Uniform: Self-Attention Vs. Virtual Nodes},\nauthor={Eran Rosenbluth and Jan T{\\\"o}nshoff and Martin Ritzert and Berke Kisin and Martin Grohe},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=AcSChDWL6V}\n}", "github": "", "project": "", "reviewers": "MBi3;BMpU;Uru7;EnRw", "pdf_size": 445579, "rating": "6;6;6;8", "confidence": "2;3;3;4", "soundness": "3;2;3;3", "contribution": "3;3;3;3", "presentation": "4;2;3;3", "wc_summary": "178;241;92;157", "wc_strengths": "92;87;129;130", "wc_weaknesses": "79;300;274;63", "wc_questions": "106;1;50;4", "wc_review": "455;629;545;354", "wc_reply_reviewers": "11;192;0;0", "wc_reply_authors": "523;1267;276;342", "reply_reviewers": "1;1;0;0", "reply_authors": "2;2;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 167.0, 53.2024435529046 ], "wc_strengths_avg": [ 109.5, 20.081085628023203 ], "wc_weaknesses_avg": [ 179.0, 108.53801177467736 ], "wc_questions_avg": [ 40.25, 42.6402098962939 ], "wc_review_avg": [ 495.75, 102.38987987101069 ], "wc_reply_reviewers_avg": [ 50.75, 81.67427685630281 ], "wc_reply_authors_avg": [ 602.0, 394.44327855852737 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12750433681330117626&as_sdt=5,30&sciodt=0,30&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=AcSChDWL6V", "pdf": "https://openreview.net/pdf?id=AcSChDWL6V", "email": ";rwth-aachen.de;uni-goettingen.de;rwth-aachen.de;rwth-aachen.de", "author_num": 5, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "RWTH Aachen University;Georg-August Universit\u00e4t G\u00f6ttingen", "aff_unique_dep": ";", "aff_unique_url": 
"https://www.rwth-aachen.de;https://www.uni-goettingen.de", "aff_unique_abbr": "RWTH;GAU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Aachen;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Risk Bounds of Accelerated SGD for Overparameterized Linear Regression", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19248", "id": "AcoXPIPh4A", "author_site": "Xuheng Li, Yihe Deng, Jingfeng Wu, Dongruo Zhou, Quanquan Gu", "tldr": "", "abstract": "Accelerated stochastic gradient descent (ASGD) is a workhorse in deep learning and often achieves better generalization performance than SGD. However, existing optimization theory can only explain the faster convergence of ASGD, but cannot explain its better generalization. In this paper, we study the generalization of ASGD for overparameterized linear regression, which is possibly the simplest setting of learning with overparameterization. We establish an instance-dependent excess risk bound for ASGD within each eigen-subspace of the data covariance matrix. Our analysis shows that (i) ASGD outperforms SGD in the subspace of small eigenvalues, exhibiting a faster rate of exponential decay for bias error, while in the subspace of large eigenvalues, its bias error decays slower than SGD; and (ii) the variance error of ASGD is always larger than that of SGD. Our result suggests that ASGD can outperform SGD when the difference between the initialization and the true weight vector is mostly confined to the subspace of small eigenvalues. Additionally, when our analysis is specialized to linear regression in the strongly convex setting, it yields a tighter bound for bias error than the best-known result.", "keywords": "Accelerated stochastic gradient descent;excess risk;linear regression;overparameterization", "primary_area": "learning theory", "supplementary_material": "/attachment/81ed44f42960f9d6a83ba25e890d44117641bc92.pdf", "author": "Xuheng Li;Yihe Deng;Jingfeng Wu;Dongruo Zhou;Quanquan Gu", "authorids": "~Xuheng_Li1;~Yihe_Deng1;~Jingfeng_Wu1;~Dongruo_Zhou1;~Quanquan_Gu1", "gender": "M;F;M;M;M", "homepage": "http://www.pku.edu.cn;;https://uuujf.github.io;;http://web.cs.ucla.edu/~qgu/", "dblp": "330/7681;230/8011;;215/3401;50/4597", "google_scholar": ";7Lix1poAAAAJ;z-KILD8AAAAJ;1780wr0AAAAJ;GU9HgNAAAAAJ", "orcid": ";;0009-0009-3414-4487;;", "linkedin": ";;jingfeng-wu-79205b184/;;", "or_profile": "~Xuheng_Li1;~Yihe_Deng1;~Jingfeng_Wu1;~Dongruo_Zhou1;~Quanquan_Gu1", "aff": "ByteDance Inc.;University of California, Los Angeles;University of California, Berkeley;Indiana University;University of California, Los Angeles", "aff_domain": "bytedance.com;ucla.edu;berkeley.edu;iu.edu;cs.ucla.edu", "position": "Intern;PhD student;Postdoc;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nli2024risk,\ntitle={Risk Bounds of Accelerated {SGD} for Overparameterized Linear Regression},\nauthor={Xuheng Li and Yihe Deng and Jingfeng Wu and Dongruo Zhou and Quanquan Gu},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=AcoXPIPh4A}\n}", "github": "", "project": "", "reviewers": "MrsP;BSyC;hi74", "pdf_size": 1086933, "rating": "6;6;6", "confidence": "3;4;3", "soundness": "3;4;3", "contribution": "3;2;3", "presentation": "3;3;3", "wc_summary": "130;68;146", "wc_strengths": "15;61;106", "wc_weaknesses": "15;170;1", "wc_questions": "75;40;1", "wc_review": "235;339;254", "wc_reply_reviewers": 
"0;103;0", "wc_reply_authors": "633;793;8", "reply_reviewers": "0;1;0", "reply_authors": "1;3;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 114.66666666666667, 33.6386021641143 ], "wc_strengths_avg": [ 60.666666666666664, 37.15134213217905 ], "wc_weaknesses_avg": [ 62.0, 76.58111168340838 ], "wc_questions_avg": [ 38.666666666666664, 30.225081564084416 ], "wc_review_avg": [ 276.0, 45.217990519998416 ], "wc_reply_reviewers_avg": [ 34.333333333333336, 48.554665641476255 ], "wc_reply_authors_avg": [ 478.0, 338.6984893185481 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5460095850641278501&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "openreview": "https://openreview.net/forum?id=AcoXPIPh4A", "pdf": "https://openreview.net/pdf?id=AcoXPIPh4A", "email": "bytedance.com;ucla.edu;berkeley.edu;iu.edu;cs.ucla.edu", "author_num": 5, "aff_unique_index": "0;1;2;3;1", "aff_unique_norm": "ByteDance;University of California, Los Angeles;University of California, Berkeley;Indiana University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.bytedance.com;https://www.ucla.edu;https://www.berkeley.edu;https://www.indiana.edu", "aff_unique_abbr": "ByteDance;UCLA;UC Berkeley;IU", "aff_campus_unique_index": "1;2;1", "aff_campus_unique": ";Los Angeles;Berkeley", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "China;United States" }, { "title": "Rotation Has Two Sides: Evaluating Data Augmentation for Deep One-class Classification", "status": "Spotlight", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19247", "id": "Ad81awoBVS", "author_site": "Guodong Wang, Yunhong Wang, Xiuguo Bao, Di Huang", "tldr": "", "abstract": "One-class classification (OCC) involves predicting whether a new data is normal or anomalous based solely on the data from a single class during training. Various attempts have been made to learn suitable representations for OCC within a self-supervised framework. Notably, discriminative methods that use geometric visual transformations, such as rotation, to generate pseudo-anomaly samples have exhibited impressive detection performance. Although rotation is commonly viewed as a distribution-shifting transformation and is widely used in the literature, the cause of its effectiveness remains a mystery. In this study, we are the first to make a surprising observation: there exists a strong linear relationship (Pearson's Correlation, $r > 0.9$) between the accuracy of rotation prediction and the performance of OCC. This suggests that a classifier that effectively distinguishes different rotations is more likely to excel in OCC, and vice versa. The root cause of this phenomenon can be attributed to the transformation bias in the dataset, where representations learned from transformations already present in the dataset tend to be less effective, making it essential to accurately estimate the transformation distribution before utilizing pretext tasks involving these transformations for reliable self-supervised representation learning. 
To this end, we propose a novel two-stage method to estimate the transformation distribution within the dataset. In the first stage, we learn general representations through standard contrastive pre-training. In the second stage, we select potentially semantics-preserving samples from the entire augmented dataset, which includes all rotations, by employing density matching with the provided reference distribution. By sorting samples based on semantics-preserving versus shifting transformations, we achieve improved performance on OCC benchmarks.", "keywords": "self-supervised learning;deep one-class classification", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Guodong Wang;Yunhong Wang;Xiuguo Bao;Di Huang", "authorids": "~Guodong_Wang3;~Yunhong_Wang1;~Xiuguo_Bao3;~Di_Huang4", "gender": "M;;M;M", "homepage": "https://gdwang08.github.io/;;https://dblp.org/rec/conf/ijcai/LiCZB022;http://irip.buaa.edu.cn/dihuang/index.html", "dblp": ";;;45/780-1", "google_scholar": "9xCdnoQAAAAJ;;;https://scholar.google.com/citations?hl=en", "orcid": ";;;0000-0002-2412-9330", "linkedin": ";;;", "or_profile": "~Guodong_Wang3;~Yunhong_Wang1;~Xiuguo_Bao3;~Di_Huang4", "aff": "Beihang University;;;Beihang University", "aff_domain": "buaa.edu.cn;;;buaa.edu.cn", "position": "PhD student;;;Full Professor", "bibtex": "@inproceedings{\nwang2024rotation,\ntitle={Rotation Has Two Sides: Evaluating Data Augmentation for Deep One-class Classification},\nauthor={Guodong Wang and Yunhong Wang and Xiuguo Bao and Di Huang},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=Ad81awoBVS}\n}", "github": "", "project": "", "reviewers": "Yjs1;jwSM;vKuc;Soam;h8uB", "pdf_size": 878803, "rating": "3;6;6;6;6", "confidence": "4;5;4;3;2", "soundness": "2;3;3;2;3", "contribution": "1;3;3;3;3", "presentation": "2;3;3;3;2", "wc_summary": "64;50;222;70;56", "wc_strengths": "31;21;97;129;93", "wc_weaknesses": "181;61;173;102;149", "wc_questions": "3;101;104;61;31", "wc_review": "279;233;596;362;329", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "555;887;635;537;212", "reply_reviewers": "0;0;0;0;0", "reply_authors": "1;2;1;1;1", "rating_avg": [ 5.4, 1.2 ], "confidence_avg": [ 3.6, 1.019803902718557 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "contribution_avg": [ 2.6, 0.8000000000000002 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 92.4, 65.15704106234413 ], "wc_strengths_avg": [ 74.2, 41.40724574274411 ], "wc_weaknesses_avg": [ 133.2, 45.397797303393475 ], "wc_questions_avg": [ 60.0, 39.2632143360678 ], "wc_review_avg": [ 359.8, 125.97682326523399 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 565.2, 216.3685744279885 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.19611613513818404, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8500072982366343769&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "openreview": "https://openreview.net/forum?id=Ad81awoBVS", "pdf": "https://openreview.net/pdf?id=Ad81awoBVS", "email": "buaa.edu.cn;;;buaa.edu.cn", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Beihang University", "aff_unique_dep": "", "aff_unique_url": "http://www.buaa.edu.cn/", "aff_unique_abbr": "BUAA", "aff_campus_unique_index": "",
"aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Ghost on the Shell: An Expressive Representation of General 3D Shapes", "status": "Oral", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19246", "id": "Ad87VjRqUw", "author_site": "Zhen Liu, Yao Feng, Yuliang Xiu, Weiyang Liu, Liam Paull, Michael J Black, Bernhard Schoelkopf", "tldr": "", "abstract": "The creation of photorealistic virtual worlds requires the accurate modeling of 3D surface geometry for a wide range of objects. For this, meshes are appealing since they enable 1) fast physics-based rendering with realistic material and lighting, 2) physical simulation, and 3) are memory-efficient for modern graphics pipelines. Recent work on reconstructing and statistically modeling 3D shape, however, has critiqued meshes as being topologically inflexible. To capture a wide range of object shapes, any 3D representation must be able to model solid, watertight, shapes as well as thin, open, surfaces. Recent work has focused on the former, and methods for reconstructing open surfaces do not support fast reconstruction with material and lighting or unconditional generative modelling. Inspired by the observation that open surfaces can be seen as islands floating on watertight surfaces, we parametrize open surfaces by defining a manifold signed distance field on watertight templates. With this parametrization, we further develop a grid-based and differentiable representation that parametrizes both watertight and non-watertight meshes of arbitrary topology. Our new representation, called Ghost-on-the-Shell (G-Shell), enables two important applications: differentiable rasterization-based reconstruction from multiview images and generative modelling of non-watertight meshes. We empirically demonstrate that G-Shell achieves state-of-the-art performance on non-watertight mesh reconstruction and generation tasks, while also performing effectively for watertight meshes.", "keywords": "Non-watertight mesh; generative model; 3D geometry; differentiable rendering", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Zhen Liu;Yao Feng;Yuliang Xiu;Weiyang Liu;Liam Paull;Michael J. Black;Bernhard Sch\u00f6lkopf", "authorids": "~Zhen_Liu6;~Yao_Feng3;~Yuliang_Xiu2;~Weiyang_Liu1;~Liam_Paull1;~Michael_J._Black1;~Bernhard_Sch\u00f6lkopf1", "gender": "M;F;M;M;;;", "homepage": ";https://ps.is.tuebingen.mpg.de/person/yfeng;http://xiuyuliang.cn;http://wyliu.com/;;;", "dblp": "77/35-19;05/9861;215/3940;137/1532;;;", "google_scholar": "I1IiJCAAAAAJ;wNQQhSIAAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN;DMjROf0AAAAJ;;;", "orcid": ";0000-0002-9481-9783;0000-0003-0165-5909;;;;", "linkedin": ";;yuliangxiu;;;;", "or_profile": "~Zhen_Liu6;~Yao_Feng3;~Yuliang_Xiu2;~Weiyang_Liu1;~Liam_Paull1;~Michael_J._Black1;~Bernhard_Sch\u00f6lkopf1", "aff": "University of Montreal;ETHZ - ETH Zurich;Max Planck Institute for Intelligent Systems, Max-Planck Institute;University of Cambridge;;;", "aff_domain": "umontreal.ca;ethz.ch;tuebingen.mpg.de;cam.ac.uk;;;", "position": "PhD student;PhD student;PhD student;Researcher;;;", "bibtex": "@inproceedings{\nliu2024ghost,\ntitle={Ghost on the Shell: An Expressive Representation of General 3D Shapes},\nauthor={Zhen Liu and Yao Feng and Yuliang Xiu and Weiyang Liu and Liam Paull and Michael J. 
Black and Bernhard Sch{\\\"o}lkopf},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=Ad87VjRqUw}\n}", "github": "", "project": "", "reviewers": "RYwJ;9iXa;dsfR;hJqu", "pdf_size": 14168139, "rating": "5;8;8;8", "confidence": "5;3;5;4", "soundness": "4;3;4;3", "contribution": "3;3;3;3", "presentation": "4;4;3;2", "wc_summary": "75;66;40;97", "wc_strengths": "47;215;24;45", "wc_weaknesses": "86;72;39;292", "wc_questions": "4;188;72;262", "wc_review": "212;541;175;696", "wc_reply_reviewers": "0;41;21;0", "wc_reply_authors": "520;817;535;2092", "reply_reviewers": "0;1;1;0", "reply_authors": "1;2;1;3", "rating_avg": [ 7.25, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 69.5, 20.426698215815495 ], "wc_strengths_avg": [ 82.75, 76.88424741128705 ], "wc_weaknesses_avg": [ 122.25, 99.47958333246073 ], "wc_questions_avg": [ 131.5, 100.02374718035712 ], "wc_review_avg": [ 406.0, 219.8419887100733 ], "wc_reply_reviewers_avg": [ 15.5, 17.03672503740082 ], "wc_reply_authors_avg": [ 991.0, 646.5783015227158 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11053737743132381826&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=Ad87VjRqUw", "pdf": "https://openreview.net/pdf?id=Ad87VjRqUw", "email": "umontreal.ca;ethz.ch;tuebingen.mpg.de;cam.ac.uk;;;", "author_num": 7, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of Montreal;ETH Zurich;Max Planck Institute for Intelligent Systems;University of Cambridge", "aff_unique_dep": ";;Intelligent Systems;", "aff_unique_url": "https://wwwumontreal.ca;https://www.ethz.ch;https://www.mpi-is.mpg.de;https://www.cam.ac.uk", "aff_unique_abbr": "UM;ETHZ;MPI-IS;Cambridge", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;1;2;3", "aff_country_unique": "Canada;Switzerland;Germany;United Kingdom" }, { "id": "Aemqy6Hjdj", "title": "Enhancing Compositional Generalization via Compositional Feature Alignment", "track": "main", "status": "Reject", "tldr": "", "abstract": "Real-world applications of machine learning (ML) models often confront data distribution shifts, wherein discrepancies exist between the training and test data distributions. In the common multi-domain multi-class setup, as the number of classes and domains scales up, it becomes infeasible to gather training data for every domain-class combination. This challenge naturally leads the quest for models with Compositional Generalization (CG) ability, where models can generalize to unseen domain-class combinations. To delve into the CG challenge, we develop CG-Bench, a suite of CG benchmarks derived from existing real-world image datasets, and observe that the prevalent pretraining-finetuning paradigm on foundational models, such as CLIP and DINOv2, struggles with the challenge. 
To address this challenge, we propose Compositional Feature Alignment (CFA), a simple two-stage finetuning technique that i) learns two orthogonal linear heads on a pretrained encoder with respect to class and domain labels, and ii) fine-tunes the encoder with the newly learned head frozen. We theoretically and empirically justify that CFA encourages compositional feature learning of pretrained models. We further conduct extensive experiments on CG-Bench for CLIP and DINOv2, two powerful pretrained vision foundation models. Experiment results show that CFA outperforms common finetuning techniques in compositional generalization, corroborating CFA's efficacy in compositional feature learning.", "keywords": "OOD Generalization;Fine-tuning;Compositional Generalization", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "/attachment/4c14940f660ae48e6cb30c38a01c856ccbca7457.zip", "author": "Haoxiang Wang;Haozhe Si;Huajie Shao;Han Zhao", "authorids": "~Haoxiang_Wang1;~Haozhe_Si1;~Huajie_Shao1;~Han_Zhao1", "gender": "M;M;M;M", "homepage": "https://haoxiang-wang.github.io/;https://ehzoahis.github.io/;https://huajieshao.github.io/;https://hanzhaoml.github.io/", "dblp": ";;179/4173;03/3520-2", "google_scholar": "bcInPlwAAAAJ;DUcnRMMAAAAJ;5-D7ZLsAAAAJ;x942ipYAAAAJ", "orcid": ";;0000-0001-7627-5615;0000-0002-8579-1600", "linkedin": "haoxiang-wang-071414ab/;haozhesi-468811146/;huajie-shao-508465113/;", "or_profile": "~Haoxiang_Wang1;~Haozhe_Si1;~Huajie_Shao1;~Han_Zhao1", "aff": "University of Illinois, Urbana Champaign;University of Illinois Urbana-Champaign;College of William and Mary;University of Illinois, Urbana Champaign", "aff_domain": "illinois.edu;illunois.edu;wm.edu;illinois.edu", "position": "PhD student;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@misc{\nwang2024enhancing,\ntitle={Enhancing Compositional Generalization via Compositional Feature Alignment},\nauthor={Haoxiang Wang and Haozhe Si and Huajie Shao and Han Zhao},\nyear={2024},\nurl={https://openreview.net/forum?id=Aemqy6Hjdj}\n}", "github": "", "project": "", "reviewers": "ZjmZ;d1KC;H15m", "site": "https://openreview.net/forum?id=Aemqy6Hjdj", "pdf_size": 5692695, "rating": "5;6;6", "confidence": "4;3;4", "soundness": "2;3;3", "contribution": "2;3;2", "presentation": "3;2;3", "wc_summary": "66;113;50", "wc_strengths": "74;85;49", "wc_weaknesses": "298;41;117", "wc_questions": "51;28;14", "wc_review": "489;267;230", "wc_reply_reviewers": "104;0;89", "wc_reply_authors": "1796;436;1300", "reply_reviewers": "1;0;1", "reply_authors": "4;1;3", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 76.33333333333333, 26.737406173540634 ], "wc_strengths_avg": [ 69.33333333333333, 15.062831370260005 ], "wc_weaknesses_avg": [ 152.0, 107.79919603905526 ], "wc_questions_avg": [ 31.0, 15.253414918196734 ], "wc_review_avg": [ 328.6666666666667, 114.37462811111368 ], "wc_reply_reviewers_avg": [ 64.33333333333333, 45.90085934804368 ], "wc_reply_authors_avg": [ 1177.3333333333333, 561.9521529651988 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 1.247219128924647 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 
-0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:FlompfGzqNAJ:scholar.google.com/&scioq=Enhancing+Compositional+Generalization+via+Compositional+Feature+Alignment&hl=en&as_sdt=0,33", "gs_version_total": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Illinois Urbana-Champaign;College of William and Mary", "aff_unique_dep": ";", "aff_unique_url": "https://illinois.edu;https://www.wm.edu", "aff_unique_abbr": "UIUC;WM", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "AfSpl24oUJ", "title": "A graph transformer for symbolic regression", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Inferring the underlying mathematical expressions from real-world observed data is a central challenge in scientific discovery. Symbolic regression (SR) techniques stand out as a primary method for addressing this challenge, as they explore a function space characterized by interpretable analytical expressions. Recently, transformer-based approaches have gained widespread popularity for solving symbolic regression problems. However, these existing transformer-based models rely on pre-order traversal of expressions as supervision, essentially compressing the information within a computation tree into a token sequence. This compression makes the derived formula highly sensitive to the order of decoded tokens. To address this sensitivity issue, we introduce a novel model architecture called the Graph Transformer (GT), which is purpose-built for directly predicting the tree structure of mathematical formulas. In empirical evaluations, our proposed method demonstrates significant improvements in terms of formula skeleton recovery rates and R-squared scores for data fitting when compared to state-of-the-art transformer-based approaches.", "keywords": "attention mechanism;graph transformer;symbolic regression", "primary_area": "generative models", "supplementary_material": "", "author": "Weiheng Zhong;Hadi Meidani", "authorids": "~Weiheng_Zhong1;~Hadi_Meidani1", "gender": "M;Not Specified", "homepage": ";https://uq.cee.illinois.edu", "dblp": ";", "google_scholar": "fgLb_DsAAAAJ;", "orcid": "0000-0002-7902-3568;", "linkedin": "weiheng-zhong-796481238/;", "or_profile": "~Weiheng_Zhong1;~Hadi_Meidani1", "aff": "University of Illinois Urbana Champaign;", "aff_domain": "illinois.edu;", "position": "PhD student;", "bibtex": "@misc{\nzhong2024a,\ntitle={A graph transformer for symbolic regression},\nauthor={Weiheng Zhong and Hadi Meidani},\nyear={2024},\nurl={https://openreview.net/forum?id=AfSpl24oUJ}\n}", "github": "", "project": "", "reviewers": "YZQC;mTB4;dKpQ;SCmX", "site": "https://openreview.net/forum?id=AfSpl24oUJ", "pdf_size": 806007, "rating": "3;3;5;5", "confidence": "4;4;4;4", "soundness": "3;2;3;3", "contribution": "2;2;2;3", "presentation": "1;2;2;2", "wc_summary": "113;56;26;73", "wc_strengths": "115;44;39;42", "wc_weaknesses": "773;265;184;35", "wc_questions": "247;100;185;162", "wc_review": "1248;465;434;312", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_avg": [ 67.0, 31.44041984452498 ], "wc_strengths_avg": [ 
60.0, 31.804087787578503 ], "wc_weaknesses_avg": [ 314.25, 277.4088814367702 ], "wc_questions_avg": [ 173.5, 52.60465758846834 ], "wc_review_avg": [ 614.75, 370.0536278703399 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:cVyLaAYYqowJ:scholar.google.com/&scioq=A+graph+transformer+for+symbolic+regression&hl=en&as_sdt=0,48", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "University of Illinois Urbana-Champaign", "aff_unique_dep": "", "aff_unique_url": "https://illinois.edu", "aff_unique_abbr": "UIUC", "aff_campus_unique_index": "0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "General Stability Analysis for Zeroth-Order Optimization Algorithms", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19245", "id": "AfhNyr73Ma", "author_site": "Xinyue Liu, Hualin Zhang, Bin Gu, Hong Chen", "tldr": "", "abstract": "Zeroth-order optimization algorithms are widely used for black-box optimization problems, such as those in machine learning and prompt engineering, where the gradients are approximated using function evaluations. Recently, a generalization result was provided for zeroth-order stochastic gradient descent (SGD) algorithms through stability analysis. However, this result was limited to the vanilla 2-point zeroth-order estimate of Gaussian distribution used in SGD algorithms. To address these limitations, we propose a general proof framework for stability analysis that applies to convex, strongly convex, and non-convex conditions, and yields results for popular zeroth-order optimization algorithms, including SGD, GD, and SVRG, as well as various zeroth-order estimates, such as 1-point and 2-point with different distributions and coordinate estimates. 
Our general analysis shows that coordinate estimation can lead to tighter generalization bounds for SGD, GD, and SVRG versions of zeroth-order optimization algorithms, due to the smaller expansion brought by coordinate estimates to stability analysis.", "keywords": "Stability Analysis; Zeroth-Order Optimization; Black-Box Learning", "primary_area": "learning theory", "supplementary_material": "/attachment/ce5a38524d3d18e6062d950399c839cc2e0e1042.zip", "author": "Xinyue Liu;Hualin Zhang;Bin Gu;Hong Chen", "authorids": "~Xinyue_Liu3;~Hualin_Zhang1;~Bin_Gu1;~Hong_Chen1", "gender": "F;M;M;", "homepage": ";https://github.com/zhanghualin0;https://mbzuai.ac.ae/study/faculty/bin-gu/;https://chenhongml.github.io/", "dblp": ";303/7916;29/1758-1;https://dblp.uni-trier.de/pers/hd/c/Chen_0004:Hong", "google_scholar": ";;Vo8OgCgAAAAJ;", "orcid": "0009-0005-4885-1917;;0000-0001-6049-1815;", "linkedin": ";;;", "or_profile": "~Xinyue_Liu3;~Hualin_Zhang1;~Bin_Gu1;~Hong_Chen1", "aff": "Huazhong Agricultural University;Mohamed bin Zayed University of Artificial Intelligence;Mohamed bin Zayed University of Artificial Intelligence;Huazhong Agricultural University", "aff_domain": "hzau.edu.cn;mbzuai.ac.ae;mbzuai.ac.ae;hzau.edu.cn", "position": "Undergrad student;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nliu2024general,\ntitle={General Stability Analysis for Zeroth-Order Optimization Algorithms},\nauthor={Xinyue Liu and Hualin Zhang and Bin Gu and Hong Chen},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=AfhNyr73Ma}\n}", "github": "", "project": "", "reviewers": "qUEd;PNwi;N2Y8;x656", "pdf_size": 1050024, "rating": "6;6;8;8", "confidence": "4;4;3;3", "soundness": "3;3;3;4", "contribution": "2;3;3;3", "presentation": "3;3;3;4", "wc_summary": "118;45;61;185", "wc_strengths": "63;34;63;49", "wc_weaknesses": "122;73;27;30", "wc_questions": "55;5;15;28", "wc_review": "358;157;166;292", "wc_reply_reviewers": "25;0;0;10", "wc_reply_authors": "1178;385;365;175", "reply_reviewers": "1;0;0;1", "reply_authors": "3;1;1;2", "rating_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 102.25, 54.94258366695181 ], "wc_strengths_avg": [ 52.25, 11.986972094736853 ], "wc_weaknesses_avg": [ 63.0, 38.61994303465504 ], "wc_questions_avg": [ 25.75, 18.7533330370897 ], "wc_review_avg": [ 243.25, 85.07459961704198 ], "wc_reply_reviewers_avg": [ 8.75, 10.231690964840562 ], "wc_reply_authors_avg": [ 525.75, 385.39160278864404 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10860145463877914785&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "openreview": "https://openreview.net/forum?id=AfhNyr73Ma", "pdf": "https://openreview.net/pdf?id=AfhNyr73Ma", "email": "hzau.edu.cn;mbzuai.ac.ae;mbzuai.ac.ae;hzau.edu.cn", "author_num": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Huazhong Agricultural University;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "http://www.hzau.edu.cn/;https://mbzuai.ac.ae", "aff_unique_abbr": "HAU;MBZUAI", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;1;1;0", "aff_country_unique": "China;United Arab Emirates" }, { "id": "AfiM6F2YPY", "title": "Applying language models to algebraic topology: generating simplicial cycles using multi-labeling in Wu's formula", "track": "main", "status": "Reject", "tldr": "", "abstract": "Computing homotopy groups of spheres has long been a fundamental objective in algebraic topology. Various theoretical and algorithmic approaches have been developed to tackle this problem. In this paper we take a step towards the goal of comprehending the group-theoretic structure of the generators of these homotopy groups by leveraging the power of machine learning. Specifically, in the simplicial group setting of Wu's formula, we reformulate the problem of generating simplicial cycles as a problem of sampling from the intersection of algorithmic datasets related to Dyck languages. We present and evaluate language modelling approaches that employ multi-label information for input sequences, along with the necessary group-theoretic toolkit and non-neural baselines.", "keywords": "transformers;group theory;algebraic topology", "primary_area": "generative models", "supplementary_material": "/attachment/41fddd281731146639d01291e86d6c2c55ce872f.zip", "author": "Kirill Brilliantov;Fedor Pavutnitskiy;Dmitry Pasechnyuk;German Magai", "authorids": "~Kirill_Brilliantov1;~Fedor_Pavutnitskiy1;~Dmitry_Pasechnyuk1;~German_Magai1", "gender": "M;M;M;M", "homepage": "https://github.com/kibrq;;http://dmivilensky.ru/;https://www.hse.ru/en/org/persons/364631586", "dblp": "350/5533;285/5395;242/6650;318/9190", "google_scholar": "thgwrhYAAAAJ;;yUfa6X8AAAAJ;", "orcid": ";0000-0002-8676-6941;0000-0002-1208-1659;", "linkedin": ";;;german-magai-0b7a69233/", "or_profile": "~Kirill_Brilliantov1;~Fedor_Pavutnitskiy1;~Dmitry_Pasechnyuk1;~German_Magai1", "aff": "ETHZ - ETH Zurich;Beijing Institute of Mathematical Sciences and Applications;Mohamed bin Zayed University of Artificial Intelligence;Higher School of Economics", "aff_domain": "ethz.ch;bimsa.cn;mbzuai.ac.ae;hse.ru", "position": "MS student;Assistant Professor;Researcher;PhD student", "bibtex": "@misc{\nbrilliantov2024applying,\ntitle={Applying language models to algebraic topology: generating simplicial cycles using multi-labeling in Wu's formula},\nauthor={Kirill Brilliantov and Fedor Pavutnitskiy and Dmitry Pasechnyuk and German Magai},\nyear={2024},\nurl={https://openreview.net/forum?id=AfiM6F2YPY}\n}", "github": "", "project": "", "reviewers": "VtUd;LHhA;aKQP", "site": "https://openreview.net/forum?id=AfiM6F2YPY", "pdf_size": 785192, "rating": "3;3;6", "confidence": "3;4;3", "soundness": "3;3;4", "contribution": "2;3;4", "presentation": "2;2;4", "wc_summary": "60;283;102", "wc_strengths": "58;85;29", "wc_weaknesses": "306;184;61", "wc_questions": "40;436;49", "wc_review": "464;988;241", "wc_reply_reviewers": "180;230;77", "wc_reply_authors": "720;804;298", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 4.0, 1.4142135623730951 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 2.6666666666666665, 0.9428090415820634 ], "wc_summary_avg": [ 148.33333333333334, 96.75513193394734 ], "wc_strengths_avg": [ 57.333333333333336, 22.866763848189994 ], "wc_weaknesses_avg": [ 183.66666666666666, 100.02110888318637 ], "wc_questions_avg": [ 175.0, 184.59144075498193 ], "wc_review_avg": [ 564.3333333333334, 
313.10523612499503 ], "wc_reply_reviewers_avg": [ 162.33333333333334, 63.698944697346086 ], "wc_reply_authors_avg": [ 607.3333333333334, 221.4036033030076 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ri0V03MH7o8J:scholar.google.com/&scioq=Applying+language+models+to+algebraic+topology:+generating+simplicial+cycles+using+multi-labeling+in+Wu%27s+formula&hl=en&as_sdt=0,5", "gs_version_total": 9, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "ETH Zurich;Beijing Institute of Mathematical Sciences and Applications;Mohamed bin Zayed University of Artificial Intelligence;Higher School of Economics", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ethz.ch;;https://mbzuai.ac.ae;https://www.hse.ru", "aff_unique_abbr": "ETHZ;;MBZUAI;HSE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;3", "aff_country_unique": "Switzerland;China;United Arab Emirates;Russian Federation" }, { "title": "Role of Locality and Weight Sharing in Image-Based Tasks: A Sample Complexity Separation between CNNs, LCNs, and FCNs", "status": "Spotlight", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19244", "id": "AfnsTnYphT", "author_site": "Aakash Sunil Lahoti, Stefani Karp, Ezra Winston, Aarti Singh, Yuanzhi Li", "tldr": "", "abstract": "Vision tasks are characterized by the properties of locality and translation invariance. \n The superior performance of convolutional neural networks (CNNs) on these tasks is widely attributed to the inductive bias of locality and weight sharing baked into their architecture.\n Existing attempts to quantify the statistical benefits of these biases in CNNs over locally connected convolutional neural networks (LCNs) and fully connected neural networks (FCNs) fall into one of the following categories: either they disregard the optimizer and only provide uniform convergence upper bounds with no separating lower bounds, \n or they consider simplistic tasks that do not truly mirror the locality and translation invariance as found in real-world vision tasks.\n To address these deficiencies, we introduce the Dynamic Signal Distribution (DSD) classification task that models an image as consisting of $k$ patches, each of dimension $d$, and the label is determined by a $d$-sparse signal vector that can freely appear in any one of the $k$ patches. \n On this task, for any orthogonally equivariant algorithm like gradient descent, we prove that CNNs require $\\tilde{O}(k+d)$ samples, whereas LCNs require $\\Omega(kd)$ samples, establishing the statistical advantages of weight sharing in translation invariant tasks. 
\n Furthermore, LCNs need $\\tilde{O}(k(k+d))$ samples, compared to $\\Omega(k^2d)$ samples for FCNs, showcasing the benefits of locality in local tasks.\n Additionally, we develop information theoretic tools for analyzing randomized algorithms, which may be of interest for statistical research.", "keywords": "Deep Learning Theory;Sample Complexity;Convolutional Neural Networks", "primary_area": "learning theory", "supplementary_material": "", "author": "Aakash Lahoti;Stefani Karp;Ezra Winston;Aarti Singh;Yuanzhi Li", "authorids": "~Aakash_Lahoti1;~Stefani_Karp1;~Ezra_Winston1;~Aarti_Singh1;~Yuanzhi_Li1", "gender": "M;F;;F;M", "homepage": ";;https://ezrawinston.github.io;https://www.cs.cmu.edu/~aarti;", "dblp": ";280/1111;66/9442;64/5328;73/3628", "google_scholar": "wGUvxZQAAAAJ;iMknz8EAAAAJ;;vGBcNVAAAAAJ;", "orcid": ";;;;", "linkedin": "aakashlahoti/;;;;", "or_profile": "~Aakash_Lahoti1;~Stefani_Karp1;~Ezra_Winston1;~Aarti_Singh1;~Yuanzhi_Li1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Machine Learning Department, School of Computer Science;University of Wisconsin - Madison;Carnegie Mellon University", "aff_domain": "andrew.cmu.edu;cmu.edu;mld.cs.cmu.edu;wisc.edu;andrew.cmu.edu", "position": "PhD student;PhD student;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nlahoti2024role,\ntitle={Role of Locality and Weight Sharing in Image-Based Tasks: A Sample Complexity Separation between {CNN}s, {LCN}s, and {FCN}s},\nauthor={Aakash Lahoti and Stefani Karp and Ezra Winston and Aarti Singh and Yuanzhi Li},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=AfnsTnYphT}\n}", "github": "", "project": "", "reviewers": "yeJ4;o7GU;jtyo;LSto", "pdf_size": 1988737, "rating": "6;8;8;8", "confidence": "3;5;3;3", "soundness": "3;3;3;4", "contribution": "3;2;2;3", "presentation": "3;3;3;4", "wc_summary": "28;78;22;176", "wc_strengths": "49;46;23;176", "wc_weaknesses": "167;367;226;84", "wc_questions": "9;1;49;36", "wc_review": "253;492;320;472", "wc_reply_reviewers": "0;36;12;0", "wc_reply_authors": "596;1889;332;195", "reply_reviewers": "0;1;1;0", "reply_authors": "2;4;2;1", "rating_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 76.0, 61.69278726074872 ], "wc_strengths_avg": [ 73.5, 60.02707722353305 ], "wc_weaknesses_avg": [ 211.0, 103.23032500191017 ], "wc_questions_avg": [ 23.75, 19.51121472384536 ], "wc_review_avg": [ 384.25, 100.82751360615812 ], "wc_reply_reviewers_avg": [ 12.0, 14.696938456699069 ], "wc_reply_authors_avg": [ 753.0, 671.5188009281646 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17650709807965419878&as_sdt=5,24&sciodt=0,24&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=AfnsTnYphT", "pdf": "https://openreview.net/pdf?id=AfnsTnYphT", "email": "andrew.cmu.edu;cmu.edu;mld.cs.cmu.edu;wisc.edu;andrew.cmu.edu", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Carnegie Mellon University;University of Wisconsin-Madison", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.wisc.edu", 
"aff_unique_abbr": "CMU;UW-Madison", "aff_campus_unique_index": "1", "aff_campus_unique": ";Madison", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "AgCz44ebFe", "title": "May the Forgetting Be with You: Alternate Replay for Learning with Noisy Labels", "track": "main", "status": "Reject", "tldr": "", "abstract": "Forgetting presents a significant challenge during incremental training, making it particularly demanding for contemporary AI systems to assimilate new knowledge in streaming data environments. To address this issue, most approaches in Continual Learning (CL) rely on the replay of a restricted buffer of past data. However, the presence of noise in real-world scenarios, where human annotation is constrained by time limitations, frequently renders these strategies vulnerable. In this study, we address the problem of CL under Noisy labels (CLN) by introducing Alternate Experience Replay (AER), a novel strategy that takes advantage of forgetting to maintain a clear differentiation between clean, complex, and noisy samples in the memory buffer. The idea is that complex or mislabeled examples, which hardly fit the previously learned data distribution, are the ones most likely to be forgotten. To grasp the benefits of such a separation, we equip AER with Asymmetric Balanced Sampling: a new sample selection strategy that prioritizes purity on the current task while retaining relevant samples from the past. Through extensive computational comparisons, we demonstrate the effectiveness of our approach in terms of both accuracy and purity of the obtained buffer, resulting in a remarkable average gain of $7.45\\%$ points in accuracy w.r.t. existing loss-based purification strategies.", "keywords": "continual learning;lifelong learning;noisy labels;forgetting;rehearsal;incremental learning", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "/attachment/2b9e56ab884c20e4677d96cf7ae97e2e37311c78.zip", "author": "Monica Millunzi;Lorenzo Bonicelli;Angelo Porrello;Jacopo Credi;Petter N. Kolm;Simone Calderara", "authorids": "~Monica_Millunzi1;~Lorenzo_Bonicelli1;~Angelo_Porrello1;~Jacopo_Credi1;~Petter_N._Kolm1;~Simone_Calderara1", "gender": ";M;M;M;;M", "homepage": ";https://lorenzobonicelli.net/;;;;", "dblp": ";299/8442;223/4466;;;13/422", "google_scholar": ";ovXU58MAAAAJ;b3-5Ys4AAAAJ;XOG3FBEAAAAJ;;https://scholar.google.it/citations?user=CZd-WXkAAAAJ", "orcid": ";0000-0002-9717-5602;0000-0002-9022-8484;;;0000-0001-9056-1538", "linkedin": ";;;jacopocredi/;;", "or_profile": "~Monica_Millunzi1;~Lorenzo_Bonicelli1;~Angelo_Porrello1;~Jacopo_Credi1;~Petter_N._Kolm1;~Simone_Calderara1", "aff": ";University of Modena and Reggio Emilia;University of Modena and Reggio Emilia, AimageLab;;;University of Modena and Reggio Emilia", "aff_domain": ";unimore.it;unimore.it;;;unimore.it", "position": ";PhD student;Postdoc;;;Full Professor", "bibtex": "@misc{\nmillunzi2024may,\ntitle={May the Forgetting Be with You: Alternate Replay for Learning with Noisy Labels},\nauthor={Monica Millunzi and Lorenzo Bonicelli and Angelo Porrello and Jacopo Credi and Petter N. 
Kolm and Simone Calderara},\nyear={2024},\nurl={https://openreview.net/forum?id=AgCz44ebFe}\n}", "github": "", "project": "", "reviewers": "QKiJ;Jj1M;2XeP;aFAe", "site": "https://openreview.net/forum?id=AgCz44ebFe", "pdf_size": 1654384, "rating": "3;5;5;6", "confidence": "4;4;3;4", "soundness": "2;2;2;2", "contribution": "1;2;2;2", "presentation": "2;3;2;3", "wc_summary": "88;58;165;65", "wc_strengths": "23;19;79;34", "wc_weaknesses": "183;265;66;502", "wc_questions": "15;6;82;4", "wc_review": "309;348;392;605", "wc_reply_reviewers": "0;178;0;483", "wc_reply_authors": "403;1529;980;1896", "reply_reviewers": "0;2;0;2", "reply_authors": "1;4;2;4", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.0, 0.0 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 94.0, 42.46763473517215 ], "wc_strengths_avg": [ 38.75, 23.878599205146017 ], "wc_weaknesses_avg": [ 254.0, 159.69502183850315 ], "wc_questions_avg": [ 26.75, 32.16655872175325 ], "wc_review_avg": [ 413.5, 114.39514849852681 ], "wc_reply_reviewers_avg": [ 165.25, 197.321279896518 ], "wc_reply_authors_avg": [ 1202.0, 564.8561763847501 ], "reply_reviewers_avg": [ 1.0, 1.0 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.13245323570650439, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1183970600880101647&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Modena and Reggio Emilia", "aff_unique_dep": "", "aff_unique_url": "https://www.unimore.it", "aff_unique_abbr": "", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Italy" }, { "title": "Large Language Models as Analogical Reasoners", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19243", "id": "AgDICX1h50", "author_site": "Michihiro Yasunaga, Xinyun Chen, Yujia Li, Panupong Pasupat, Jure Leskovec, Percy Liang, Ed H. Chi, Denny Zhou", "tldr": "", "abstract": "Chain-of-thought (CoT) prompting for language models demonstrates impressive performance across reasoning tasks, but typically needs labeled exemplars of the reasoning process. In this work, we introduce a new prompting approach, analogical prompting, designed to automatically guide the reasoning process of large language models. Inspired by analogical reasoning, a cognitive process in which humans draw from relevant past experiences to tackle new problems, our approach prompts language models to self-generate relevant exemplars or knowledge in the context, before proceeding to solve the given problem. This method presents several advantages: it obviates the need for labeling or retrieving exemplars, offering generality and convenience; it can also tailor the generated exemplars and knowledge to each problem, offering adaptability. 
Experimental results show that our approach outperforms 0-shot CoT and manual few-shot CoT in a variety of reasoning tasks, including math problem solving in GSM8K and MATH, code generation in Codeforces, and other reasoning tasks in BIG-Bench.", "keywords": "large language model;prompting;analogical reasoning;reasoning", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/c7a7583e50c0cdf2ca4cefbfce8ba37d8735af9a.pdf", "author": "Michihiro Yasunaga;Xinyun Chen;Yujia Li;Panupong Pasupat;Jure Leskovec;Percy Liang;Ed H. Chi;Denny Zhou", "authorids": "~Michihiro_Yasunaga1;~Xinyun_Chen1;~Yujia_Li1;~Panupong_Pasupat1;~Jure_Leskovec1;~Percy_Liang1;~Ed_H._Chi1;~Denny_Zhou1", "gender": ";M;M;;;;M;F", "homepage": ";https://yujiali.github.io/;https://ppasupat.github.io/;http://cs.stanford.edu/~jure/;https://cs.stanford.edu/~pliang/;https://dennyzhou.github.io/;http://edchi.net;https://jungyhuk.github.io/", "dblp": "202/1809;67/3069;124/9178;l/JureLeskovec;04/1701;178/3277;13/310;", "google_scholar": "SieJYoEAAAAJ;https://scholar.google.ca/citations?user=UY7CtEwAAAAJ;BqKXIA8AAAAJ;Q_kKkIUAAAAJ;pouyVyUAAAAJ;UwLsYw8AAAAJ;VuWl-KUAAAAJ;d4W1UT0AAAAJ", "orcid": ";;;0000-0002-5411-923X;;;0000-0003-3230-5338;", "linkedin": ";;;leskovec/;;;edchi/;", "or_profile": "~Michihiro_Yasunaga1;~Yujia_Li1;~Panupong_Pasupat1;~Jure_Leskovec1;~Percy_Liang1;~Dengyong_Zhou2;~Ed_Chi1;~Xinyun_Chen2", "aff": "Stanford University;Google DeepMind;Google;Kumo.AI;Stanford University;Google DeepMind;Google;Google", "aff_domain": "stanford.edu;google.com;google.com;kumo.ai;stanford.edu;google.com;google.com;google.com", "position": "PhD student;Research Scientist;Employee;Chief Scientist;Associate Professor;Research Scientist;Researcher;Researcher", "bibtex": "@inproceedings{\nyasunaga2024large,\ntitle={Large Language Models as Analogical Reasoners},\nauthor={Michihiro Yasunaga and Xinyun Chen and Yujia Li and Panupong Pasupat and Jure Leskovec and Percy Liang and Ed H. 
Chi and Denny Zhou},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=AgDICX1h50}\n}", "github": "", "project": "", "reviewers": "fGoM;ExxM;DVcm;ey56", "pdf_size": 557807, "rating": "5;5;5;8", "confidence": "5;4;3;4", "soundness": "3;3;2;3", "contribution": "2;3;2;3", "presentation": "3;3;3;3", "wc_summary": "49;40;39;32", "wc_strengths": "76;40;192;70", "wc_weaknesses": "87;117;7;264", "wc_questions": "41;45;35;80", "wc_review": "253;242;273;446", "wc_reply_reviewers": "38;375;0;16", "wc_reply_authors": "1071;1262;508;388", "reply_reviewers": "1;3;0;1", "reply_authors": "2;4;1;2", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 40.0, 6.041522986797286 ], "wc_strengths_avg": [ 94.5, 57.92020372892347 ], "wc_weaknesses_avg": [ 118.75, 93.00100805905278 ], "wc_questions_avg": [ 50.25, 17.541023345289748 ], "wc_review_avg": [ 303.5, 83.01957600469903 ], "wc_reply_reviewers_avg": [ 107.25, 155.17308883952785 ], "wc_reply_authors_avg": [ 807.25, 367.99549929312997 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 60, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6114845287628255960&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "openreview": "https://openreview.net/forum?id=AgDICX1h50", "pdf": "https://openreview.net/pdf?id=AgDICX1h50", "email": "stanford.edu;google.com;google.com;kumo.ai;stanford.edu;google.com;google.com;google.com", "author_num": 8, "aff_unique_index": "0;1;1;2;0;1;1;1", "aff_unique_norm": "Stanford University;Google;Kumo.AI", "aff_unique_dep": ";Google DeepMind;", "aff_unique_url": "https://www.stanford.edu;https://deepmind.com;https://www.kumo.ai", "aff_unique_abbr": "Stanford;DeepMind;Kumo.AI", "aff_campus_unique_index": "0;2;0;2;2", "aff_campus_unique": "Stanford;;Mountain View", "aff_country_unique_index": "0;1;0;0;0;1;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "OMNI: Open-endedness via Models of human Notions of Interestingness", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19242", "id": "AgM3MzT99c", "author_site": "Jenny Zhang, Joel Lehman, Kenneth Stanley, Jeff Clune", "tldr": "", "abstract": "Open-ended algorithms aim to learn new, interesting behaviors forever. That requires a vast environment search space, but there are thus infinitely many possible tasks. Even after filtering for tasks the current agent can learn (i.e., learning progress), countless learnable yet uninteresting tasks remain (e.g., minor variations of previously learned tasks). An Achilles Heel of open-endedness research is the inability to quantify (and thus prioritize) tasks that are not just learnable, but also $\\textit{interesting}$ (e.g., worthwhile and novel). We propose solving this problem by $\\textit{Open-endedness via Models of human Notions of Interestingness}$ (OMNI). The insight is that we can utilize foundation models (FMs) as a model of interestingness (MoI), because they $\\textit{already}$ internalize human concepts of interestingness from training on vast amounts of human-generated data, where humans naturally write about what they find interesting or boring. 
We show that FM-based MoIs improve open-ended learning by focusing on tasks that are both learnable $\\textit{and interesting}$, outperforming baselines based on uniform task sampling or learning progress alone. This approach has the potential to dramatically advance the ability to intelligently select which tasks to focus on next (i.e., auto-curricula), and could be seen as AI selecting its own next task to learn, facilitating self-improving AI and AI-Generating Algorithms.", "keywords": "Open-endedness;Auto-Curriculum Learning;Reinforcement Learning", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Jenny Zhang;Joel Lehman;Kenneth Stanley;Jeff Clune", "authorids": "~Jenny_Zhang1;~Joel_Lehman1;~Kenneth_Stanley1;~Jeff_Clune3", "gender": ";;M;", "homepage": ";http://joellehman.com;https://www.kenstanley.net/;", "dblp": ";47/8285;s/KennethOStanley;", "google_scholar": ";GcvxHWQAAAAJ;https://scholar.google.com.tw/citations?user=6Q6oO1MAAAAJ;", "orcid": ";;;", "linkedin": ";;kenneth-stanley-3a159b/;", "or_profile": "~Jenny_Zhang1;~Joel_Lehman1;~Kenneth_Stanley1;~Jeff_Clune3", "aff": ";Carper.AI;;", "aff_domain": ";carper.ai;;", "position": ";Research Advisor;;", "bibtex": "@inproceedings{\nzhang2024omni,\ntitle={{OMNI}: Open-endedness via Models of human Notions of Interestingness},\nauthor={Jenny Zhang and Joel Lehman and Kenneth Stanley and Jeff Clune},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=AgM3MzT99c}\n}", "github": "", "project": "", "reviewers": "MxZx;LSLH;CZLW;qXzy", "pdf_size": 27141831, "rating": "3;6;8;8", "confidence": "4;5;3;4", "soundness": "2;2;3;3", "contribution": "2;3;2;3", "presentation": "1;4;3;2", "wc_summary": "211;127;109;134", "wc_strengths": "61;115;115;52", "wc_weaknesses": "192;236;223;459", "wc_questions": "190;102;2;11", "wc_review": "654;580;449;656", "wc_reply_reviewers": "0;0;12;31", "wc_reply_authors": "823;864;767;805", "reply_reviewers": "0;0;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 6.25, 2.0463381929681126 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 145.25, 39.04084399702445 ], "wc_strengths_avg": [ 85.75, 29.422567868899545 ], "wc_weaknesses_avg": [ 277.5, 106.00117923872357 ], "wc_questions_avg": [ 76.25, 76.44074502515004 ], "wc_review_avg": [ 584.75, 84.14682109265922 ], "wc_reply_reviewers_avg": [ 10.75, 12.676257334087218 ], "wc_reply_authors_avg": [ 814.75, 34.888214342382156 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.34554737023254406, "gs_citation": 39, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16474108468889890747&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=AgM3MzT99c", "pdf": "https://openreview.net/pdf?id=AgM3MzT99c", "email": ";carper.ai;;", "author_num": 4, "aff_unique_index": "0", "aff_unique_norm": "Carper.AI", "aff_unique_dep": "", "aff_unique_url": "https://www.carper.ai", "aff_unique_abbr": "Carper.AI", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "BatchPrompt: Accomplish more with less", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19241", "id": "Agyicd577r", "author_site": "Jianzhe Lin, Maurice Diesendruck, 
Liang Du, Robin Abraham", "tldr": "", "abstract": "The ever-increasing token limits of large language models (LLMs) have enabled long context as input. Many LLMs are trained and fine-tuned to perform zero/few-shot inference using instruction-based prompts. Prompts typically include a detailed task instruction, several examples, and a single data point for inference. This baseline is referred to as \u201cSinglePrompt\u201d in this paper. In terms of token count, when the data input is small compared to instructions and examples, this results in lower token utilization, compared with encoder-based models like fine-tuned BERT. This cost inefficiency, affecting inference speed and compute budget, counteracts many of the benefits that LLMs offer. This paper aims to alleviate this problem by batching multiple data points in each prompt, a strategy we refer to as \u201cBatchPrompt\u201d. We improve token utilization by increasing the \u201cdensity\u201d of data points, however, this cannot be done naively. Simple batching can degrade performance, especially as batch size increases, and data points can yield different answers depending on their position within a prompt. To address the quality issue while retaining high token utilization, we introduce Batch Permutation and Ensembling (BPE) for BatchPrompt \u2013 a simple majority vote over repeated permutations of data, that recovers label quality at the cost of more token usage. To counterbalance this cost, we further propose Self-reflection-guided EArly Stopping (SEAS), which can terminate the voting process early for data points that the LLM handles confidently. Our comprehensive experimental evaluation demonstrates that BPE + SEAS can boost the performance of BatchPrompt by a striking margin on a range of popular NLP tasks, including question answering (Boolq), textual entailment (RTE), and duplicate questions identification (QQP). This performance is even competitive with/higher than single-data prompting (SinglePrompt), while using far fewer LLM calls and input tokens. At batch size 32, our BatchPrompt + BPE + SEAS uses 15.7% the number of LLM calls, and achieves: Boolq accuracy 90.6% \u2192 90.9% with 27.4% tokens, QQP accuracy 87.2% \u2192 88.4% with 18.6% tokens, RTE accuracy 91.5% \u2192 91.1% with 30.8% tokens. We hope our simple yet effective approach will shed light on the future research of large language models. 
Code: github.com/microsoft/BatchPrompt", "keywords": "large language models;token-resource utilization;prompt", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/50783e8b30f12efd8f8bac8609eee0b52536a03c.zip", "author": "Jianzhe Lin;Maurice Diesendruck;Liang Du;Robin Abraham", "authorids": "~Jianzhe_Lin1;~Maurice_Diesendruck1;~Liang_Du3;~Robin_Abraham1", "gender": "M;M;;", "homepage": "http://jianzhelin.github.io;https://github.com/diesendruck;;", "dblp": "https://dblp.org/pers/l/Lin:Jianzhe.html;;;", "google_scholar": "https://scholar.google.ca/citations?user=6709egkAAAAJ;;aq4dG-AAAAAJ;aONpOEwAAAAJ", "orcid": ";;;", "linkedin": "jianzhe-lin-a4135baa/?originalSubdomain=ca;;;robin-abraham-b206059/", "or_profile": "~Jianzhe_Lin1;~Maurice_Diesendruck1;~Liang_Du3;~Robin_Abraham1", "aff": "Microsoft;Apple;Microsoft;Microsoft", "aff_domain": "microsoft.com;apple.com;microsoft.com;microsoft.com", "position": "Senior Applied Scientist;Researcher;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nlin2024batchprompt,\ntitle={BatchPrompt: Accomplish more with less},\nauthor={Jianzhe Lin and Maurice Diesendruck and Liang Du and Robin Abraham},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=Agyicd577r}\n}", "github": "", "project": "", "reviewers": "7rFs;Nbfh;PJJX;pc53", "pdf_size": 808169, "rating": "5;6;6;8", "confidence": "4;4;4;3", "soundness": "3;3;3;4", "contribution": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "84;105;238;31", "wc_strengths": "36;51;86;21", "wc_weaknesses": "80;59;129;115", "wc_questions": "4;96;70;85", "wc_review": "204;311;523;252", "wc_reply_reviewers": "0;0;25;0", "wc_reply_authors": "322;430;802;270", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 114.5, 76.23155514614666 ], "wc_strengths_avg": [ 48.5, 24.109126902482387 ], "wc_weaknesses_avg": [ 95.75, 27.725214156071004 ], "wc_questions_avg": [ 63.75, 35.70976757135224 ], "wc_review_avg": [ 322.5, 121.80414607064901 ], "wc_reply_reviewers_avg": [ 6.25, 10.825317547305483 ], "wc_reply_authors_avg": [ 456.0, 207.9326814139615 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9271726499455306, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7802084067485629782&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=Agyicd577r", "pdf": "https://openreview.net/pdf?id=Agyicd577r", "email": "microsoft.com;apple.com;microsoft.com;microsoft.com", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Microsoft;Apple", "aff_unique_dep": "Microsoft Corporation;Apple Inc.", "aff_unique_url": "https://www.microsoft.com;https://www.apple.com", "aff_unique_abbr": "Microsoft;Apple", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "AhCdJ93Wmi", "title": "Graph Inference Acceleration by Bridging GNNs and MLPs with Self-Supervised Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Graph 
Neural Networks (GNNs) have demonstrated their effectiveness in a variety of graph learning tasks such as node classification and link prediction. However, GNN inference mainly relies on neighborhood aggregation, which limits the deployment in latency-sensitive (i.e., real-time) applications such as financial fraud detection. To solve this problem, recent works have proposed to distill knowledge from teacher GNNs to student Multi-Layer Perceptrons (MLPs) trained on node content for inference acceleration. Despite the progress, these studies still suffer insufficient exploration of structural information when inferring unseen nodes. To address this issue, we propose a new method (namely {\\bf SSL-GM}) to fully integrate rich structural information into MLPs by bridging \\textbf{G}NNs and \\textbf{M}LPs with Self-Supervised Learning (\\textbf{SSL}) for graph inference acceleration while improving model generalization capability. A key new insight of SSL-GM is that, without fetching their neighborhoods, the structural information of unseen nodes can be inferred solely from the nodes themselves with SSL. Specifically, SSL-GM employs self-supervised contrastive learning to align the representations encoded by graph context-aware GNNs and neighborhood dependency-free MLPs, fully integrating the structural information into MLPs. In particular, SSL-GM approximates the representations of GNNs using a non-parametric aggregator to avoid potential model collapse and exploits augmentation to facilitate the training; additionally, SSL-GM further incorporates reconstruction regulation to prevent representation shift caused by augmentation. Theoretically, we interpret our proposed SSL-GM through the principle of information bottleneck, demonstrating its generalization capability; we also analyze model capacity in incorporating structural information from the perspective of mutual information maximization and graph smoothness. Empirically, we demonstrate the superiority of SSL-GM over existing state-of-the-art models in both efficiency and effectiveness. 
In particular, SSL-GM obtains significant performance gains {\\bf (7$\\sim$26\\%)} in comparison to MLPs, and a remarkable acceleration of GNNs {\\bf (90$\\sim$126$\\times$)} on large-scale graph datasets.", "keywords": "Graph Neural Network;Self-supervised Learning;Inference Acceleration", "primary_area": "learning on graphs and other geometries & topologies", "supplementary_material": "/attachment/c9321c31c53095863d027a8a5f2fac942a7c1cb6.zip", "author": "Zehong Wang;Zheyuan Zhang;Chuxu Zhang;Yanfang Ye", "authorids": "~Zehong_Wang2;~Zheyuan_Zhang5;~Chuxu_Zhang2;~Yanfang_Ye1", "gender": "M;M;;", "homepage": "https://zehong-wang.github.io/;https://jasonzhangzy1757.github.io/;;http://yes-lab.org/", "dblp": "319/7828;;;", "google_scholar": "-qXxOv0AAAAJ;qJURp_AAAAAJ;;egjr888AAAAJ", "orcid": "0000-0002-7670-6777;0009-0005-5918-6182;;", "linkedin": "zehong-wang-745b02286/;jasonzhangzy1757/;;", "or_profile": "~Zehong_Wang2;~Zheyuan_Zhang5;~Chuxu_Zhang2;~Yanfang_Ye1", "aff": "University of Notre Dame;University of Notre Dame;;University of Notre Dame", "aff_domain": "nd.edu;nd.edu;;nd.edu", "position": "PhD student;PhD student;;Associate Professor", "bibtex": "@misc{\nwang2024graph,\ntitle={Graph Inference Acceleration by Bridging {GNN}s and {MLP}s with Self-Supervised Learning},\nauthor={Zehong Wang and Zheyuan Zhang and Chuxu Zhang and Yanfang Ye},\nyear={2024},\nurl={https://openreview.net/forum?id=AhCdJ93Wmi}\n}", "github": "", "project": "", "reviewers": "eCUd;N5QN;rTyJ;RVGt;WTGk", "site": "https://openreview.net/forum?id=AhCdJ93Wmi", "pdf_size": 1297645, "rating": "3;3;5;5;6", "confidence": "4;3;5;4;2", "soundness": "2;2;2;3;3", "contribution": "2;2;2;2;3", "presentation": "3;2;3;3;3", "wc_summary": "32;81;84;72;75", "wc_strengths": "20;24;49;63;64", "wc_weaknesses": "353;251;185;94;102", "wc_questions": "1;8;6;37;4", "wc_review": "406;364;324;266;245", "wc_reply_reviewers": "172;0;491;0;0", "wc_reply_authors": "2553;2211;1429;1537;664", "reply_reviewers": "3;0;2;0;0", "reply_authors": "4;4;3;3;1", "rating_avg": [ 4.4, 1.2 ], "confidence_avg": [ 3.6, 1.019803902718557 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "contribution_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 68.8, 18.882796403075474 ], "wc_strengths_avg": [ 44.0, 18.772320048411704 ], "wc_weaknesses_avg": [ 197.0, 96.98453484963466 ], "wc_questions_avg": [ 11.2, 13.105723940324701 ], "wc_review_avg": [ 321.0, 59.80635417746178 ], "wc_reply_reviewers_avg": [ 132.6, 191.1811706209584 ], "wc_reply_authors_avg": [ 1678.8, 657.2349351639793 ], "reply_reviewers_avg": [ 1.0, 1.2649110640673518 ], "reply_authors_avg": [ 3.0, 1.0954451150103321 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.19611613513818404, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:fWIfMeuv-owJ:scholar.google.com/&scioq=Graph+Inference+Acceleration+by+Bridging+GNNs+and+MLPs+with+Self-Supervised+Learning&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Notre Dame", "aff_unique_dep": "", "aff_unique_url": "https://www.nd.edu", "aff_unique_abbr": "Notre Dame", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "AhMEkBSdIV", "title": "LCA-on-the-Line: Benchmarking Out-of-Distribution Generalization with Class Taxonomies", "track": "main", "status": "Reject", "tldr": "", "abstract": "In 
this paper, we address the challenge of assessing model generalization under Out-of-Distribution (OOD) conditions. We reintroduce the Least Common Ancestor (LCA) distance, a metric that has been largely overshadowed since ImageNet. By leveraging the WordNet hierarchy, we utilize the LCA to measure the taxonomic distance between labels and predictions, presenting it as a benchmark for model generalization. The LCA metric proves especially robust in comparison to previous state-of-the-art metrics when evaluating diverse models, including both vision-only and vision-language models on natural distribution shift datasets. To validate our benchmark's efficacy, we perform an extensive empirical study on 75 models spanning five distinct ImageNet-OOD datasets. Our findings reveal a strong linear correlation between in-domain ImageNet LCA scores and OOD Top1 performance across ImageNet-S/R/A/ObjectNet. This discovery gives rise to a novel evaluation framework termed \"LCA-on-the-Line\", facilitating unified and consistent assessments across a broad spectrum of models and datasets.\n\nBeside introducing an evaluative tool, we also delve into the intricate ties between the LCA metric and model generalization. By aligning model predictions more closely with the WordNet hierarchy and refining prompt engineering in zero-shot vision-language models, we offer tangible strategies to improve model generalization. We challenge the prevailing notion that LCA offers no added evaluative value over top-1 accuracy, our research provides invaluable insights and actionable techniques to enhance model robustness and generalization across various tasks and scenarios.", "keywords": "Out-of-Distribution Generalization;representation evaluation;Hierarchy;Vision Language Model;Class Taxonomy;Zero-shot", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/5f1a0827ba140be8acc7f46d9b1b87bff1f3f4a1.zip", "author": "Jia Shi", "authorids": "~Jia_Shi2", "gender": "M", "homepage": "https://www.linkedin.com/in/elvishelvisshi/", "dblp": "", "google_scholar": "asHobe0AAAAJ", "orcid": "", "linkedin": "elvishelvisshi/", "or_profile": "~Jia_Shi2", "aff": "Carnegie Mellon University", "aff_domain": "cmu.edu", "position": "MS student", "bibtex": "@misc{\nshi2024lcaontheline,\ntitle={{LCA}-on-the-Line: Benchmarking Out-of-Distribution Generalization with Class Taxonomies},\nauthor={Jia Shi},\nyear={2024},\nurl={https://openreview.net/forum?id=AhMEkBSdIV}\n}", "github": "", "project": "", "reviewers": "CuPJ;r8af;5fCB", "site": "https://openreview.net/forum?id=AhMEkBSdIV", "pdf_size": 12468190, "rating": "3;5;8", "confidence": "4;3;3", "soundness": "2;3;4", "contribution": "2;2;4", "presentation": "1;3;2", "wc_summary": "55;70;176", "wc_strengths": "14;165;60", "wc_weaknesses": "186;313;163", "wc_questions": "2;5;133", "wc_review": "257;553;532", "wc_reply_reviewers": "0;0;30", "wc_reply_authors": "1209;638;287", "reply_reviewers": "0;0;1", "reply_authors": "3;2;2", "rating_avg": [ 5.333333333333333, 2.0548046676563256 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "contribution_avg": [ 2.6666666666666665, 0.9428090415820634 ], "presentation_avg": [ 2.0, 0.816496580927726 ], "wc_summary_avg": [ 100.33333333333333, 53.85371131335539 ], "wc_strengths_avg": [ 79.66666666666667, 63.19458485942886 ], "wc_weaknesses_avg": [ 220.66666666666666, 65.9612680964283 ], "wc_questions_avg": [ 
46.666666666666664, 61.05916984550496 ], "wc_review_avg": [ 447.3333333333333, 134.85877386691934 ], "wc_reply_reviewers_avg": [ 10.0, 14.142135623730951 ], "wc_reply_authors_avg": [ 711.3333333333334, 379.9599394088218 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.8029550685469661, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14865788217805581136&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 8, "aff_unique_index": "0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "AhcxMGfqQn", "title": "Collaborative World Models: An Online-Offline Transfer RL Approach", "track": "main", "status": "Reject", "tldr": "", "abstract": "Training offline reinforcement learning (RL) models with visual inputs is challenging due to the coupling of overfitting issue in representation learning and the risk of overestimating true value functions. Recent work has attempted to alleviate the overestimation bias by encouraging conservative behaviors beyond the scope of the offline dataset. This paper, in contrast, tries to build flexible constraints for the offline policies without impeding the exploration of potential advantages. The key idea is to leverage an off-the-shelf RL simulator, with which can be easily interacted in an online manner. In this auxiliary domain, we perform an actor-critic algorithm whose value model is aligned to the target data and thus serves as a \u201c$\\textit{test bed}$\u201d for the offline policies. In this way, the online simulator can be used as the $\\textit{playground}$ for the offline agent, allowing for mildly-conservative value estimation. Experimental results demonstrate the remarkable effectiveness of our approach in challenging environments such as DeepMind Control, Meta-World, and RoboDesk. 
It outperforms existing offline visual RL approaches by substantial margins.", "keywords": "World models;reinforcement learning;visual control;transfer learning", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/232fe4e6270555bd90bf72f233503acb9b80d129.zip", "author": "Qi Wang;Junming Yang;Yunbo Wang;Xin Jin;Wenjun Zeng;Xiaokang Yang", "authorids": "~Qi_Wang26;~Junming_Yang1;~Yunbo_Wang2;~Xin_Jin8;~Wenjun_Zeng3;~Xiaokang_Yang1", "gender": "M;M;M;M;M;M", "homepage": "https://qiwang067.github.io/;https://junming-yang.github.io/;http://home.ustc.edu.cn/~jinxustc/;https://www.eias.ac.cn/h-col-187.html;https://icne.sjtu.edu.cn/info/1064/1078.htm;https://wyb15.github.io/", "dblp": "19/1924-80;191/4782.html;68/3340-14;57/145;06/3071-1.html;84/3894", "google_scholar": "OwW5XfMAAAAJ;L6R5ExQAAAAJ;byaSC-kAAAAJ;_cUfvYQAAAAJ;yDEavdMAAAAJ;C8bGfr0AAAAJ", "orcid": ";0000-0002-4261-6271;0000-0002-1820-8358;;0000-0003-4029-3322;", "linkedin": "qi-wang-chris-7a6670361/;;;;;", "or_profile": "~Qi_Wang26;~Junming_Yang1;~Xin_Jin8;~Wenjun_Zeng3;~Xiaokang_Yang1;~Yunbo_Wang1", "aff": "Shanghai Jiaotong University;Nanjing University of Posts and Telecommunications;Eastern Institute of Technology, Ningbo;Eastern Institute for Advanced Study;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;njupt.edu.cn;eitech.edu.cn;eias.ac.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;Undergrad student;Assistant Professor;Full Professor;Full Professor;Associate Professor", "bibtex": "@misc{\nwang2024collaborative,\ntitle={Collaborative World Models: An Online-Offline Transfer {RL} Approach},\nauthor={Qi Wang and Junming Yang and Yunbo Wang and Xin Jin and Wenjun Zeng and Xiaokang Yang},\nyear={2024},\nurl={https://openreview.net/forum?id=AhcxMGfqQn}\n}", "github": "", "project": "", "reviewers": "wybs;MD4J;77pB;G38A;mTFC", "site": "https://openreview.net/forum?id=AhcxMGfqQn", "pdf_size": 3818750, "rating": "3;3;5;5;6", "confidence": "2;2;4;2;4", "soundness": "2;2;3;3;3", "contribution": "2;2;2;2;2", "presentation": "2;3;3;3;3", "wc_summary": "76;83;126;141;122", "wc_strengths": "43;73;75;89;105", "wc_weaknesses": "154;206;187;116;409", "wc_questions": "158;21;113;42;87", "wc_review": "431;383;501;388;723", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "734;452;830;783;1281", "reply_reviewers": "0;0;0;0;0", "reply_authors": "1;1;2;2;2", "rating_avg": [ 4.4, 1.2 ], "confidence_avg": [ 2.8, 0.9797958971132712 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 109.6, 25.476263462289758 ], "wc_strengths_avg": [ 77.0, 20.513410247932935 ], "wc_weaknesses_avg": [ 214.4, 102.00117646380359 ], "wc_questions_avg": [ 84.2, 49.109673181563736 ], "wc_review_avg": [ 485.2, 126.19730583495037 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 816.0, 267.10672024492385 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.748455199183749, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:M3nft3Kp0OAJ:scholar.google.com/&scioq=Collaborative+World+Models:+An+Online-Offline+Transfer+RL+Approach&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;0;0", "aff_unique_norm": "Shanghai Jiao Tong University;Nanjing University of Posts and Telecommunications;Eastern Institute of Technology;Eastern 
Institute for Advanced Study", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.sjtu.edu.cn;http://www.njupt.edu.cn;https://www.eit.edu.cn;", "aff_unique_abbr": "SJTU;NJUPT;;", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Nanjing;Ningbo", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China;" }, { "title": "How Well Do Supervised 3D Models Transfer to Medical Imaging Tasks?", "status": "Oral", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19240", "id": "AhizIPytk4", "author_site": "Wenxuan Li, Alan Yuille, Zongwei Zhou", "tldr": "", "abstract": "The pre-training and fine-tuning paradigm has become prominent in transfer learning. For example, if the model is pre-trained on ImageNet and then fine-tuned to PASCAL, it can significantly outperform that trained on PASCAL from scratch. While ImageNet pre-training has shown enormous success, it is formed in 2D, and the learned features are for classification tasks; when transferring to more diverse tasks, like 3D image segmentation, its performance is inevitably compromised due to the deviation from the original ImageNet context. A significant challenge lies in the lack of large, annotated 3D datasets rivaling the scale of ImageNet for model pre-training. To overcome this challenge, we make two contributions. Firstly, we construct AbdomenAtlas 1.1 that comprises **9,262** three-dimensional computed tomography (CT) volumes with high-quality, per-voxel annotations of 25 anatomical structures and pseudo annotations of seven tumor types. Secondly, we develop a suite of models that are pre-trained on our AbdomenAtlas 1.1 for transfer learning. Our preliminary analyses indicate that the model trained only with 21 CT volumes, 672 masks, and 40 GPU hours has a transfer learning ability similar to the model trained with 5,050 (unlabeled) CT volumes and 1,152 GPU hours. More importantly, the transfer learning ability of supervised models can further scale up with larger annotated datasets, achieving significantly better performance than preexisting pre-trained models, irrespective of their pre-training methodologies or data sources. 
We hope this study can facilitate collective efforts in constructing larger 3D medical datasets and more releases of supervised pre-trained models.", "keywords": "Transfer Learning;Medical Image Analysis;Organ Segmentation", "primary_area": "datasets and benchmarks", "supplementary_material": "", "author": "Wenxuan Li;Alan Yuille;Zongwei Zhou", "authorids": "~Wenxuan_Li3;~Alan_Yuille1;~Zongwei_Zhou1", "gender": "F;M;M", "homepage": "https://github.com/WenxuanChelsea;;https://www.zongweiz.com/", "dblp": ";y/AlanLYuille;", "google_scholar": "tpNZM2YAAAAJ;;JVOeczAAAAAJ", "orcid": ";;0000-0002-3154-9851", "linkedin": ";;", "or_profile": "~Wenxuan_Li3;~Alan_Yuille1;~Zongwei_Zhou1", "aff": "Johns Hopkins University;Johns Hopkins University;Johns Hopkins University", "aff_domain": "jh.edu;johnshopkins.edu;jhu.edu", "position": "PhD student;Full Professor;Postdoc", "bibtex": "@inproceedings{\nli2024how,\ntitle={How Well Do Supervised 3D Models Transfer to Medical Imaging Tasks?},\nauthor={Wenxuan Li and Alan Yuille and Zongwei Zhou},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=AhizIPytk4}\n}", "github": "", "project": "", "reviewers": "osvs;bY7Q;esgA;mSzX;fMfJ", "pdf_size": 1417390, "rating": "6;6;6;8;8", "confidence": "3;5;4;3;3", "soundness": "3;3;3;3;3", "contribution": "3;3;3;3;3", "presentation": "3;3;2;3;3", "wc_summary": "37;54;99;85;89", "wc_strengths": "29;29;82;52;146", "wc_weaknesses": "134;355;232;88;13", "wc_questions": "34;63;31;8;26", "wc_review": "234;501;444;233;274", "wc_reply_reviewers": "5;0;0;13;0", "wc_reply_authors": "931;3371;1257;586;391", "reply_reviewers": "1;0;0;1;0", "reply_authors": "2;6;2;1;1", "rating_avg": [ 6.8, 0.9797958971132712 ], "confidence_avg": [ 3.6, 0.8 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 72.8, 23.378622713923935 ], "wc_strengths_avg": [ 67.6, 43.765740025732455 ], "wc_weaknesses_avg": [ 164.4, 118.81178392735293 ], "wc_questions_avg": [ 32.4, 17.76063061943466 ], "wc_review_avg": [ 337.2, 112.90597858395276 ], "wc_reply_reviewers_avg": [ 3.6, 5.083306010855534 ], "wc_reply_authors_avg": [ 1307.2, 1073.5798805864426 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 2.4, 1.8547236990991407 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.6123724356957946, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5683284319199448880&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 2, "openreview": "https://openreview.net/forum?id=AhizIPytk4", "pdf": "https://openreview.net/pdf?id=AhizIPytk4", "email": "jh.edu;johnshopkins.edu;jhu.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Johns Hopkins University", "aff_unique_dep": "", "aff_unique_url": "https://www.jhu.edu", "aff_unique_abbr": "JHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "Ai4L058yoO", "title": "Is Feature Extraction the most informative dimensionality reduction technique? Revisiting Unsupervised Feature Selection from a Dynamic Approach", "track": "main", "status": "Reject", "tldr": "", "abstract": "This paper compares unsupervised feature extraction and unsupervised feature selection techniques in the context of dimensionality reduction without using labeled data. 
Unsupervised feature extraction transforms the input space into a lower-dimensional representation by creating informative features that capture underlying patterns, leading to improved model performance. On the other hand, unsupervised feature selection chooses a subset of features based on predefined criteria, potentially overlooking important relationships and reducing the model's discriminative power. State-of-the-art research suggests that feature extraction outperforms feature selection in terms of model accuracy and robustness. Leveraging the intrinsic structure of the data, unsupervised feature extraction provides richer representations, enhancing the model's ability to discern complex patterns. This paper proposes to revisit feature selection algorithms from a dynamic perspective, where the features are selected depending on the specific sample input. Through empirical evaluations, it will be demonstrated that unsupervised feature selection outperforms feature extraction, both in accuracy and data compression. These findings highlight the potential of unsupervised feature selection as a powerful approach for dimensionality reduction and improved model performance, particularly when labeled data is scarce or unavailable.", "keywords": "dynamic feature selection;unsupervised learning;dimensionality reduction", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Brais Cancela", "authorids": "~Brais_Cancela1", "gender": "M", "homepage": "", "dblp": "86/9834", "google_scholar": "https://scholar.google.es/citations?user=kFjVlJIAAAAJ", "orcid": "0000-0002-2295-4142", "linkedin": "cancelabarizo", "or_profile": "~Brais_Cancela1", "aff": "Universidad de La Coru\u00f1a", "aff_domain": "udc.es", "position": "Associate Professor", "bibtex": "@misc{\ncancela2024is,\ntitle={Is Feature Extraction the most informative dimensionality reduction technique? 
Revisiting Unsupervised Feature Selection from a Dynamic Approach},\nauthor={Brais Cancela},\nyear={2024},\nurl={https://openreview.net/forum?id=Ai4L058yoO}\n}", "github": "", "project": "", "reviewers": "tqPc;LSnn;eFwU;rUoV", "site": "https://openreview.net/forum?id=Ai4L058yoO", "pdf_size": 680743, "rating": "3;5;5;5", "confidence": "4;2;4;4", "soundness": "2;2;2;2", "contribution": "1;2;2;3", "presentation": "1;2;2;2", "wc_summary": "78;35;68;54", "wc_strengths": "37;27;56;48", "wc_weaknesses": "284;77;150;71", "wc_questions": "646;130;97;278", "wc_review": "1045;269;371;451", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.0, 0.0 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_avg": [ 58.75, 16.145819892467525 ], "wc_strengths_avg": [ 42.0, 10.977249200050075 ], "wc_weaknesses_avg": [ 145.5, 85.79772724262573 ], "wc_questions_avg": [ 287.75, 217.7778397817372 ], "wc_review_avg": [ 534.0, 301.9950330717378 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:W9rLRHrjG2gJ:scholar.google.com/&scioq=Is+Feature+Extraction+the+most+informative+dimensionality+reduction+technique%3F+Revisiting+Unsupervised+Feature+Selection+from+a+Dynamic+Approach&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "University of A Coru\u00f1a", "aff_unique_dep": "", "aff_unique_url": "https://www.udc.es", "aff_unique_abbr": "UDC", "aff_country_unique_index": "0", "aff_country_unique": "Spain" }, { "id": "AialDkY6y3", "title": "Deep Graph Predictions using Dirac-Bianconi Graph Neural Networks", "track": "main", "status": "Reject", "tldr": "", "abstract": "Viewing Graph Neural Networks as network dynamical systems on graphs has proven a fruitful inspiration for designing interesting GNN architectures. This work introduces the Dirac-Bianconi Graph Neural Network (DBGNN) based on Bianconi's topological Dirac equation on graphs. While heat equations based on network Laplacian tend to smooth out differences, Dirac equations typically feature long-range propagation. We indeed find that the DBGNN layer does not lead to an equilibration, or smoothing, of nodal features, even after hundreds of steps. A further distinguishing feature of the topological Dirac equation is that it treats edges and nodes on the same footing. Consequently, we expect DBGNN to be useful in contexts where edges encode more than mere logical connectivity, but have physical properties as well. We show competitive performance for molecular property prediction and superior performance for predicting the dynamic stability of power grids. 
In the case of power grids, DBGNN achieves robust out-of-distribution generalization, showing that structural relations are learned.", "keywords": "Graph Neural Networks;graph convolution;physic inspired machine learning", "primary_area": "learning on graphs and other geometries & topologies", "supplementary_material": "/attachment/8c59b79853a3e1ff2a1e88642982c4b1f3b0f839.zip", "author": "Christian Nauck;Rohan Gorantla;Michael Lindner;Konstantin Sch\u00fcrholt;Antonia S J S Mey;Frank Hellmann", "authorids": "~Christian_Nauck1;~Rohan_Gorantla1;~Michael_Lindner1;~Konstantin_Sch\u00fcrholt1;~Antonia_S_J_S_Mey1;~Frank_Hellmann1", "gender": "M;M;;M;;Non-Binary", "homepage": ";;https://www.pik-potsdam.de/members/mlindner;https://kschuerholt.github.io/;https://mey-research.org;", "dblp": ";;;267/9297;;", "google_scholar": ";1FzTrKoAAAAJ;;refZxl4AAAAJ;_NNNlvMAAAAJ;", "orcid": "0000-0003-1972-9654;0000-0003-4344-0383;;;;0000-0001-5635-4949", "linkedin": ";rohangorantla/;;https://de.linkedin.com/in/konstantin-schuerholt/en;;", "or_profile": "~Christian_Nauck1;~Rohan_Gorantla1;~Michael_Lindner1;~Konstantin_Sch\u00fcrholt1;~Antonia_S_J_S_Mey1;~Frank_Hellmann1", "aff": ";University of Edinburgh, University of Edinburgh;;University of St. Gallen;University of Edinburgh, University of Edinburgh;Potsdam Institute for Climate Impact Research", "aff_domain": ";ed.ac.uk;;unisg.ch;ed.ac.uk;pik-potsdam.de", "position": ";PhD student;;PhD student;Assistant Professor;Researcher", "bibtex": "@misc{\nnauck2024deep,\ntitle={Deep Graph Predictions using Dirac-Bianconi Graph Neural Networks},\nauthor={Christian Nauck and Rohan Gorantla and Michael Lindner and Konstantin Sch{\\\"u}rholt and Antonia S J S Mey and Frank Hellmann},\nyear={2024},\nurl={https://openreview.net/forum?id=AialDkY6y3}\n}", "github": "", "project": "", "reviewers": "VKy2;YvSy;NtG7;Bj5J;3z7c", "site": "https://openreview.net/forum?id=AialDkY6y3", "pdf_size": 4935719, "rating": "3;3;5;5;6", "confidence": "4;4;3;3;4", "soundness": "3;3;2;2;4", "contribution": "2;2;3;2;3", "presentation": "2;2;2;2;4", "wc_summary": "74;110;160;84;191", "wc_strengths": "123;41;45;68;53", "wc_weaknesses": "210;116;140;267;176", "wc_questions": "83;90;1;99;76", "wc_review": "490;357;346;518;496", "wc_reply_reviewers": "0;0;0;669;8", "wc_reply_authors": "436;466;524;1301;893", "reply_reviewers": "0;0;0;2;1", "reply_authors": "1;1;1;3;3", "rating_avg": [ 4.4, 1.2 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "contribution_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.4, 0.8 ], "wc_summary_avg": [ 123.8, 44.901670347549434 ], "wc_strengths_avg": [ 66.0, 29.95997329771841 ], "wc_weaknesses_avg": [ 181.8, 53.225557770680055 ], "wc_questions_avg": [ 69.8, 35.23293913371406 ], "wc_review_avg": [ 441.4, 74.07455703546259 ], "wc_reply_reviewers_avg": [ 135.4, 266.81799039794896 ], "wc_reply_authors_avg": [ 724.0, 331.9632509781768 ], "reply_reviewers_avg": [ 0.6, 0.8 ], "reply_authors_avg": [ 1.8, 0.9797958971132713 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.4082482904638631, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10298937043062127716&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "University of Edinburgh;University of St. 
Gallen;Potsdam Institute for Climate Impact Research", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ed.ac.uk;https://www.unisg.ch;https://www.pik-potsdam.de", "aff_unique_abbr": "Edinburgh;HSG;PIK", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;2", "aff_country_unique": "United Kingdom;Switzerland;Germany" }, { "id": "Aj1wftldeR", "title": "D5RL: Diverse Datasets for Data-Driven Deep Reinforcement Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Offline reinforcement learning algorithms hold the promise of enabling data-driven RL methods that do not require costly or dangerous real-world exploration and benefit from large pre-collected datasets. This in turn can facilitate real-world applications, as well as a more standardized approach to RL research. Furthermore, offline RL methods can provide effective initializations for online finetuning, overcoming challenges with exploration. However, evaluating progress on offline RL algorithms requires effective and challenging benchmarks that capture properties of real-world tasks, provide a range of task difficulties, and cover a range of challenges both in terms of the parameters of the domain (e.g., length of the horizon, sparsity of rewards) and the parameters of the data (e.g., narrow demonstration data or broad exploratory data). While considerable progress in offline RL in recent years has been enabled by simpler benchmark tasks, the most widely used datasets are increasingly saturating in performance and might fail to reflect properties of realistic tasks. We propose a new benchmark for offline RL that focuses on realistic simulations of robotic manipulation and locomotion environments, based on models of real-world robotic systems, and comprising a variety of data sources, including scripted data, over 20 hours of demonstrations and play-style data collected by human teleoperators, and other data sources. Our proposed benchmark covers state-based and image-based domains, and aims to test a number of real-world robot training challenges such as long-horizon manipulation, fine-grained motor control, imperfect controllers, and representation learning. Our proposed tasks vary in complexity from single instance to diverse scenarios with multiple distribution shifts, which can require significant robustness and generalization. Moreover, we support both offline RL evaluation and evaluation with online finetuning, with some of the tasks specifically designed to require both pretraining and finetuning. We hope that our proposed benchmark will facilitate further progress on both offline RL algorithms and algorithms designed for online finetuning from offline initialization.", "keywords": "Offline RL;Imitation Learning;Representation Learning", "primary_area": "datasets and benchmarks", "supplementary_material": "", "author": "Rafael Rafailov;Kyle Beltran Hatch;Anikait Singh;Aviral Kumar;Laura Smith;Ilya Kostrikov;Philippe Hansen-Estruch;Victor Kolev;Philip J. 
Ball;Jiajun Wu;Sergey Levine;Chelsea Finn", "authorids": "~Rafael_Rafailov1;~Kyle_Beltran_Hatch1;~Anikait_Singh1;~Aviral_Kumar2;~Laura_Smith1;~Ilya_Kostrikov1;~Philippe_Hansen-Estruch1;~Victor_Kolev1;~Philip_J._Ball2;~Jiajun_Wu1;~Sergey_Levine1;~Chelsea_Finn1", "gender": "M;M;M;M;F;M;M;M;M;F;M;M", "homepage": "https://rmrafailov.github.io/;https://khatch31.github.io/;https://asap7772.github.io/;https://aviralkumar2907.github.io/;;;https://victorkolev.github.io;https://jiajunwu.com;https://people.eecs.berkeley.edu/~svlevine/;https://ai.stanford.edu/~cbfinn/;https://philipjball.github.io/;https://www.linkedin.com/in/philippe-hansen-estruch-b05559210/", "dblp": "272/5358;;302/3876;202/7961;54/11024;https://dblp.org/pers/k/Kostrikov:Ilya.html;;117/4768;80/7594;131/1783;244/1972;289/6990.html", "google_scholar": "TwABcRgAAAAJ;;lPaISmIAAAAJ;;;PTS2AOgAAAAJ;;2efgcS0AAAAJ;8R35rCwAAAAJ;vfPE6hgAAAAJ;5Cm8L90AAAAJ;UzjHQLcAAAAJ", "orcid": ";;;;;;;0000-0002-4176-343X;;;;", "linkedin": ";kyle-h-3402a792/;asap7772/;;;;;jiajunwu/;;;;philippe-hansen-estruch-b05559210/", "or_profile": "~Rafael_Rafailov1;~Kyle_Beltran_Hatch1;~Anikait_Singh1;~Aviral_Kumar2;~Laura_Smith1;~Ilya_Kostrikov1;~Victor_Kolev1;~Jiajun_Wu1;~Sergey_Levine1;~Chelsea_Finn1;~Philip_Ball1;~Philippe_I_Hansen-Estruch1", "aff": "Stanford University;Toyota Research Institute;Stanford University;Google DeepMind;University of California, Berkeley;OpenAI;Stanford University;Stanford University;Google;Google;Google DeepMind;Meta Facebook", "aff_domain": "stanford.edu;tri.global;stanford.edu;google.com;berkeley.edu;openai.com;stanford.edu;stanford.edu;google.com;google.com;google.com;meta.com", "position": "PhD student;Researcher;PhD student;Researcher;PhD student;Member of Technical Staff;Undergrad student;Assistant Professor;Research Scientist;Research Scientist;Researcher;Intern", "bibtex": "@misc{\nrafailov2024drl,\ntitle={D5{RL}: Diverse Datasets for Data-Driven Deep Reinforcement Learning},\nauthor={Rafael Rafailov and Kyle Beltran Hatch and Anikait Singh and Aviral Kumar and Laura Smith and Ilya Kostrikov and Philippe Hansen-Estruch and Victor Kolev and Philip J. 
Ball and Jiajun Wu and Sergey Levine and Chelsea Finn},\nyear={2024},\nurl={https://openreview.net/forum?id=Aj1wftldeR}\n}", "github": "", "project": "", "reviewers": "55ef;mGUu;axPt;VAo4", "site": "https://openreview.net/forum?id=Aj1wftldeR", "pdf_size": 1543814, "rating": "3;5;5;6", "confidence": "4;3;4;3", "soundness": "2;2;2;3", "contribution": "1;2;2;2", "presentation": "2;3;2;2", "wc_summary": "66;85;43;48", "wc_strengths": "66;74;27;62", "wc_weaknesses": "296;308;314;50", "wc_questions": "2;57;6;5", "wc_review": "430;524;390;165", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "627;760;641;152", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 60.5, 16.53027525481654 ], "wc_strengths_avg": [ 57.25, 17.99131735032207 ], "wc_weaknesses_avg": [ 242.0, 111.04053313993049 ], "wc_questions_avg": [ 17.5, 22.85278976405288 ], "wc_review_avg": [ 377.25, 131.8434203894908 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 545.0, 232.70904580613106 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": -0.6882472016116854, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9409474801396491122&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;0;2;3;4;0;0;2;2;2;5", "aff_unique_norm": "Stanford University;Toyota Research Institute;Google;University of California, Berkeley;OpenAI;Meta", "aff_unique_dep": ";;Google DeepMind;;;Meta Platforms, Inc.", "aff_unique_url": "https://www.stanford.edu;https://www.tri.global;https://deepmind.com;https://www.berkeley.edu;https://openai.com;https://meta.com", "aff_unique_abbr": "Stanford;TRI;DeepMind;UC Berkeley;OpenAI;Meta", "aff_campus_unique_index": "0;0;2;0;0;3;3", "aff_campus_unique": "Stanford;;Berkeley;Mountain View", "aff_country_unique_index": "0;0;0;1;0;0;0;0;0;0;1;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "Ali45HfJqJ", "title": "Observer Uncertainty of Learning in Games from a Covariance Perspective", "track": "main", "status": "Reject", "tldr": "", "abstract": "We investigate the accuracy of prediction in deterministic learning dynamics of zero-sum games with random initializations, specifically focusing on observer uncertainty and its relationship to the evolution of covariances. Zero-sum games are a prominent field of interest in machine learning due to their various applications, such as Generative Adversarial Networks. Concurrently, the accuracy of observation in dynamical systems from mechanics has long been a classic subject of investigation since the discovery of the Heisenberg Uncertainty Principle. This principle employs covariance and standard deviation of particle states to measure observation accuracy. In this study, we bring these two approaches together to analyze the follow-the-regularized-leader (FTRL) algorithm in two-player zero-sum games. We provide growth rates of covariance information for continuous-time FTRL, as well as its two canonical discretization methods (Euler and symplectic). 
Our analysis and experiments show that employing symplectic discretization enhances the accuracy of prediction in learning dynamics.", "keywords": "covariance;symplectic Euler method;follow-the-regularized-leader (FTRL) algorithm;uncertainty;zero-sum games", "primary_area": "optimization", "supplementary_material": "/attachment/35d9f855c0d4f1c58d4c78531c44dec56de73857.zip", "author": "Yi Feng;Georgios Piliouras;Xiao Wang", "authorids": "~Yi_Feng3;~Georgios_Piliouras1;~Xiao_Wang4", "gender": "M;;", "homepage": "https://sites.google.com/view/yifeng95524/home;;", "dblp": ";62/1236;", "google_scholar": "https://scholar.google.com/citations?hl=en;;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Yi_Feng3;~Georgios_Piliouras1;~Xiao_Wang4", "aff": "Shanghai University of Finance and Economics;Singapore University of Technology and Design;", "aff_domain": "shufe.edu;sutd.edu.sg;", "position": "PhD student;Associate Professor;", "bibtex": "@misc{\nfeng2024observer,\ntitle={Observer Uncertainty of Learning in Games from a Covariance Perspective},\nauthor={Yi Feng and Georgios Piliouras and Xiao Wang},\nyear={2024},\nurl={https://openreview.net/forum?id=Ali45HfJqJ}\n}", "github": "", "project": "", "reviewers": "kXrY;1edF;3XfB;snif", "site": "https://openreview.net/forum?id=Ali45HfJqJ", "pdf_size": 1356653, "rating": "5;6;6;6", "confidence": "3;3;3;3", "soundness": "3;3;3;3", "contribution": "2;2;3;2", "presentation": "3;3;3;3", "wc_summary": "69;124;106;45", "wc_strengths": "117;93;30;30", "wc_weaknesses": "86;469;24;157", "wc_questions": "34;106;7;4", "wc_review": "306;792;167;236", "wc_reply_reviewers": "78;18;0;40", "wc_reply_authors": "1013;1051;34;1077", "reply_reviewers": "1;1;0;1", "reply_authors": "3;3;1;3", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 86.0, 30.87879531328902 ], "wc_strengths_avg": [ 67.5, 38.44801685392889 ], "wc_weaknesses_avg": [ 184.0, 171.14175411044494 ], "wc_questions_avg": [ 37.75, 41.09972627646077 ], "wc_review_avg": [ 375.25, 245.5782716365599 ], "wc_reply_reviewers_avg": [ 34.0, 29.086079144497972 ], "wc_reply_authors_avg": [ 793.75, 439.231929053433 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:bGQakyIdxZAJ:scholar.google.com/&scioq=Observer+Uncertainty+of+Learning+in+Games+from+a+Covariance+Perspective&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Shanghai University of Finance and Economics;Singapore University of Technology and Design", "aff_unique_dep": ";", "aff_unique_url": "http://www.sufe.edu.cn;https://www.sutd.edu.sg", "aff_unique_abbr": "SUFE;SUTD", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "China;Singapore" }, { "id": "AlkANue4lm", "title": "Non-Redundant Graph Neural Networks with Improved Expressiveness", "track": "main", "status": "Reject", "tldr": "", "abstract": "Message passing graph neural networks iteratively compute node embeddings by aggregating messages from all neighbors. This procedure can be viewed as a neural variant of the Weisfeiler-Leman method, which limits their expressive power. 
Moreover, oversmoothing and oversquashing restrict the number of layers these networks can effectively utilize. The repeated exchange and encoding of identical information in message passing amplifies oversquashing. We propose a novel aggregation scheme based on neighborhood trees, which allows for controlling the redundancy by pruning branches of the unfolding trees underlying standard message passing. We prove that reducing redundancy improves expressivity and experimentally show that it alleviates oversquashing. We investigate the interaction between redundancy in message passing and redundancy in computation and propose a compact representation of neighborhood trees, from which we compute node and graph embeddings via a neural tree canonization technique. Our method is provably more expressive than the Weisfeiler-Leman method, less susceptible to oversquashing than message passing neural networks, and provides high classification accuracy on widely-used benchmark datasets.", "keywords": "graph neural networks;message passing;Weisfeiler-Leman;expressivity", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Franka Bause;Samir Moustafa;Johannes Langguth;Wilfried N. Gansterer;Nils Morten Kriege", "authorids": "~Franka_Bause1;~Samir_Moustafa1;~Johannes_Langguth2;~Wilfried_N._Gansterer1;~Nils_Morten_Kriege1", "gender": ";M;Not Specified;;M", "homepage": "https://dm.cs.univie.ac.at/team/person/112939/;http://samirmoustafa.net/;https://www.simula.no/people/langguth;;https://kriegegroup.univie.ac.at/", "dblp": "234/8688;https://dblp.uni-trier.de/pid/358/6919;;;97/8178", "google_scholar": "UTQlpH8AAAAJ;tAUUlfAAAAAJ;;;https://scholar.google.de/citations?user=wGT17PcAAAAJ", "orcid": "0000-0003-4202-3692;0000-0002-0674-9667;;;0000-0003-2645-947X", "linkedin": ";samirmoustafa/;;;", "or_profile": "~Franka_Bause1;~Samir_Moustafa1;~Johannes_Langguth2;~Wilfried_N._Gansterer1;~Nils_Morten_Kriege1", "aff": "Universit\u00e4t Vienna;Universit\u00e4t Vienna;Simula Research Laboratory;;Universit\u00e4t Vienna", "aff_domain": "univie.ac.at;univie.ac.at;simula.no;;univie.ac.at", "position": "PhD student;PhD student;Researcher;;Associate Professor", "bibtex": "@misc{\nbause2024nonredundant,\ntitle={Non-Redundant Graph Neural Networks with Improved Expressiveness},\nauthor={Franka Bause and Samir Moustafa and Johannes Langguth and Wilfried N. 
Gansterer and Nils Morten Kriege},\nyear={2024},\nurl={https://openreview.net/forum?id=AlkANue4lm}\n}", "github": "", "project": "", "reviewers": "h7VD;zntd;e2WG;8Vsr", "site": "https://openreview.net/forum?id=AlkANue4lm", "pdf_size": 444009, "rating": "3;3;5;6", "confidence": "4;3;3;3", "soundness": "2;3;3;3", "contribution": "2;2;2;3", "presentation": "2;3;2;4", "wc_summary": "59;36;57;48", "wc_strengths": "23;42;27;45", "wc_weaknesses": "150;98;374;79", "wc_questions": "156;23;39;93", "wc_review": "388;199;497;265", "wc_reply_reviewers": "93;0;77;40", "wc_reply_authors": "277;314;410;287", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 50.0, 9.082951062292475 ], "wc_strengths_avg": [ 34.25, 9.41740410091868 ], "wc_weaknesses_avg": [ 175.25, 117.65495102204582 ], "wc_questions_avg": [ 77.75, 52.093065757353926 ], "wc_review_avg": [ 337.25, 114.48662585647286 ], "wc_reply_reviewers_avg": [ 52.5, 35.892199709686224 ], "wc_reply_authors_avg": [ 322.0, 52.578512721453045 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5555555555555555, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:81w9TdYd-DoJ:scholar.google.com/&scioq=Non-Redundant+Graph+Neural+Networks+with+Improved+Expressiveness&hl=en&as_sdt=0,31", "gs_version_total": 0, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Vienna;Simula Research Laboratory", "aff_unique_dep": ";", "aff_unique_url": "https://univie.ac.at;https://www.simula.no", "aff_unique_abbr": "UV;Simula", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Austria;Norway" }, { "id": "AnPX5Jual9", "title": "Rotative Factorization Machines", "track": "main", "status": "Reject", "tldr": "", "abstract": "Feature interaction learning, which focuses on capturing the complex relationships among multiple features, is crucial in various real-world predictive tasks.\nHowever, most feature interaction approaches empirically enumerate all feature interactions within a predefined maximal order, which leads to suboptimal results due to the restricted learning capacity.\nSome recent studies propose intricate transformations to convert the feature interaction orders into learnable parameters, enabling them to automatically learn the interactions from data.\nDespite the progress, the interaction order of each feature is often independently learned, which lacks the flexibility to capture the feature dependencies in the varying context.\nIn addition, they can only model the feature interactions within a bounded order due to the exponential growth of the interaction terms.\nTo address these issues, we present a Rotative Factorization Machine (RFM).\nUnlike prior studies, RFM represents each feature as a polar angle in the complex plane.\nAs such, the feature interactions are converted into a series of complex rotations, where the orders are cast into the rotation coefficients, thereby allowing for the learning of arbitrarily large order. 
\nFurther, we propose a novel self-attentive rotation function that models the rotation coefficients through a rotation-based attention mechanism, which can adaptively learn the interaction orders from different interaction contexts.\nMoreover, it incorporates a modulus amplification network to learn the modulus of the complex features that further enhances the representations.\nSuch a network can adaptively capture the feature interactions in the varying context, with no need of predefined order coefficients.\nExtensive experiments conducted on five widely used datasets have demonstrated the effectiveness of our approach.", "keywords": "Feature Interaction;Neural Networks;Self-Attentive Rotation", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/006fcd0b1329983605b37ad954cf666d40109fab.zip", "author": "Zhen Tian;Yuhong Shi;Xiangkun Wu;Xin Zhao;Ji-Rong Wen", "authorids": "~Zhen_Tian1;~Yuhong_Shi1;~Xiangkun_Wu1;~Xin_Zhao10;~Ji-Rong_Wen1", "gender": "M;F;M;M;M", "homepage": "https://www.tianzhen.xyz;https://github.com/littlerain51;https://github.com/as112sa;https://gsai.ruc.edu.cn/addons/teacher/index/info.html?user_id=5&ruccode=20140041&ln=cn;https://gsai.ruc.edu.cn/english/jrwen", "dblp": "84/8525-1;;;https://dblp.uni-trier.de/pid/52/8700.html;w/JRWen", "google_scholar": "MBDadZUAAAAJ;;;JNhNacoAAAAJ;tbxCHJgAAAAJ", "orcid": "0000-0001-5569-2591;;;0000-0002-8333-6196;0000-0002-9777-9676", "linkedin": ";;;;", "or_profile": "~Zhen_Tian1;~Yuhong_Shi1;~Xiangkun_Wu1;~Xin_Zhao10;~Ji-Rong_Wen1", "aff": "Renmin University of China;Zhejiang University;Zhejiang University;Renmin University of China;Renmin University of China", "aff_domain": "ruc.edu.cn;zju.edu.cn;zju.edu.cn;ruc.edu.cn;ruc.edu.cn", "position": "MS student;MS student;PhD student;Full Professor;Full Professor", "bibtex": "@misc{\ntian2024rotative,\ntitle={Rotative Factorization Machines},\nauthor={Zhen Tian and Yuhong Shi and Xiangkun Wu and Xin Zhao and Ji-Rong Wen},\nyear={2024},\nurl={https://openreview.net/forum?id=AnPX5Jual9}\n}", "github": "", "project": "", "reviewers": "7fiD;5eVE;LHqT;f3tV", "site": "https://openreview.net/forum?id=AnPX5Jual9", "pdf_size": 1019592, "rating": "3;5;5;6", "confidence": "2;4;5;5", "soundness": "2;3;2;4", "contribution": "2;2;2;3", "presentation": "2;3;3;3", "wc_summary": "90;73;25;29", "wc_strengths": "54;21;10;42", "wc_weaknesses": "103;33;174;165", "wc_questions": "115;5;292;161", "wc_review": "362;132;501;397", "wc_reply_reviewers": "0;0;0;18", "wc_reply_authors": "849;487;1582;1104", "reply_reviewers": "0;0;0;1", "reply_authors": "2;1;3;2", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 4.0, 1.224744871391589 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 54.25, 27.94078560098123 ], "wc_strengths_avg": [ 31.75, 17.239127008059313 ], "wc_weaknesses_avg": [ 118.75, 56.55251983775789 ], "wc_questions_avg": [ 143.25, 102.89891884757584 ], "wc_review_avg": [ 348.0, 134.77944947209124 ], "wc_reply_reviewers_avg": [ 4.5, 7.794228634059948 ], "wc_reply_authors_avg": [ 1005.5, 398.55645773215116 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.9365858115816939, "gs_citation": 1, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=12588312293281606905&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1;1;0;0", "aff_unique_norm": "Renmin University of China;Zhejiang University", "aff_unique_dep": ";", "aff_unique_url": "http://www.ruc.edu.cn;https://www.zju.edu.cn", "aff_unique_abbr": "RUC;ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "AnuHbhwv9Q", "title": "Out of the Ordinary: Spectrally Adapting Regression for Covariate Shift", "track": "main", "status": "Reject", "tldr": "", "abstract": "Designing deep neural network classifiers that perform robustly on distributions differing from the available training data is an active area of machine learning research. However, out-of-distribution generalization for regression---the analogous problem for modeling continuous targets---remains relatively unexplored. To tackle this problem, we return to first principles and analyze how the closed-form solution for ordinary least squares (OLS) regression is sensitive to covariate shift. We characterize the out-of-distribution risk of the OLS model in terms of the eigenspectrum decomposition of the source and target data. We then use this insight to propose a method for adapting the weights of the last layer of a pre-trained neural regression model to perform better on input data originating from a different distribution. We demonstrate how this lightweight spectral adaptation procedure can improve out-of-distribution performance in a suite of both synthetic and real-world experiments.", "keywords": "Distribution-Shift;Domain-Adaptation;Robust-Machine-Learning", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/26f313c9f1b463906a918f78389d8f790ee44d47.zip", "author": "Benjamin Eyre;Elliot Creager;David Madras;Vardan Papyan;Richard Zemel", "authorids": "~Benjamin_Eyre1;~Elliot_Creager1;~David_Madras1;~Vardan_Papyan1;~Richard_Zemel1", "gender": "M;M;M;M;M", "homepage": ";https://ecreager.github.io/;http://www.cs.toronto.edu/~madras/;https://sites.google.com/view/vardan-papyan;http://www.cs.columbia.edu/~zemel", "dblp": ";182/2055;188/6211;173/9783;16/6366", "google_scholar": "https://scholar.google.ca/citations?user=Ww1QOOkAAAAJ;boebIUcAAAAJ;MgnNDpkAAAAJ;https://scholar.google.co.il/citations?user=VrE-Gd4AAAAJ;https://scholar.google.ca/citations?user=iBeDoRAAAAAJ", "orcid": ";0009-0004-7122-3866;;;", "linkedin": ";;;;", "or_profile": "~Benjamin_Eyre1;~Elliot_Creager1;~David_Madras1;~Vardan_Papyan1;~Richard_Zemel1", "aff": "Google;University of Waterloo;Google;University of Toronto;Department of Computer Science, University of Toronto", "aff_domain": "google.com;uwaterloo.ca;google.com;toronto.edu;cs.toronto.edu", "position": "Intern;Assistant Professor;Researcher;Assistant Professor;Full Professor", "bibtex": "@misc{\neyre2024out,\ntitle={Out of the Ordinary: Spectrally Adapting Regression for Covariate Shift},\nauthor={Benjamin Eyre and Elliot Creager and David Madras and Vardan Papyan and Richard Zemel},\nyear={2024},\nurl={https://openreview.net/forum?id=AnuHbhwv9Q}\n}", "github": "", "project": "", "reviewers": "11sP;TGia;ZgCX;FNwT", "site": "https://openreview.net/forum?id=AnuHbhwv9Q", "pdf_size": 1038725, "rating": "5;6;6;8", "confidence": "3;3;4;4", "soundness": "2;2;3;3", "contribution": "2;2;3;3", "presentation": "3;3;3;4", "wc_summary": "102;105;82;179", 
"wc_strengths": "74;52;106;147", "wc_weaknesses": "97;190;75;120", "wc_questions": "168;4;44;305", "wc_review": "441;351;307;751", "wc_reply_reviewers": "497;16;0;224", "wc_reply_authors": "942;179;203;999", "reply_reviewers": "2;1;0;2", "reply_authors": "3;1;2;4", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 117.0, 36.871398129173244 ], "wc_strengths_avg": [ 94.75, 35.75874019033668 ], "wc_weaknesses_avg": [ 120.5, 43.165379646193315 ], "wc_questions_avg": [ 130.25, 117.62307384182748 ], "wc_review_avg": [ 462.5, 173.42649739875392 ], "wc_reply_reviewers_avg": [ 184.25, 201.0278276756728 ], "wc_reply_authors_avg": [ 580.75, 390.3628920632698 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6882472016116854, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=711905381529530377&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;0;2;2", "aff_unique_norm": "Google;University of Waterloo;University of Toronto", "aff_unique_dep": "Google;;", "aff_unique_url": "https://www.google.com;https://uwaterloo.ca;https://www.utoronto.ca", "aff_unique_abbr": "Google;UW;U of T", "aff_campus_unique_index": "0;0;2", "aff_campus_unique": "Mountain View;;Toronto", "aff_country_unique_index": "0;1;0;1;1", "aff_country_unique": "United States;Canada" }, { "id": "Ao4O1kNK9h", "title": "Scaling Properties For Artificial Neural Network Models of the $\\textit{C. elegans}$ Nervous System", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "The nematode worm $\\textit{C. elegans}$ enables straightforward optical measurement of neural activity, presenting a unique platform for exploring intrinsic neural dynamics. This paper investigates the scaling properties essential for self-supervised neural activity prediction based on past neural data, omitting behavioral aspects. Specifically, we investigate how predictive accuracy, quantified by the mean squared error (MSE), scales with the amount of training data, considering variables such as the number of neurons recorded, recording duration, and diversity of datasets. We also examine the relationship between these scaling properties and various parameters of artificial neural network models (ANNs), including size, architecture, and hyperparameters. Employing the nervous system of $\\textit{C. elegans}$ as an experimental platform, we elucidate the critical influence of data volume and model complexity in self-supervised neural prediction, demonstrating a logarithmic decrease in the MSE with an increase in the amount of training data, consistent across diverse datasets. Additionally, we observe nonlinear changes in MSE as the size of the ANN model varies. These findings emphasize the need for enhanced high-throughput tools for extended imaging of entire mesoscale nervous systems to acquire sufficient data for developing highly accurate ANN models of neural dynamics, with significant implications for systems neuroscience and biologically-inspired AI.", "keywords": "$\\textit{C. 
elegans}$;scaling properties;neural dynamics;self-supervised prediction;ANNs", "primary_area": "applications to neuroscience & cognitive science", "supplementary_material": "", "author": "Quilee Simeon;Leandro Risso Ven\u00e2ncio;Kaiya Ivy Zhao;Aran Nayebi;Michael Skuhersky;Edward Boyden;Guangyu Robert Yang", "authorids": "~Quilee_Simeon1;~Leandro_Risso_Ven\u00e2ncio1;~Kaiya_Ivy_Zhao1;~Aran_Nayebi2;~Michael_Skuhersky1;~Edward_Boyden1;~Guangyu_Robert_Yang1", "gender": "M;M;F;;Not Specified;M;M", "homepage": "https://qsimeon.github.io/;;https://kyzhao-ivy.github.io/;https://anayebi.github.io/;http://web.mit.edu/vex/www/;http://synthneuro.org;https://www.metaconscious.org/", "dblp": ";;;43/7661;;;", "google_scholar": ";;N0yuICkAAAAJ;https://scholar.google.com/citations?hl=en;kWibszMAAAAJ;q2rHA5QAAAAJ;hrI8aH8AAAAJ", "orcid": "0000-0003-4561-5087;0009-0002-0924-5218;;;;;0000-0002-8919-4248", "linkedin": "quilee-simeon-7843a3178/;rissov-leandro/;kaiya-zhao-910830218/;;;edboyden/;", "or_profile": "~Quilee_Simeon1;~Leandro_Risso_Ven\u00e2ncio1;~Kaiya_Ivy_Zhao1;~Aran_Nayebi2;~Michael_Skuhersky1;~Edward_Boyden1;~Guangyu_Robert_Yang1", "aff": "Massachusetts Institute of Technology;Universidade Federal de S\u00e3o Carlos;Fudan University;Massachusetts Institute of Technology;;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;ufscar.br;fudan.edu.cn;mit.edu;;mit.edu;mit.edu", "position": "PhD Student;Undergrad student;Undergrad student;Postdoc;;Full Professor;Assistant Professor", "bibtex": "@misc{\nsimeon2024scaling,\ntitle={Scaling Properties For Artificial Neural Network Models of the \\${\\textbackslash}textit\\{C. elegans\\}\\$ Nervous System},\nauthor={Quilee Simeon and Leandro Risso Ven{\\^a}ncio and Kaiya Ivy Zhao and Aran Nayebi and Michael Skuhersky and Edward Boyden and Guangyu Robert Yang},\nyear={2024},\nurl={https://openreview.net/forum?id=Ao4O1kNK9h}\n}", "github": "", "project": "", "reviewers": "3D8y;3Phf;KaWC", "site": "https://openreview.net/forum?id=Ao4O1kNK9h", "pdf_size": 944569, "rating": "3;5;5", "confidence": "5;2;4", "soundness": "2;2;2", "contribution": "2;2;2", "presentation": "1;3;3", "wc_summary": "150;38;58", "wc_strengths": "50;20;54", "wc_weaknesses": "638;130;138", "wc_questions": "49;595;271", "wc_review": "887;783;521", "wc_reply_reviewers": "325;272;85", "wc_reply_authors": "1298;1431;648", "reply_reviewers": "2;1;1", "reply_authors": "2;3;1", "rating_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 1.247219128924647 ], "soundness_avg": [ 2.0, 0.0 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 82.0, 48.771576421791686 ], "wc_strengths_avg": [ 41.333333333333336, 15.173075568988056 ], "wc_weaknesses_avg": [ 302.0, 237.61032525264272 ], "wc_questions_avg": [ 305.0, 224.19634252146042 ], "wc_review_avg": [ 730.3333333333334, 153.98989865860972 ], "wc_reply_reviewers_avg": [ 227.33333333333334, 102.94442945373758 ], "wc_reply_authors_avg": [ 1125.6666666666667, 342.09777679617986 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.7559289460184544, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "aff_unique_index": "0;1;2;0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology;Universidade Federal de S\u00e3o Carlos;Fudan University", 
"aff_unique_dep": ";;", "aff_unique_url": "https://web.mit.edu;http://www.ufscar.br;https://www.fudan.edu.cn", "aff_unique_abbr": "MIT;UFSCar;Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;0;0;0", "aff_country_unique": "United States;Brazil;China" }, { "id": "AoRIT2Uzfg", "title": "DRMGuard: Defending Deep Regression Models against Backdoor Attacks", "track": "main", "status": "Reject", "tldr": "", "abstract": "Deep regression models are used in a wide variety of safety-critical applications, but are vulnerable to backdoor attacks. Although many defenses have been proposed for classification models, they are ineffective as they do not consider the uniqueness of regression models. First, the outputs of regression models are continuous values instead of discretized labels. Thus, the potential infected target of a backdoored regression model has infinite possibilities, which makes it impossible to be determined by existing defenses. Second, the backdoor behavior of backdoored deep regression models is triggered by the activation values of all the neurons in the feature space, which makes it difficult to be detected and mitigated using existing defenses. To resolve these problems, we propose DRMGuard, the first defense to identify if a deep regression model in the image domain is backdoored or not. DRMGuard formulates the optimization problem for reverse engineering based on the unique output-space and feature-space characteristics of backdoored deep regression models. We conduct extensive evaluations on two regression tasks and four datasets. The results show that DRMGuard can consistently defend against various backdoor attacks. We also generalize four state-of-the-art defenses designed for classifiers to regression models, and compare DRMGuard with them. The results show that DRMGuard significantly outperforms all those defenses. 
The code will be open-sourced upon paper acceptance.", "keywords": "Defend;reverse engineering;backdoor attack;deep regression model", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/9db40cbe5e38cc894ee3ff27e9a76d803ce21ba0.pdf", "author": "Lingyu Du;Yupei Liu;Jinyuan Jia;Guohao Lan", "authorids": "~Lingyu_Du1;~Yupei_Liu1;~Jinyuan_Jia2;~Guohao_Lan1", "gender": "M;M;;M", "homepage": "https://github.com/LingyuDu;https://liu00222.github.io/;https://jinyuan-jia.github.io/;https://guohao.netlify.app/", "dblp": ";204/1178;24/5124-1.html;178/9755.html", "google_scholar": ";52VEwW8AAAAJ;iyg4ytkAAAAJ;1ebZN5gAAAAJ", "orcid": ";0000-0003-4300-758X;0000-0002-9785-7769;", "linkedin": ";yupei-liu-39236912b;;", "or_profile": "~Lingyu_Du1;~Yupei_Liu1;~Jinyuan_Jia2;~Guohao_Lan1", "aff": "Delft University of Technology;MathWorks;Pennsylvania State University;Delft University of Technology", "aff_domain": "tudelft.nl;mathworks.com;psu.edu;tudelft.nl", "position": "PhD student;Software Engineer;Assistant Professor;Assistant Professor", "bibtex": "@misc{\ndu2024drmguard,\ntitle={{DRMG}uard: Defending Deep Regression Models against Backdoor Attacks},\nauthor={Lingyu Du and Yupei Liu and Jinyuan Jia and Guohao Lan},\nyear={2024},\nurl={https://openreview.net/forum?id=AoRIT2Uzfg}\n}", "github": "", "project": "", "reviewers": "tXTC;FMPV;WvrB", "site": "https://openreview.net/forum?id=AoRIT2Uzfg", "pdf_size": 1298267, "rating": "3;3;8", "confidence": "4;5;5", "soundness": "3;1;3", "contribution": "1;2;3", "presentation": "3;2;3", "wc_summary": "35;136;45", "wc_strengths": "15;17;24", "wc_weaknesses": "53;112;127", "wc_questions": "9;59;4", "wc_review": "112;324;200", "wc_reply_reviewers": "0;0;24", "wc_reply_authors": "792;653;699", "reply_reviewers": "0;0;1", "reply_authors": "3;3;2", "rating_avg": [ 4.666666666666667, 2.357022603955158 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.9428090415820634 ], "contribution_avg": [ 2.0, 0.816496580927726 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 72.0, 45.43860326491855 ], "wc_strengths_avg": [ 18.666666666666668, 3.8586123009300755 ], "wc_weaknesses_avg": [ 97.33333333333333, 31.94091767971331 ], "wc_questions_avg": [ 24.0, 24.8327740429189 ], "wc_review_avg": [ 212.0, 86.96359391530842 ], "wc_reply_reviewers_avg": [ 8.0, 11.313708498984761 ], "wc_reply_authors_avg": [ 714.6666666666666, 57.81772123569805 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:p41sdQqsYE8J:scholar.google.com/&scioq=DRMGuard:+Defending+Deep+Regression+Models+against+Backdoor+Attacks&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Delft University of Technology;MathWorks;Pennsylvania State University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tudelft.nl;https://www.mathworks.com;https://www.psu.edu", "aff_unique_abbr": "TU Delft;MathWorks;PSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "Netherlands;United States" }, { "id": "Ap344YqCcD", "title": "Imitation Bootstrapped Reinforcement Learning", "track": "main", "status": "Withdraw", "tldr": "", 
"abstract": "Despite the considerable potential of reinforcement learning (RL), robotics control tasks predominantly rely on imitation learning (IL) owing to its better sample efficiency.\nHowever, given the high cost of collecting extensive demonstrations, RL is still appealing if it can utilize limited imitation data for efficient autonomous self-improvement.\nExisting RL methods that utilize demonstrations either initialize the replay buffer with demonstrations and oversample them during RL training, which does not benefit from the generalization potential of modern IL methods, or pretrain the RL policy with IL on the demonstrations, which requires additional mechanisms to prevent catastrophic forgetting during RL fine-tuning.\nWe propose _imitation bootstrapped reinforcement learning_ (IBRL), a novel framework that first trains an IL policy on a limited number of demonstrations and then uses it to propose alternative actions for both online exploration and target value bootstrapping.\nIBRL achieves SoTA performance and sample efficiency on 7 challenging sparse reward continuous control tasks in simulation while learning directly from pixels. \nAs a highlight of our method, IBRL achieves $\\mathbf{6.4\\times}$ higher success rate than RLPD, a strong method that combines the idea of oversampling demonstrations with modern RL improvements, under the budget of **10** demos and **100K** interactions in the challenging PickPlaceCan task in the Robomimic benchmark.", "keywords": "reinforcement learning;robotics;continuous control", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Hengyuan Hu;Suvir Mirchandani;Dorsa Sadigh", "authorids": "~Hengyuan_Hu2;~Suvir_Mirchandani1;~Dorsa_Sadigh1", "gender": "M;F;M", "homepage": "http://suvirpmirchandani.com;https://dorsa.fyi/;", "dblp": "287/4981;117/3174;", "google_scholar": "fz7LJPIAAAAJ;ZaJEZpYAAAAJ;oF46lMIAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Suvir_Mirchandani1;~Dorsa_Sadigh1;~Hengyuan_Hu1", "aff": "Stanford University;Stanford University;Computer Science Department, Stanford University", "aff_domain": "stanford.edu;stanford.edu;cs.stanford.edu", "position": "PhD student;Assistant Professor;PhD student", "bibtex": "@misc{\nhu2024imitation,\ntitle={Imitation Bootstrapped Reinforcement Learning},\nauthor={Hengyuan Hu and Suvir Mirchandani and Dorsa Sadigh},\nyear={2024},\nurl={https://openreview.net/forum?id=Ap344YqCcD}\n}", "github": "", "project": "", "reviewers": "8Ymy;GwQ8;iZef;k6EZ", "site": "https://openreview.net/forum?id=Ap344YqCcD", "pdf_size": 1495869, "rating": "5;5;6;6", "confidence": "3;3;4;4", "soundness": "2;3;3;2", "contribution": "2;2;2;2", "presentation": "2;3;3;3", "wc_summary": "45;65;387;118", "wc_strengths": "44;91;151;68", "wc_weaknesses": "125;140;371;198", "wc_questions": "24;51;97;10", "wc_review": "238;347;1006;394", "wc_reply_reviewers": "198;122;58;25", "wc_reply_authors": "870;1181;1505;787", "reply_reviewers": "1;1;1;1", "reply_authors": "3;3;3;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 153.75, 137.28323823395192 ], "wc_strengths_avg": [ 88.5, 39.72719471596252 ], "wc_weaknesses_avg": [ 208.5, 97.69979529149485 ], "wc_questions_avg": [ 45.5, 33.185087012090236 ], "wc_review_avg": [ 496.25, 299.69515761853745 ], "wc_reply_reviewers_avg": [ 100.75, 66.09604753689891 ], "wc_reply_authors_avg": [ 1085.75, 
283.1266280306393 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12607106594968839768&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "ApjY32f3Xr", "title": "PINNacle: A Comprehensive Benchmark of Physics-Informed Neural Networks for Solving PDEs", "track": "main", "status": "Reject", "tldr": "", "abstract": "While significant progress has been made on Physics-Informed Neural Networks (PINNs), a comprehensive comparison of these methods across a wide range of Partial Differential Equations (PDEs) is still lacking. This study introduces PINNacle, a benchmarking tool designed to fill this gap. PINNacle provides a diverse dataset, comprising over 20 distinct PDEs from various domains including heat conduction, fluid dynamics, biology, and electromagnetics. These PDEs encapsulate key challenges inherent to real-world problems, such as complex geometry, multi-scale phenomena, nonlinearity, and high dimensionality. PINNacle also offers a user-friendly toolbox, incorporating about 10 state-of-the-art PINN methods for systematic evaluation and comparison. We have conducted extensive experiments with these methods, offering insights into their strengths and weaknesses. In addition to providing a standardized means of assessing performance, PINNacle also offers an in-depth analysis to guide future research, particularly in areas such as domain decomposition methods and loss reweighting for handling multi-scale problems and complex geometry. 
While PINNacle does not guarantee success in all real-world scenarios, it represents a significant contribution to the field by offering a robust, diverse, and comprehensive benchmark suite that will undoubtedly foster further research and development in PINNs.", "keywords": "PINN;machine learning;physics-informed machine learning", "primary_area": "datasets and benchmarks", "supplementary_material": "/attachment/38585e83cfcd145e3816bd1154c966361c146b92.pdf", "author": "Zhongkai Hao;Jiachen Yao;Chang Su;Hang Su;Ziao Wang;Fanzhi Lu;Zeyu Xia;Yichi Zhang;Songming Liu;Lu Lu;Jun Zhu", "authorids": "~Zhongkai_Hao1;~Jiachen_Yao3;~Chang_Su7;~Hang_Su3;~Ziao_Wang2;~Fanzhi_Lu1;~Zeyu_Xia4;~Yichi_Zhang4;~Songming_Liu1;~Lu_Lu1;~Jun_Zhu2", "gender": "M;M;M;F;M;M;M;M;M;M;M", "homepage": "https://jiachenyao.com/;https://github.com/EdwardIX;https://wangziao9.github.io/;https://github.com/FortuniaL;;https://zycheiheihei.github.io;;https://lu.seas.upenn.edu;http://ml.cs.tsinghua.edu.cn/~jun;;https://haozhongkai.github.io/", "dblp": "213/4920;;;;;;285/4585;01/2086-10;50/2644-1;26/5371-6;270/0220.html", "google_scholar": "Z_bCoGcAAAAJ;;;;https://scholar.google.com/citations?hl=en;HzgDakoAAAAJ;6urFg8kAAAAJ;wD_wsWUAAAAJ;axsP38wAAAAJ;dxN1_X0AAAAJ;dfSzq27ZiVoC", "orcid": "0000-0001-7655-7831;;;;;0000-0002-1894-3977;;0000-0002-5476-5768;;;", "linkedin": "jiachen-y-05a05932a/;;;;;;%E6%9D%BE%E9%93%AD-%E5%88%98-7b8339254/;;;;", "or_profile": "~Jiachen_Yao3;~Chang_Su7;~Ziao_Wang2;~Fanzhi_Lu1;~Zeyu_Xia4;~Yichi_Zhang4;~Songming_Liu1;~Lu_Lu1;~Jun_Zhu2;~Hang_Su2;~Hao_Zhongkai1", "aff": "Tsinghua University;Tsinghua University;, University of California, Santa Barbara;Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University;Yale University;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;cs.ucsb.edu;tsinghua.edu.cn;mail.tsinghua.edu.cn;tsinghua.edu.cn;mails.tsinghua.edu.cn;yale.edu;mail.tsinghua.edu.cn;tsinghua.edu.cn;mails.tsinghua.edu.cn", "position": "Undergrad student;Undergrad student;MS student;Undergrad student;Undergrad student;PhD student;PhD student;Assistant Professor;Professor;Associate Professor;PhD student", "bibtex": "@misc{\nhao2024pinnacle,\ntitle={{PINN}acle: A Comprehensive Benchmark of Physics-Informed Neural Networks for Solving {PDE}s},\nauthor={Zhongkai Hao and Jiachen Yao and Chang Su and Hang Su and Ziao Wang and Fanzhi Lu and Zeyu Xia and Yichi Zhang and Songming Liu and Lu Lu and Jun Zhu},\nyear={2024},\nurl={https://openreview.net/forum?id=ApjY32f3Xr}\n}", "github": "", "project": "", "reviewers": "oeuE;xNZs;RGvf;Ytqc", "site": "https://openreview.net/forum?id=ApjY32f3Xr", "pdf_size": 861377, "rating": "3;6;6;6", "confidence": "5;3;3;3", "soundness": "2;3;4;3", "contribution": "1;2;4;3", "presentation": "4;3;4;3", "wc_summary": "55;273;53;41", "wc_strengths": "26;233;92;50", "wc_weaknesses": "54;127;227;517", "wc_questions": "125;105;4;13", "wc_review": "260;738;376;621", "wc_reply_reviewers": "292;0;0;290", "wc_reply_authors": "295;522;237;596", "reply_reviewers": "1;0;0;1", "reply_authors": "1;1;1;2", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 105.5, 96.85427197599495 ], "wc_strengths_avg": [ 100.25, 80.20091957078796 ], "wc_weaknesses_avg": [ 231.25, 176.0374605020193 ], "wc_questions_avg": [ 61.75, 53.8115926172047 ], 
"wc_review_avg": [ 498.75, 189.90441674695194 ], "wc_reply_reviewers_avg": [ 145.5, 145.50171820291334 ], "wc_reply_authors_avg": [ 412.5, 150.22399941420812 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 43, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12178264276662821414&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;1;0;0;0;0;2;0;0;0", "aff_unique_norm": "Tsinghua University;University of California, Santa Barbara;Yale University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.ucsb.edu;https://www.yale.edu", "aff_unique_abbr": "THU;UCSB;Yale", "aff_campus_unique_index": "1", "aff_campus_unique": ";Santa Barbara", "aff_country_unique_index": "0;0;1;0;0;0;0;1;0;0;0", "aff_country_unique": "China;United States" }, { "title": "Critical Learning Periods Emerge Even in Deep Linear Networks", "status": "Spotlight", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19239", "id": "Aq35gl2c1k", "author_site": "Michael Kleinman, Alessandro Achille, Stefano Soatto", "tldr": "", "abstract": "Critical learning periods are periods early in development where temporary sensory deficits can have a permanent effect on behavior and learned representations. \nDespite the radical differences between biological and artificial networks, critical learning periods have been empirically observed in both systems. This suggests that critical periods may be fundamental to learning and not an accident of biology.\nYet, why exactly critical periods emerge in deep networks is still an open question, and in particular it is unclear whether the critical periods observed in both systems depend on particular architectural or optimization details. To isolate the key underlying factors, we focus on deep linear network models, and show that, surprisingly, such networks also display much of the behavior seen in biology and artificial networks, while being amenable to analytical treatment. We show that critical periods depend on the depth of the model and structure of the data distribution. We also show analytically and in simulations that the learning of features is tied to competition between sources. Finally, we extend our analysis to multi-task learning to show that pre-training on certain tasks can damage the transfer performance on new tasks, and show how this depends on the relationship between tasks and the duration of the pre-training stage. 
To the best of our knowledge, our work provides the first analytically tractable model that sheds light into why critical learning periods emerge in biological and artificial networks.", "keywords": "critical learning periods;deep neural networks;gradient descent;linear networks", "primary_area": "applications to neuroscience & cognitive science", "supplementary_material": "", "author": "Michael Kleinman;Alessandro Achille;Stefano Soatto", "authorids": "~Michael_Kleinman2;~Alessandro_Achille1;~Stefano_Soatto1", "gender": ";M;", "homepage": ";;https://www.cs.ucla.edu/~soatto", "dblp": "276/0181;190/7328;08/1262", "google_scholar": "https://scholar.google.ca/citations?user=b5c-VcMAAAAJ;;lH1PdF8AAAAJ", "orcid": ";;0000-0003-2902-6362", "linkedin": ";;stefano-soatto-5765aa6/", "or_profile": "~Michael_Kleinman2;~Alessandro_Achille1;~Stefano_Soatto2", "aff": "Stanford University;Amazon;UCLA Computer Science Department, University of California, Los Angeles", "aff_domain": "stanford.edu;amazon.com;cs.ucla.edu", "position": "Postdoc;Applied Research Scientist;Professor", "bibtex": "@inproceedings{\nkleinman2024critical,\ntitle={Critical Learning Periods Emerge Even in Deep Linear Networks},\nauthor={Michael Kleinman and Alessandro Achille and Stefano Soatto},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=Aq35gl2c1k}\n}", "github": "", "project": "", "reviewers": "SR9U;uHcm;qaj6;PVne", "pdf_size": 1610853, "rating": "5;6;8;10", "confidence": "3;3;3;4", "soundness": "3;3;3;4", "contribution": "3;2;3;3", "presentation": "3;2;3;4", "wc_summary": "109;65;185;95", "wc_strengths": "69;33;120;68", "wc_weaknesses": "149;89;172;359", "wc_questions": "4;23;106;227", "wc_review": "331;210;583;749", "wc_reply_reviewers": "0;21;77;0", "wc_reply_authors": "836;542;510;938", "reply_reviewers": "0;1;2;0", "reply_authors": "2;1;2;2", "rating_avg": [ 7.25, 1.920286436967152 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 113.5, 44.23516700544941 ], "wc_strengths_avg": [ 72.5, 31.0201547384922 ], "wc_weaknesses_avg": [ 192.25, 100.92912116926412 ], "wc_questions_avg": [ 90.0, 87.9062000088731 ], "wc_review_avg": [ 468.25, 210.66487011364757 ], "wc_reply_reviewers_avg": [ 24.5, 31.5 ], "wc_reply_authors_avg": [ 706.5, 184.41461438833963 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8268106308031117, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12530288096224791156&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=Aq35gl2c1k", "pdf": "https://openreview.net/pdf?id=Aq35gl2c1k", "email": "stanford.edu;amazon.com;cs.ucla.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Stanford University;Amazon;University of California, Los Angeles", "aff_unique_dep": ";Amazon.com, Inc.;Computer Science Department", "aff_unique_url": "https://www.stanford.edu;https://www.amazon.com;https://www.ucla.edu", "aff_unique_abbr": "Stanford;Amazon;UCLA", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Stanford;;Los Angeles", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "KoLA: Carefully Benchmarking 
World Knowledge of Large Language Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19238", "id": "AqN23oqraW", "author_site": "Jifan Yu, Xiaozhi Wang, Shangqing Tu, Shulin Cao, Daniel Zhang-Li, Xin Lv, Hao Peng, Zijun Yao, Xiaohan Zhang, Hanming Li, Chunyang Li, Zheyuan Zhang, Yushi Bai, Yantao Liu, Amy Xin, Kaifeng Yun, Linlu Gong, Nianyi Lin, Jianhui Chen, Zhili Wu, Yunjia Qi, Weikai Li, Yong Guan, Kaisheng Zeng, Ji Qi, Hailong Jin, Jinxin Liu, Yu Gu, Yuan Yao, Ning Ding, Lei Hou, Zhiyuan Liu, Xu Bin, Jie Tang, Juanzi Li", "tldr": "", "abstract": "The unprecedented performance of large language models (LLMs) necessitates improvements in evaluations. Rather than merely exploring the breadth of LLM abilities, we believe meticulous and thoughtful designs are essential to thorough, unbiased, and applicable evaluations. Given the importance of world knowledge to LLMs, we construct a Knowledge-oriented LLM Assessment benchmark (KoLA), in which we carefully design three crucial factors: (1) For ability modeling, we mimic human cognition to form a four-level taxonomy of knowledge-related abilities, covering 19 tasks. (2) For data, to ensure fair comparisons, we use both Wikipedia, a corpus prevalently pre-trained by LLMs, along with continuously collected emerging corpora, aiming to evaluate the capacity to handle unseen data and evolving knowledge. (3) For evaluation criteria, we adopt a contrastive system, including overall standard scores for better numerical comparability across tasks and models, and a unique self-contrast metric for automatically evaluating knowledge-creating ability. We evaluate 21 open-source and commercial LLMs and obtain some intriguing findings. The KoLA dataset will be updated every three months to provide timely references for developing LLMs and knowledge-related systems.", "keywords": "Large Language Model;World Knowledge;Evolving Benchmark", "primary_area": "datasets and benchmarks", "supplementary_material": "/attachment/15ddff071c61dc0e8abdc05e3574461847222dd9.zip", "author": "Jifan Yu;Xiaozhi Wang;Shangqing Tu;Shulin Cao;Daniel Zhang-Li;Xin Lv;Hao Peng;Zijun Yao;Xiaohan Zhang;Hanming Li;Chunyang Li;Zheyuan Zhang;Yushi Bai;Yantao Liu;Amy Xin;Kaifeng Yun;Linlu GONG;Nianyi Lin;Jianhui Chen;Zhili Wu;Yunjia Qi;Weikai Li;Yong Guan;Kaisheng Zeng;Ji Qi;Hailong Jin;Jinxin Liu;Yu Gu;Yuan Yao;Ning Ding;Lei Hou;Zhiyuan Liu;Xu Bin;Jie Tang;Juanzi Li", "authorids": "~Jifan_Yu2;~Xiaozhi_Wang1;~Shangqing_Tu1;~Shulin_Cao1;~Daniel_Zhang-Li1;~Xin_Lv1;~Hao_Peng6;~Zijun_Yao2;~Xiaohan_Zhang6;~Hanming_Li1;~Chunyang_Li3;~Zheyuan_Zhang3;~Yushi_Bai1;~Yantao_Liu1;~Amy_Xin1;~Kaifeng_Yun2;~Linlu_GONG1;~Nianyi_Lin2;~Jianhui_Chen6;~Zhili_Wu1;~Yunjia_Qi1;~Weikai_Li2;~Yong_Guan2;~Kaisheng_Zeng1;~Ji_Qi2;~Hailong_Jin2;~Jinxin_Liu2;~Yu_Gu5;~Yuan_Yao12;~Ning_Ding5;~Lei_Hou2;~Zhiyuan_Liu1;~Xu_Bin1;~Jie_Tang1;~Juanzi_Li1", "gender": "M;M;M;F;M;M;M;M;F;;M;M;M;Not Specified;F;;F;;M;M;;M;M;M;;M;M;M;M;M;M;M;;;", "homepage": 
"https://yujifan0326.github.io/;https://bakser.github.io/;https://shangqingtu.github.io/;https://github.com/ShulinCao;https://github.com/Danielznn16;https://davidlvxin.github.io;;https://transirius.github.io/;;;https://lcy2723.github.io/;https://sparrowzheyuan18.github.io/;https://bys0318.github.io/;https://github.com/RicardoL1u;;;https://github.com/gonglinlu;https://linny2002.github.io/linny.github.io/;;https://wu-zhili.github.io/;https://github.com/kijlk;https://weikai-li.github.io;;https://github.com/alpc43;;;https://scholar.google.com/citations?user=A7KHQ6YAAAAJ&hl=en&oi=sra;http://entslscheia.github.io;https://yaoyuanthu.github.io/;https://www.stingning.cn/;https://www.cs.tsinghua.edu.cn/csen/info/1305/4466.htm;http://nlp.csai.tsinghua.edu.cn/~lzy;;;", "dblp": "239/6130.html;03/2015;296/1838;229/2976;321/0309;;69/7742-15;134/4025-2;;;;;302/4421;172/9996;349/5224;;;;;;349/5606;157/3533-2;04/606;199/8788.html;;190/7770.html;20/6480-2;15/4208-16;;;32/5685-1;53/3245-1;;;", "google_scholar": "https://scholar.google.com.tw/citations?hl=zh-CN;DjpXXZkAAAAJ;https://scholar.google.nl/citations?user=HiR6VAsAAAAJ;lUfGROcAAAAJ;;rJzgbYQAAAAJ;2ry7XsgAAAAJ;B4LmHSUAAAAJ;https://scholar.google.com.hk/citations?user=RKyE8o0AAAAJ;;GpXP-a4AAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=zh-CN;;2UKO_uYAAAAJ;;;;CkgiFxMAAAAJ;;Xxiwr8YAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?view_op=list_works;;;A7KHQ6YAAAAJ;c5RwjjcAAAAJ;https://scholar.google.com.hk/citations?user=3NWfi3YAAAAJ;uZXQuYAAAAAJ;YnIq4hsAAAAJ;dT0v5u0AAAAJ;;;", "orcid": "0000-0003-3430-4048;0000-0002-5727-143X;0009-0008-0640-3413;;0009-0009-3681-1896;;0009-0006-7192-5790;0000-0002-0288-9283;0000-0003-3295-7758;0009-0009-5835-1587;;0000-0003-3471-0572;;;0009-0001-2404-0475;;0009-0004-2323-4464;;0000-0001-8665-2971;;;0000-0002-5801-9500;0000-0002-9044-2595;0000-0002-8104-9652;;0009-0005-0939-6990;0009-0009-4673-9824;;;;0000-0002-8907-3526;0000-0002-7709-2543;;;", "linkedin": ";xiaozhiwang098/?locale=en_US;;;;;;%E5%AD%90%E4%BF%8A-%E5%A7%9A-313188209/;;;;;;;;;;;;;;weikai-li;;https://cn.linkedin.com/in/%E5%BC%80%E8%83%9C-%E6%9B%BE-496566107;;;%E9%87%91%E9%91%AB-%E5%88%98-86aaa7211/;;;;;;;;", "or_profile": "~Jifan_Yu2;~Xiaozhi_Wang1;~Shangqing_Tu1;~Shulin_Cao1;~Daniel_Zhang-Li1;~Xin_Lv1;~Hao_Peng6;~Zijun_Yao2;~Xiaohan_Zhang6;~Hanming_Li1;~Chunyang_Li3;~Zheyuan_Zhang3;~Yushi_Bai1;~Yantao_Liu1;~Amy_Xin1;~Kaifeng_Yun2;~Linlu_GONG1;~Nianyi_Lin2;~Jianhui_Chen6;~Zhili_Wu1;~Yunjia_Qi1;~Weikai_Li2;~Yong_Guan2;~Kaisheng_Zeng1;~Ji_Qi2;~Hailong_Jin2;~Jinxin_Liu2;~Yu_Gu5;~Yuan_Yao12;~Ning_Ding5;~Lei_Hou2;~Zhiyuan_Liu1;~Xu_Bin1;~Jie_Tang1;~Juanzi_Li1", "aff": "Tsinghua University;Department of Computer Science and Technology, Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University;Zhipu AI;Tsinghua University;Department of Computer Science and Technology, Tsinghua University;Beijing Knowledge Atlas Technology Co., Ltd. 
;;Tsinghua University;Tsinghua University;Tsinghua University;University of Chinese Academy of Sciences;Tsinghua University;;Tsinghua University;Tsinghua University;Tsinghua University;, Tsinghua University;Tsinghua University;UCLA Computer Science Department, University of California, Los Angeles;Tsinghua University;Tsinghua University;;Tsinghua University;Tsinghua University;Ohio State University;National University of Singapore;Tsinghua University;Tsinghua University;Tsinghua University;;;", "aff_domain": "tsinghua.edu.cn;mails.tsinghua.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;zhipuai.cn;tsinghua.edu.cn;cs.tsinghua.edu.cn;zhipuai.cn;;tsinghua.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn;ucas.ac.cn;cs.tsinghua.edu.cn;;tsinghua.edu.cn;tsinghua.edu.cn;cs.tsinghua.edu.cn;cs.tsinghua.edu.cn;tsinghua.edu.cn;cs.ucla.edu;tsinghua.edu.cn;tsinghua.edu.cn;;tsinghua.edu.cn;tsinghua.edu.cn;osu.edu;nus.edu;mail.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;;;", "position": "Postdoc;PhD student;PhD student;PhD student;PhD student;Researcher;PhD student;PhD student;Researcher;;Undergrad student;MS student;PhD student;MS student;MS student;;Undergrad student;Undergrad student;MS student;Undergrad student;PhD student;PhD student;Postdoc;PhD student;;Postdoc;PhD student;PhD student;Postdoc;Postdoc;Assistant Professor;Associate Professor;;;", "bibtex": "@inproceedings{\nyu2024kola,\ntitle={Ko{LA}: Carefully Benchmarking World Knowledge of Large Language Models},\nauthor={Jifan Yu and Xiaozhi Wang and Shangqing Tu and Shulin Cao and Daniel Zhang-Li and Xin Lv and Hao Peng and Zijun Yao and Xiaohan Zhang and Hanming Li and Chunyang Li and Zheyuan Zhang and Yushi Bai and Yantao Liu and Amy Xin and Kaifeng Yun and Linlu GONG and Nianyi Lin and Jianhui Chen and Zhili Wu and Yunjia Qi and Weikai Li and Yong Guan and Kaisheng Zeng and Ji Qi and Hailong Jin and Jinxin Liu and Yu Gu and Yuan Yao and Ning Ding and Lei Hou and Zhiyuan Liu and Xu Bin and Jie Tang and Juanzi Li},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=AqN23oqraW}\n}", "github": "", "project": "", "reviewers": "vPns;ryRT;VBLJ;1m8m", "pdf_size": 4664841, "rating": "5;6;8;8", "confidence": "2;4;2;4", "soundness": "2;3;3;4", "contribution": "2;2;4;3", "presentation": "2;3;3;4", "wc_summary": "77;47;124;120", "wc_strengths": "86;46;70;57", "wc_weaknesses": "124;82;46;57", "wc_questions": "60;198;22;34", "wc_review": "347;373;262;268", "wc_reply_reviewers": "116;0;13;0", "wc_reply_authors": "743;1173;331;362", "reply_reviewers": "1;0;1;0", "reply_authors": "2;2;1;1", "rating_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 3.0, 1.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 92.0, 31.851216617265973 ], "wc_strengths_avg": [ 64.75, 14.922717580923388 ], "wc_weaknesses_avg": [ 77.25, 29.978117018918983 ], "wc_questions_avg": [ 78.5, 70.34735247328075 ], "wc_review_avg": [ 312.5, 48.427781283061066 ], "wc_reply_reviewers_avg": [ 32.25, 48.64347335460329 ], "wc_reply_authors_avg": [ 652.25, 341.6367771478943 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 35, 0 ], "corr_rating_confidence": 0.19245008972987526, "gs_citation": 129, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13925067843687558202&as_sdt=2005&sciodt=0,5&hl=en", 
"gs_version_total": 5, "openreview": "https://openreview.net/forum?id=AqN23oqraW", "pdf": "https://openreview.net/pdf?id=AqN23oqraW", "email": "tsinghua.edu.cn;mails.tsinghua.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;zhipuai.cn;tsinghua.edu.cn;cs.tsinghua.edu.cn;zhipuai.cn;;tsinghua.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn;ucas.ac.cn;cs.tsinghua.edu.cn;;tsinghua.edu.cn;tsinghua.edu.cn;cs.tsinghua.edu.cn;cs.tsinghua.edu.cn;tsinghua.edu.cn;cs.ucla.edu;tsinghua.edu.cn;tsinghua.edu.cn;;tsinghua.edu.cn;tsinghua.edu.cn;osu.edu;nus.edu;mail.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;;;", "author_num": 35, "aff_unique_index": "0;0;0;0;0;1;0;0;2;0;0;0;3;0;0;0;0;0;0;4;0;0;0;0;5;6;0;0;0", "aff_unique_norm": "Tsinghua University;Zhipu AI;Beijing Knowledge Atlas Technology Co., Ltd.;University of Chinese Academy of Sciences;University of California, Los Angeles;Ohio State University;National University of Singapore", "aff_unique_dep": ";;;;Computer Science Department;;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.zhipu.ai;;http://www.ucas.ac.cn;https://www.ucla.edu;https://www.osu.edu;https://www.nus.edu.sg", "aff_unique_abbr": "THU;Zhipu AI;;UCAS;UCLA;OSU;NUS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;0;0;0;0;1;2;0;0;0", "aff_country_unique": "China;United States;Singapore" }, { "id": "AqXzHRU2cs", "title": "Generative Pretrained Embedding and Hierarchical Representation to Unlock Human Rhythm in Activities of Daily Living", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Within the evolving landscape of smart homes, the precise recognition of daily living activities using ambient sensor data stands paramount. This paper not only aims to bolster existing algorithms by evaluating two distinct pretrained embeddings suited for ambient sensor activations but also introduces a novel hierarchical architecture. We delve into an architecture anchored on Transformer Decoder-based pre-trained embeddings, reminiscent of the GPT design, and contrast it with the previously established state-of-the-art (SOTA) ELMo embeddings for ambient sensors. Our proposed hierarchical structure leverages the strengths of each pre-trained embedding, enabling the discernment of activity dependencies and sequence order, thereby enhancing classification precision. To further refine recognition, we incorporate into our proposed architecture an hour-of-the-day embedding. Empirical evaluations underscore the preeminence of the Transformer Decoder embedding in classification endeavors. Additionally, our innovative hierarchical design significantly bolsters the efficacy of both pre-trained embeddings, notably in capturing inter-activity nuances. The integration of temporal aspects subtly but distinctively augments classification, especially for time-sensitive activities. 
In conclusion, our GPT-inspired hierarchical approach, infused with temporal insights, outshines the SOTA ELMo benchmark.", "keywords": "Embedding;Smart Homes;Transformers;Activity of Daily Living;Human activity Recognition", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/3a6f43f562afd48016b18299128ca7392f5e71bf.pdf", "author": "Damien Bouchabou;Sao Mai Nguyen", "authorids": "~Damien_Bouchabou1;~Sao_Mai_Nguyen1", "gender": "M;F", "homepage": ";http://nguyensmai.free.fr", "dblp": "183/0962;42/10546.html", "google_scholar": "https://scholar.google.fr/citations?user=vtUIdqgAAAAJ;https://scholar.google.fr/citations?user=ppPWNQoAAAAJ", "orcid": "0000-0003-3623-3626;0000-0003-0929-0019", "linkedin": "damien-bouchabou-5725329b/;", "or_profile": "~Damien_Bouchabou1;~Sao_Mai_Nguyen1", "aff": ";IMT Atlantique", "aff_domain": ";imt-atlantique.edu", "position": ";Associate Professor", "bibtex": "@misc{\nbouchabou2024generative,\ntitle={Generative Pretrained Embedding and Hierarchical Representation to Unlock Human Rhythm in Activities of Daily Living},\nauthor={Damien Bouchabou and Sao Mai Nguyen},\nyear={2024},\nurl={https://openreview.net/forum?id=AqXzHRU2cs}\n}", "github": "", "project": "", "reviewers": "e66t;dgfF;v1qx;wCG6", "site": "https://openreview.net/forum?id=AqXzHRU2cs", "pdf_size": 1600991, "rating": "3;5;5;6", "confidence": "5;3;3;4", "soundness": "2;2;3;3", "contribution": "2;2;2;2", "presentation": "3;2;3;3", "wc_summary": "128;42;25;45", "wc_strengths": "114;45;35;48", "wc_weaknesses": "348;78;115;50", "wc_questions": "99;58;4;95", "wc_review": "689;223;179;238", "wc_reply_reviewers": "0;0;0;68", "wc_reply_authors": "1678;374;816;965", "reply_reviewers": "0;0;0;1", "reply_authors": "4;2;1;4", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 60.0, 39.99374951164244 ], "wc_strengths_avg": [ 60.5, 31.26099806468117 ], "wc_weaknesses_avg": [ 147.75, 117.89057426274587 ], "wc_questions_avg": [ 64.0, 38.15101571387058 ], "wc_review_avg": [ 332.25, 207.1079126928761 ], "wc_reply_reviewers_avg": [ 17.0, 29.444863728670914 ], "wc_reply_authors_avg": [ 958.25, 468.95328925171214 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.6225430174794673, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:P9XYffbs23cJ:scholar.google.com/&scioq=Generative+Pretrained+Embedding+and+Hierarchical+Representation+to+Unlock+Human+Rhythm+in+Activities+of+Daily+Living&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "IMT Atlantique", "aff_unique_dep": "", "aff_unique_url": "https://www.imt-atlantique.fr", "aff_unique_abbr": "IMT Atlantique", "aff_country_unique_index": "0", "aff_country_unique": "France" }, { "id": "AqaFgmH87p", "title": "On the efficacy of group-wise clipping in differentially private optimization", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Recent advances have substantially improved the accuracy, memory cost, and training speed of differentially private (DP) deep learning, especially on large vision and language models with millions to billions of parameters. 
In this work, we thoroughly study the per-sample gradient clipping style, a key component in DP optimization. We show that different clipping styles have the same time complexity but instantiate an accuracy-memory trade-off: while the all-layer clipping (of coarse granularity) is the most prevalent and usually gives the best accuracy, it incurs heavier memory cost compared to other group-wise clipping, such as the layer-wise clipping (of finer granularity). We formalize this trade-off through our convergence theory and complexity analysis. Importantly, we demonstrate that the accuracy gap between group-wise clipping and all-layer clipping becomes smaller for larger models, while the memory advantage of the group-wise clipping remains. Consequently, the group-wise clipping allows DP optimization of large models to achieve high accuracy and low peak memory simultaneously.", "keywords": "deep learning;differential privacy;per-sample gradient clipping;optimization", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "", "author": "Zhiqi Bu;Ruixuan Liu;Yu-Xiang Wang;Sheng Zha;George Karypis", "authorids": "~Zhiqi_Bu1;~Ruixuan_Liu2;~Yu-Xiang_Wang1;~Sheng_Zha1;~George_Karypis1", "gender": "M;F;;M;M", "homepage": "https://sites.google.com/view/zhiqi-bu;;http://www.cs.ucsb.edu/~yuxiangw/publications.html;https://github.com/szha;", "dblp": "245/2573;243/0195.html;62/1637-3.html;218/5471;", "google_scholar": "MEvTLxIAAAAJ;sXWB1UQAAAAJ;HGNZ1fkAAAAJ;;ElqwScwAAAAJ", "orcid": ";0000-0002-0823-3760;;;", "linkedin": ";;;shengzha/;", "or_profile": "~Zhiqi_Bu1;~Ruixuan_Liu2;~Yu-Xiang_Wang1;~Sheng_Zha1;~George_Karypis1", "aff": "Amazon;Emory University;UC Santa Barbara;Amazon;University of Minnesota, Minneapolis", "aff_domain": "amazon.com;emory.edu;ucsb.edu;amazon.com;umn.edu", "position": "Researcher;Postdoc;Assistant Professor;Researcher;Full Professor", "bibtex": "@misc{\nbu2024on,\ntitle={On the efficacy of group-wise clipping in differentially private optimization},\nauthor={Zhiqi Bu and Ruixuan Liu and Yu-Xiang Wang and Sheng Zha and George Karypis},\nyear={2024},\nurl={https://openreview.net/forum?id=AqaFgmH87p}\n}", "github": "", "project": "", "reviewers": "fLMf;yntr;nK9w;RBj1", "site": "https://openreview.net/forum?id=AqaFgmH87p", "pdf_size": 575557, "rating": "3;5;5;6", "confidence": "4;5;3;3", "soundness": "2;2;2;3", "contribution": "2;2;2;3", "presentation": "3;2;2;2", "wc_summary": "103;10;44;84", "wc_strengths": "50;26;56;49", "wc_weaknesses": "323;74;176;188", "wc_questions": "38;32;6;26", "wc_review": "514;142;282;347", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "771;258;389;383", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 60.25, 35.9887135085432 ], "wc_strengths_avg": [ 45.25, 11.431863365173676 ], "wc_weaknesses_avg": [ 190.25, 88.52224296751636 ], "wc_questions_avg": [ 25.5, 12.031209415515965 ], "wc_review_avg": [ 321.25, 133.6850309496168 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 450.25, 192.42839577359678 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.3458572319330373, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:W5QdxLU3BJYJ:scholar.google.com/&scioq=On+the+efficacy+of+group-wise+clipping+in+differentially+private+optimization&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "Amazon;Emory University;University of California, Santa Barbara;University of Minnesota", "aff_unique_dep": "Amazon.com, Inc.;;;", "aff_unique_url": "https://www.amazon.com;https://www.emory.edu;https://www.ucsb.edu;https://www.minnesota.edu", "aff_unique_abbr": "Amazon;Emory;UCSB;UMN", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Santa Barbara;Minneapolis", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "Ar5g8fwnjV", "title": "HOVER: Hyperbolic Video-text Retrieval", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Retrieving complex videos with compositional actions is challenging but still with few attentions given. Existing video-text retrieval methods ignore the multi-level semantic structures between mono-action videos and complex compositional videos, e.g., one simultaneously containing \"sitting up\", \"opening door\", \"cooking food\", \"eating\", etc. In this paper, we propose to jointly embed videos and texts into a hyperbolic space where their hierarchical semantic relationships are explicitly encoded. Specifically, a video with action compositions is first decomposed longitudinally into an action tree with mono-action leaf or child nodes and increasingly complex parent nodes. Then, the is-a semantic relationship in videos/texts is represented in the hyperbolic space by employing hyperbolic norm constraints. These constraints ensure that parents have smaller norms than their children, thereby placing parents in higher hierarchical positions compared to their children. Additionally, their temporal relationship is captured by utilizing relative cosine distances within the hyperbolic space. Experimental results show that the proposed method substantially outperforms the Euclidean counterparts, especially when with a small training size. 
Further, the learned hyperbolic video-text embeddings well generalize to novel datasets containing complex videos with varied-level action compositions.", "keywords": "video-text retrieval;hyperbolic representation;multi-modal learning", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/c9125072ab7ed346d764aaa551029b6731e5cbd2.zip", "author": "Ruiqi Shi;Jun Wen;Wei Ji;Menglin Yang;Difei Gao;Roger Zimmermann", "authorids": "~Ruiqi_Shi1;~Jun_Wen1;~Wei_Ji1;~Menglin_Yang3;~Difei_Gao1;~Roger_Zimmermann1", "gender": "M;M;M;M;;M", "homepage": "https://github.com/UX404;https://jungel2star.github.io/;https://jiwei0523.github.io/;https://scholar.google.com/citations?user=KroqSRUAAAAJ&hl=en;;https://www.comp.nus.edu.sg/cs/bio/rogerz/", "dblp": "168/6384;;52/3220-8;249/8541-1;;79/1490", "google_scholar": ";https://scholar.google.com.hk/citations?user=Gw2ekPsAAAAJ;69OFB-AAAAAJ;KroqSRUAAAAJ;;https://scholar.google.com.tw/citations?user=IDREwXEAAAAJ", "orcid": "0009-0001-8850-040X;;0000-0002-8106-9768;0000-0003-2510-5282;;0000-0002-7410-2590", "linkedin": ";;;;;roger-zimmermann-76b56b6/", "or_profile": "~Ruiqi_Shi1;~Jun_Wen1;~Wei_Ji1;~Menglin_Yang3;~Difei_Gao1;~Roger_Zimmermann1", "aff": "The Chinese University of Hong Kong;Harvard University;Nanjing University;Yale University;;National University of Singapore", "aff_domain": "cuhk.edu.hk;harvard.edu;nju.edu.cn;yale.edu;;nus.edu.sg", "position": "PhD student;Postdoc;Associate Professor;Postdoc;;Full Professor", "bibtex": "@misc{\nshi2024hover,\ntitle={{HOVER}: Hyperbolic Video-text Retrieval},\nauthor={Ruiqi Shi and Jun Wen and Wei Ji and Menglin Yang and Difei Gao and Roger Zimmermann},\nyear={2024},\nurl={https://openreview.net/forum?id=Ar5g8fwnjV}\n}", "github": "", "project": "", "reviewers": "BhHY;MxoF;AjjV;P4PE", "site": "https://openreview.net/forum?id=Ar5g8fwnjV", "pdf_size": 1685266, "rating": "3;3;5;6", "confidence": "5;4;4;4", "soundness": "2;3;3;3", "contribution": "2;2;3;3", "presentation": "2;2;3;3", "wc_summary": "59;59;67;56", "wc_strengths": "16;23;71;130", "wc_weaknesses": "199;124;74;131", "wc_questions": "3;2;5;4", "wc_review": "277;208;217;321", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 60.25, 4.085033659592048 ], "wc_strengths_avg": [ 60.0, 45.62345887808157 ], "wc_weaknesses_avg": [ 132.0, 44.4915722356493 ], "wc_questions_avg": [ 3.5, 1.118033988749895 ], "wc_review_avg": [ 255.75, 46.072632874625256 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5555555555555555, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:qFiorVCiE3cJ:scholar.google.com/&scioq=HOVER:+Hyperbolic+Video-text+Retrieval&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Chinese University of Hong Kong;Harvard University;Nanjing University;Yale University;National University of Singapore", "aff_unique_dep": ";;;;", "aff_unique_url": 
"https://www.cuhk.edu.hk;https://www.harvard.edu;https://www.nju.edu.cn;https://www.yale.edu;https://www.nus.edu.sg", "aff_unique_abbr": "CUHK;Harvard;Nanjing U;Yale;NUS", "aff_campus_unique_index": "0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;1;0;1;2", "aff_country_unique": "China;United States;Singapore" }, { "title": "FairTune: Optimizing Parameter Efficient Fine Tuning for Fairness in Medical Image Analysis", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19237", "id": "ArpwmicoYW", "author_site": "Raman Dutt, Ondrej Bohdal, Sotirios Tsaftaris, Timothy Hospedales", "tldr": "", "abstract": "Training models with robust group fairness properties is crucial in ethically sensitive application areas such as medical diagnosis. Despite the growing body of work aiming to minimise demographic bias in AI, this problem remains challenging. A key reason for this challenge is the fairness generalisation gap: High-capacity deep learning models can fit all training data nearly perfectly, and thus also exhibit perfect fairness during training. In this case, bias emerges only during testing when generalisation performance differs across sub-groups. This motivates us to take a bi-level optimisation perspective on fair learning: Optimising the learning strategy based on validation fairness. Specifically, we consider the highly effective workflow of adapting pre-trained models to downstream medical imaging tasks using parameter-efficient fine-tuning (PEFT) techniques. There is a trade-off between updating more parameters, enabling a better fit to the task of interest vs. fewer parameters, potentially reducing the generalisation gap. To manage this tradeoff, we propose FairTune, a framework to optimise the choice of PEFT parameters with respect to fairness. We demonstrate empirically that FairTune leads to improved fairness on a range of medical imaging datasets. The code is available at https://github.com/Raman1121/FairTune.", "keywords": "Fairness;PEFT;Hyperparameter Optimization;Medical Imaging", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/a2322b049ff04702f15e4a4262303a4eececd137.zip", "author": "Raman Dutt;Ondrej Bohdal;Sotirios A. Tsaftaris;Timothy Hospedales", "authorids": "~Raman_Dutt1;~Ondrej_Bohdal1;~Sotirios_A._Tsaftaris1;~Timothy_Hospedales1", "gender": "M;M;;M", "homepage": "https://ramandutt.super.site/;https://ondrejbohdal.github.io/;https://vios.science/;http://homepages.inf.ed.ac.uk/thospeda/", "dblp": "314/5918;267/5714.html;14/613;32/3545", "google_scholar": "wdmRaoUAAAAJ;aKppg0QAAAAJ;jC1uFnYAAAAJ;https://scholar.google.fr/citations?user=nHhtvqkAAAAJ", "orcid": ";;;0000-0003-4867-7486", "linkedin": "raman-dutt/;;;timothyhospedales/", "or_profile": "~Raman_Dutt1;~Ondrej_Bohdal1;~Sotirios_A._Tsaftaris1;~Timothy_Hospedales1", "aff": "University of Edinburgh, University of Edinburgh;University of Edinburgh;University of Edinburgh;Samsung AI Research Centre", "aff_domain": "ed.ac.uk;ed.ac.uk;ed.ac.uk;samsung.com", "position": "PhD student;PhD student;Professor in machine learning and computer vision;Principal Researcher", "bibtex": "@inproceedings{\ndutt2024fairtune,\ntitle={FairTune: Optimizing Parameter Efficient Fine Tuning for Fairness in Medical Image Analysis},\nauthor={Raman Dutt and Ondrej Bohdal and Sotirios A. 
Tsaftaris and Timothy Hospedales},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=ArpwmicoYW}\n}", "github": "", "project": "", "reviewers": "iDy7;XV7U;gkxu;Dgwu", "pdf_size": 849783, "rating": "6;6;6;6", "confidence": "3;3;4;3", "soundness": "3;3;3;3", "contribution": "3;4;3;3", "presentation": "3;3;3;3", "wc_summary": "50;79;93;62", "wc_strengths": "35;55;56;40", "wc_weaknesses": "117;118;347;22", "wc_questions": "38;3;82;94", "wc_review": "240;255;578;218", "wc_reply_reviewers": "148;0;0;0", "wc_reply_authors": "1069;414;1035;554", "reply_reviewers": "1;0;0;0", "reply_authors": "3;1;2;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 71.0, 16.355427233796124 ], "wc_strengths_avg": [ 46.5, 9.17877987534291 ], "wc_weaknesses_avg": [ 151.0, 119.68918079759757 ], "wc_questions_avg": [ 54.25, 36.196512262923896 ], "wc_review_avg": [ 322.75, 147.95501850224616 ], "wc_reply_reviewers_avg": [ 37.0, 64.08587988004847 ], "wc_reply_authors_avg": [ 768.0, 288.5316273825107 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9705834299251022485&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "openreview": "https://openreview.net/forum?id=ArpwmicoYW", "pdf": "https://openreview.net/pdf?id=ArpwmicoYW", "email": "ed.ac.uk;ed.ac.uk;ed.ac.uk;samsung.com", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of Edinburgh;Samsung", "aff_unique_dep": ";AI Research", "aff_unique_url": "https://www.ed.ac.uk;https://www.samsung.com/global/researchers/samsung-ai-research-centre/", "aff_unique_abbr": "Edinburgh;SARC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United Kingdom;South Korea" }, { "title": "What Algorithms can Transformers Learn? A Study in Length Generalization", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19236", "id": "AssIuHnmHX", "author_site": "Hattie Zhou, Arwen Bradley, Etai Littwin, Noam Razin, Omid Saremi, Joshua Susskind, Samy Bengio, Preetum Nakkiran", "tldr": "", "abstract": "Large language models exhibit surprising emergent generalization properties, yet also struggle on many simple reasoning tasks such as arithmetic and parity. In this work, we focus on length generalization, and we propose a unifying framework to understand when and how Transformers can be expected to length generalize on a given task. First, we show that there exist algorithmic tasks for which standard\ndecoder-only Transformers trained from scratch naturally exhibit strong length generalization. For these tasks, we leverage the RASP programming language (Weiss et al., 2021) to show that the correct algorithmic solution which solves the task can be represented by a simple Transformer. We thus propose the RASP-Generalization Conjecture: Transformers tend to learn a length-generalizing solution if there exists a short RASP-L program that works for all input lengths. We present empirical evidence to support the correlation between RASP-simplicity and generalization. 
We leverage our insights to give new scratchpad formats which yield strong length generalization on traditionally hard tasks (such as parity and addition), and we illustrate how scratchpad can hinder generalization when it increases the complexity of the corresponding RASP-L program. Overall, our work provides a novel perspective on the mechanisms of length generalization and the algorithmic capabilities of Transformers.", "keywords": "length generalization;systematic generalization;understanding;transformer;scratchpad;LLM;algorithmic reasoning", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Hattie Zhou;Arwen Bradley;Etai Littwin;Noam Razin;Omid Saremi;Joshua M. Susskind;Samy Bengio;Preetum Nakkiran", "authorids": "~Hattie_Zhou1;~Arwen_Bradley1;~Etai_Littwin1;~Noam_Razin1;~Omid_Saremi1;~Joshua_M._Susskind1;~Samy_Bengio1;~Preetum_Nakkiran1", "gender": "F;F;M;M;;M;M;", "homepage": "http://hattiezhou.com;;;https://noamrazin.github.io/;;http://www.apple.com;http://bengio.abracadoudou.com;http://preetum.nakkiran.org", "dblp": ";278/8216;;247/1241;;132/7797;b/SamyBengio;151/6343", "google_scholar": ";cxi6phoAAAAJ;NOVS7vwAAAAJ;tDsd50oAAAAJ;;Sv2TGqsAAAAJ;Vs-MdPcAAAAJ;zithBbUAAAAJ", "orcid": ";0000-0002-4086-217X;;;;;;", "linkedin": ";arwen-bradley-2084ba2b/;;;omidsaremi/;joshua-susskind-8ab2ab5/;bengio;", "or_profile": "~Hattie_Zhou1;~Arwen_Bradley1;~Etai_Littwin1;~Noam_Razin1;~Omid_Saremi1;~Joshua_M._Susskind1;~Samy_Bengio1;~Preetum_Nakkiran1", "aff": "University of Montreal;Apple;;Tel Aviv University;Apple;Apple;Apple;Apple", "aff_domain": "umontreal.ca;apple.com;;tau.ac.il;apple.com;apple.com;apple.com;apple.com", "position": "PhD student;Researcher;;PhD student;ML;Researcher;Senior Director;Principal Researcher", "bibtex": "@inproceedings{\nzhou2024what,\ntitle={What Algorithms can Transformers Learn? A Study in Length Generalization},\nauthor={Hattie Zhou and Arwen Bradley and Etai Littwin and Noam Razin and Omid Saremi and Joshua M. 
Susskind and Samy Bengio and Preetum Nakkiran},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=AssIuHnmHX}\n}", "github": "", "project": "", "reviewers": "q9T5;Ho88;LyN1;tXQ4", "pdf_size": 677257, "rating": "6;6;8;8", "confidence": "3;4;3;3", "soundness": "2;2;3;3", "contribution": "3;1;3;3", "presentation": "4;3;4;3", "wc_summary": "116;175;133;175", "wc_strengths": "199;18;64;37", "wc_weaknesses": "395;437;309;103", "wc_questions": "122;124;4;56", "wc_review": "832;754;510;371", "wc_reply_reviewers": "618;570;0;55", "wc_reply_authors": "2713;2623;534;774", "reply_reviewers": "3;2;0;1", "reply_authors": "8;6;1;2", "rating_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 149.75, 25.955490748587284 ], "wc_strengths_avg": [ 79.5, 70.9031028940201 ], "wc_weaknesses_avg": [ 311.0, 128.64680330268607 ], "wc_questions_avg": [ 76.5, 50.00749943758436 ], "wc_review_avg": [ 616.75, 185.03969168802675 ], "wc_reply_reviewers_avg": [ 310.75, 284.4234299420496 ], "wc_reply_authors_avg": [ 1661.0, 1011.0694832700668 ], "reply_reviewers_avg": [ 1.5, 1.118033988749895 ], "reply_authors_avg": [ 4.25, 2.8613807855648994 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 132, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15771874752676632675&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 7, "openreview": "https://openreview.net/forum?id=AssIuHnmHX", "pdf": "https://openreview.net/pdf?id=AssIuHnmHX", "email": "umontreal.ca;apple.com;;tau.ac.il;apple.com;apple.com;apple.com;apple.com", "author_num": 8, "aff_unique_index": "0;1;2;1;1;1;1", "aff_unique_norm": "University of Montreal;Apple;Tel Aviv University", "aff_unique_dep": ";Apple Inc.;", "aff_unique_url": "https://wwwumontreal.ca;https://www.apple.com;https://www.tau.ac.il", "aff_unique_abbr": "UM;Apple;TAU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;1;1;1;1", "aff_country_unique": "Canada;United States;Israel" }, { "id": "AtLW9HU3bo", "title": "Discovering the question-critical moments: Towards building event-aware multi-modal large language models for complex video question answering", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recently, Multi-modal Large Language Models (MLLM) have demonstrated impressive capabilities in image-language reasoning tasks like Image Question Answering. However, naively transferring them to complex Video Question Answering (VideoQA) tasks suffers from unsatisfactory causal-temporal reasoning capabilities. Existing methods simply concatenate the uniformly sampled frame representations to obtain the video representation, which either results in a quite large number of visual tokens and is thus resource-demanding, or is distracted by the redundancy of question-irrelevant contents. In light of this, we introduce E-STR, extending MLLM to be Event-aware for Spatial-Temporal Reasoning in complex VideoQA tasks. Specifically, we propose a differentiable question-critical keyframes retriever to adaptively select the question-critical moments in the video serving as the key event for spatial-temporal reasoning, and a general context encoder to encode the unselected parts for preserving the general contexts of the video. 
To facilitate the acquisition of spatial-temporal representations, we also incorporate lightweight adapters within the frozen image encoder. Extensive experiments on three large-scale benchmarks, including NExT-QA, Causal-VidQA, and STAR, all of which are notable for complex causal-temporal reasoning within long videos containing multiple objects and events, show that our method achieves better performance than existing state-of-the-art methods.", "keywords": "multi-modal learning; video question answering; video-language reasoning; multi-modal large language models", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Haibo Wang;Weifeng Ge", "authorids": "~Haibo_Wang8;~Weifeng_Ge2", "gender": "M;M", "homepage": "http://www.weifengge.net/;https://whb139426.github.io/", "dblp": "155/3277.html;", "google_scholar": "wFs402oAAAAJ;", "orcid": "0000-0002-6258-6225;", "linkedin": ";", "or_profile": "~Weifeng_Ge2;~wang_haibo1", "aff": "Fudan University;Fudan University", "aff_domain": "fudan.edu.cn;fudan.edu.cn", "position": "Assistant Professor;MS student", "bibtex": "@misc{\nwang2024discovering,\ntitle={Discovering the question-critical moments: Towards building event-aware multi-modal large language models for complex video question answering},\nauthor={Haibo Wang and Weifeng Ge},\nyear={2024},\nurl={https://openreview.net/forum?id=AtLW9HU3bo}\n}", "github": "", "project": "", "reviewers": "zSXn;92w2;bWa6;xUFv", "site": "https://openreview.net/forum?id=AtLW9HU3bo", "pdf_size": 2567826, "rating": "3;5;5;5", "confidence": "4;5;4;5", "soundness": "2;3;2;3", "contribution": "2;2;3;2", "presentation": "2;3;3;3", "wc_summary": "48;43;72;79", "wc_strengths": "51;68;26;16", "wc_weaknesses": "52;255;194;66", "wc_questions": "491;64;4;53", "wc_review": "642;430;296;214", "wc_reply_reviewers": "394;0;0;0", "wc_reply_authors": "2072;1564;1163;785", "reply_reviewers": "1;0;0;0", "reply_authors": "3;2;2;1", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 60.5, 15.305227865013967 ], "wc_strengths_avg": [ 40.25, 20.474068965401088 ], "wc_weaknesses_avg": [ 141.75, 85.65738438686999 ], "wc_questions_avg": [ 153.0, 196.4471939224381 ], "wc_review_avg": [ 395.5, 161.86027925343512 ], "wc_reply_reviewers_avg": [ 98.5, 170.6070045455344 ], "wc_reply_authors_avg": [ 1396.0, 477.7054531821884 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:-dsdYw20C_YJ:scholar.google.com/&scioq=Discovering+the+question-critical+moments:+Towards+building+event-aware+multi-modal+large+language+models+for+complex+video+question+answering&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Fudan University", "aff_unique_dep": "", "aff_unique_url": "https://www.fudan.edu.cn", "aff_unique_abbr": "Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "AwX6ON5A0V", "title": "On Gaussian Mixture Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "We investigate the sample complexity of Gaussian mixture 
models (GMMs). Our results provide the optimal upper bound, in the context of uniform spherical Gaussian mixtures. Furthermore, we highlight the relationship between the sample complexity of GMMs and the distribution of spacings among their means.", "keywords": "GMM;Machin learning", "primary_area": "learning theory", "supplementary_material": "/attachment/df7f47a1abe5a623c7c7f2ef3f2d1c684870b150.pdf", "author": "Farzad Aryan", "authorids": "~Farzad_Aryan1", "gender": "M", "homepage": "", "dblp": "", "google_scholar": "", "orcid": "0000-0002-0700-9281", "linkedin": "", "or_profile": "~Farzad_Aryan1", "aff": "", "aff_domain": "", "position": "", "bibtex": "@misc{\naryan2024on,\ntitle={On Gaussian Mixture Models},\nauthor={Farzad Aryan},\nyear={2024},\nurl={https://openreview.net/forum?id=AwX6ON5A0V}\n}", "github": "", "project": "", "reviewers": "EBv4;xe5T;Ywzd;DCj7;WTMq", "site": "https://openreview.net/forum?id=AwX6ON5A0V", "pdf_size": 211113, "rating": "3;3;3;5;6", "confidence": "5;4;3;2;3", "soundness": "4;2;2;3;3", "contribution": "1;2;1;2;3", "presentation": "2;1;1;3;2", "wc_summary": "167;85;111;174;285", "wc_strengths": "21;35;11;76;6", "wc_weaknesses": "133;364;208;33;14", "wc_questions": "39;153;54;30;303", "wc_review": "360;637;384;313;608", "wc_reply_reviewers": "208;47;0;21;131", "wc_reply_authors": "800;804;734;548;811", "reply_reviewers": "1;1;0;1;1", "reply_authors": "1;1;1;1;2", "rating_avg": [ 4.0, 1.2649110640673518 ], "confidence_avg": [ 3.4, 1.019803902718557 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "contribution_avg": [ 1.8, 0.7483314773547883 ], "presentation_avg": [ 1.8, 0.7483314773547883 ], "wc_summary_avg": [ 164.4, 68.99159369082584 ], "wc_strengths_avg": [ 29.8, 25.134836382996408 ], "wc_weaknesses_avg": [ 150.4, 127.77574104656956 ], "wc_questions_avg": [ 115.8, 103.44737792713742 ], "wc_review_avg": [ 460.4, 134.6233263591418 ], "wc_reply_reviewers_avg": [ 81.4, 77.3888880912499 ], "wc_reply_authors_avg": [ 739.4, 99.63453216631271 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.6201736729460422, "gs_citation": 3649, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12581175277722921512&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12 }, { "id": "AweVGJeW47", "title": "Smoothing for exponential family dynamical systems", "track": "main", "status": "Reject", "tldr": "", "abstract": "State-space modeling is a powerful technique for the analysis of spatiotemporal structures of time series. However, when assumptions about linearity or Gaussianity are violated, statistical inference about the latent process is challenging. While variational inference can be used to approximate the posterior in these nonlinear or non-Gaussian settings, it is desirable to preserve the temporal structure of the true posterior in the variational approximation, while ensuring inference scales linearly in sequence length. We propose a new structured variational approximation that satisfies these desiderata. Furthermore, by generalizing to *exponential family dynamical systems*, we are able to develop decoupled second order inference algorithms that have simple updates, without increased computational complexity. Then, we extend our insights and develop the *auto-encoding backward factorized smoother*, making it easy to leverage modern deep learning tools. 
For settings where a sequential inference algorithm may be more appropriate, we also present the *variational Bryson-Frazier* algorithm, by developing a new backward smoothing objective. We compare against various inference algorithms for state-space models, and validate the theory presented through numerical experiments.", "keywords": "state space model;bayesian inference;time-series;variational inference", "primary_area": "probabilistic methods (Bayesian methods, variational inference, sampling, UQ, etc.)", "supplementary_material": "", "author": "Matthew Dowling;Yuan Zhao;Il Memming Park", "authorids": "~Matthew_Dowling2;~Yuan_Zhao1;~Il_Memming_Park1", "gender": "M;;M", "homepage": ";;http://catniplab.github.io/", "dblp": ";65/2105-4;00/4652-2", "google_scholar": "https://scholar.google.com/citations?hl=en;XLpD5N0AAAAJ;CsmltusAAAAJ", "orcid": ";0000-0002-6123-8579;0000-0002-4255-7750", "linkedin": ";;memming/", "or_profile": "~Matthew_Dowling2;~Yuan_Zhao1;~Il_Memming_Park1", "aff": "State University of New York, Stony Brook;National Institute of Mental Health;Champalimaud Centre for the Unknown", "aff_domain": "stonybrook.edu;nih.gov;fchampalimaud.org", "position": "PhD student;Researcher;Associate Professor", "bibtex": "@misc{\ndowling2024smoothing,\ntitle={Smoothing for exponential family dynamical systems},\nauthor={Matthew Dowling and Yuan Zhao and Il Memming Park},\nyear={2024},\nurl={https://openreview.net/forum?id=AweVGJeW47}\n}", "github": "", "project": "", "reviewers": "CGvB;vygh;pQjy", "site": "https://openreview.net/forum?id=AweVGJeW47", "pdf_size": 6330847, "rating": "5;5;6", "confidence": "4;3;2", "soundness": "3;3;3", "contribution": "2;2;3", "presentation": "3;3;3", "wc_summary": "66;255;121", "wc_strengths": "70;82;98", "wc_weaknesses": "215;119;1190", "wc_questions": "150;82;496", "wc_review": "501;538;1905", "wc_reply_reviewers": "122;0;0", "wc_reply_authors": "366;503;1462", "reply_reviewers": "1;0;0", "reply_authors": "2;1;2", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 147.33333333333334, 79.37393918801197 ], "wc_strengths_avg": [ 83.33333333333333, 11.469767022723502 ], "wc_weaknesses_avg": [ 508.0, 483.8367493277045 ], "wc_questions_avg": [ 242.66666666666666, 181.27204846736728 ], "wc_review_avg": [ 981.3333333333334, 653.3056116567668 ], "wc_reply_reviewers_avg": [ 40.666666666666664, 57.51135153650587 ], "wc_reply_authors_avg": [ 777.0, 487.58657350942985 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:iuYJtriE0NEJ:scholar.google.com/&scioq=Smoothing+for+exponential+family+dynamical+systems&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "State University of New York;National Institute of Mental Health;Champalimaud Centre for the Unknown", "aff_unique_dep": ";;", "aff_unique_url": "https://www.stonybrook.edu;https://www.nimh.nih.gov;https://www.champalimaud.org", "aff_unique_abbr": "SUNY Stony Brook;NIMH;CCU", "aff_campus_unique_index": "0", "aff_campus_unique": "Stony Brook;", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United 
States;Portugal" }, { "id": "AwfPDjuWPu", "title": "Supervision for Free: Enhancing Depth Estimation for Ground Robots Based on Supervision from Mono Camera Model Parameters", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Depth estimation is a critical topic for robotics and vision-related tasks. In monocular depth estimation, in comparison with supervised learning that requires expensive ground truth labeling, self-supervised methods possess great potential due to no labeling cost. However, self-supervised learning still has a large gap with supervised learning in depth estimation performance. Meanwhile, scaling is also a major issue for monocular unsupervised depth estimation, which commonly still needs ground truth scale from GPS, LiDAR, or existing maps to correct. In deep learning era, while existing methods mainly rely on the exploration of image relationships to train the unsupervised neural networks, fundamental information provided by the camera itself has been generally ignored, which can provide extensive supervision information for free, without the need for any extra equipment to provide supervision signals. Utilizing the camera itself's intrinsics and extrinsics, depth information can be calculated for ground regions and regions connecting ground based on physical principles, providing free supervision information without any other sensors. The method is easy to realize and can be a component to enhance the effects of all the unsupervised methods.", "keywords": "Depth Estimation;Camera Model;Unsupervised Learning", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Praveen Praveen Kamasani;Jinchang Zhang;Xue Iuan Wong;Guoyu Lu", "authorids": "praveenkumarreddy.kamasani@uga.edu;jz23267@uga.edu;xwong@ford.com;~Guoyu_Lu4", "gender": ";;;M", "homepage": ";;;https://engineering.uga.edu/people/profile/guoyu-lu-ph.d", "dblp": ";;;120/8962.html", "google_scholar": ";;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "praveenkumarreddy.kamasani@uga.edu;jz23267@uga.edu;xwong@ford.com;~Guoyu_Lu4", "aff": ";;;University of Georgia", "aff_domain": ";;;uga.edu", "position": ";;;Assistant Professor", "bibtex": "@misc{\nkamasani2024supervision,\ntitle={Supervision for Free: Enhancing Depth Estimation for Ground Robots Based on Supervision from Mono Camera Model Parameters},\nauthor={Praveen Praveen Kamasani and Jinchang Zhang and Xue Iuan Wong and Guoyu Lu},\nyear={2024},\nurl={https://openreview.net/forum?id=AwfPDjuWPu}\n}", "github": "", "project": "", "reviewers": "", "site": "https://openreview.net/forum?id=AwfPDjuWPu", "pdf_size": 0, "rating": "", "confidence": "", "soundness": "", "contribution": "", "presentation": "", "wc_summary": "", "wc_strengths": "", "wc_weaknesses": "", "wc_questions": "", "wc_review": "", "wc_reply_reviewers": "", "wc_reply_authors": "", "reply_reviewers": "", "reply_authors": "", "rating_avg": [ 0, 0 ], "confidence_avg": [ 0, 0 ], "soundness_avg": [ 0, 0 ], "contribution_avg": [ 0, 0 ], "presentation_avg": [ 0, 0 ], "wc_summary_avg": [ 0, 0 ], "wc_strengths_avg": [ 0, 0 ], "wc_weaknesses_avg": [ 0, 0 ], "wc_questions_avg": [ 0, 0 ], "wc_review_avg": [ 0, 0 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 0, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:lKg3a3hRPMkJ:scholar.google.com/&scioq=Supervision+for+Free:+Enhancing+Depth+Estimation+for+Ground+Robots+Based+on+Supervision+from+Mono+Camera+Model+Parameters&hl=en&as_sdt=0,23", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "University of Georgia", "aff_unique_dep": "", "aff_unique_url": "https://www.uga.edu", "aff_unique_abbr": "UGA", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Function Vectors in Large Language Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19235", "id": "AwyxtyMwaG", "author_site": "Eric Todd, Millicent Li, Arnab Sen Sharma, Aaron Mueller, Byron Wallace, David Bau", "tldr": "", "abstract": "We report the presence of a simple neural mechanism that represents an input-output function as a vector within autoregressive transformer language models (LMs). Using causal mediation analysis on a diverse range of in-context-learning (ICL) tasks, we find that a small number attention heads transport a compact representation of the demonstrated task, which we call a function vector (FV). FVs are robust to changes in context, i.e., they trigger execution of the task on inputs such as zero-shot and natural text settings that do not resemble the ICL contexts from which they are collected. We test FVs across a range of tasks, models, and layers and find strong causal effects across settings in middle layers. We investigate the internal structure of FVs and find while that they often contain information that encodes the output space of the function, this information alone is not sufficient to reconstruct an FV. Finally, we test semantic vector composition in FVs, and find that to some extent they can be summed to create vectors that trigger new complex tasks. 
Our findings show that compact, causal internal vector representations of function abstractions can be explicitly extracted from LLMs.", "keywords": "In-Context Learning;Interpretability", "primary_area": "visualization or interpretation of learned representations", "supplementary_material": "/attachment/1a5822915a221c66a406c73d840b166f3d523440.zip", "author": "Eric Todd;Millicent Li;Arnab Sen Sharma;Aaron Mueller;Byron C Wallace;David Bau", "authorids": "~Eric_Todd1;~Millicent_Li1;~Arnab_Sen_Sharma1;~Aaron_Mueller1;~Byron_C_Wallace1;~David_Bau1", "gender": "M;;M;M;M;M", "homepage": "https://ericwtodd.github.io/;http://millicentli.github.io/;https://arnab-api.github.io/;https://aaronmueller.github.io;http://www.byronwallace.com/;https://baulab.info/", "dblp": "162/6042;;254/2046;248/7949;00/8247;47/3614", "google_scholar": "o12WPZEAAAAJ;UZ1gBvAAAAAJ;https://scholar.google.com/citations?view_op=list_works;lhwxXg4AAAAJ;KTzRHmwAAAAJ;CYI6cKgAAAAJ", "orcid": "0009-0008-7858-4823;;0000-0002-0407-6526;;;0000-0003-1744-6765", "linkedin": "eric-w-todd/;;arnab-api/;aaron-m-mueller/;;david-bau-4b8130/", "or_profile": "~Eric_Todd1;~Millicent_Li1;~Arnab_Sen_Sharma1;~Aaron_Mueller1;~Byron_C_Wallace1;~David_Bau1", "aff": "Northeastern University;Northeastern University;Northeastern University;Technion - Israel Institute of Technology;Northeastern University;Northeastern University", "aff_domain": "northeastern.edu;neu.edu;northeasterd.edu;technion.ac.il;northeastern.edu;northeastern.edu", "position": "PhD student;PhD student;PhD student;Postdoc;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\ntodd2024function,\ntitle={Function Vectors in Large Language Models},\nauthor={Eric Todd and Millicent Li and Arnab Sen Sharma and Aaron Mueller and Byron C Wallace and David Bau},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=AwyxtyMwaG}\n}", "github": "", "project": "", "reviewers": "baEP;ZAgB;cNFK;9YEq", "pdf_size": 1768824, "rating": "6;6;6;6", "confidence": "3;2;3;4", "soundness": "3;3;2;3", "contribution": "3;4;3;2", "presentation": "3;3;3;3", "wc_summary": "43;81;74;177", "wc_strengths": "35;132;66;176", "wc_weaknesses": "314;168;224;223", "wc_questions": "103;45;5;583", "wc_review": "495;426;369;1159", "wc_reply_reviewers": "210;0;26;806", "wc_reply_authors": "632;522;878;1455", "reply_reviewers": "1;0;1;3", "reply_authors": "1;1;2;4", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 93.75, 50.14665990871177 ], "wc_strengths_avg": [ 102.25, 55.137895317104736 ], "wc_weaknesses_avg": [ 232.25, 52.356351095163234 ], "wc_questions_avg": [ 184.0, 232.98283198553494 ], "wc_review_avg": [ 612.25, 318.8035249177775 ], "wc_reply_reviewers_avg": [ 260.5, 325.1811033870203 ], "wc_reply_authors_avg": [ 871.75, 360.5637079629618 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 147, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3928477797104162619&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=AwyxtyMwaG", "pdf": "https://openreview.net/pdf?id=AwyxtyMwaG", "email": 
"northeastern.edu;neu.edu;northeasterd.edu;technion.ac.il;northeastern.edu;northeastern.edu", "author_num": 6, "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "Northeastern University;Technion - Israel Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.northeastern.edu;https://www.technion.ac.il/en/", "aff_unique_abbr": "NEU;Technion", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "United States;Israel" }, { "title": "Understanding Augmentation-based Self-Supervised Representation Learning via RKHS Approximation and Regression", "status": "Spotlight", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19234", "id": "Ax2yRhCQr1", "author_site": "Runtian Zhai, Bingbin Liu, Andrej Risteski, J Kolter, Pradeep K Ravikumar", "tldr": "", "abstract": "Data augmentation is critical to the empirical success of modern self-supervised representation learning, such as contrastive learning and masked language modeling.\nHowever, a theoretical understanding of the exact role of the augmentation remains limited.\nRecent work has built the connection between self-supervised learning and the approximation of the top eigenspace of a graph Laplacian operator, suggesting that learning a linear probe atop such representation can be connected to RKHS regression.\nBuilding on this insight, this work delves into a statistical analysis of augmentation-based pretraining.\nStarting from the isometry property, a geometric characterization of the target function given by the augmentation, we disentangle the effects of the model and the augmentation,\nand prove two generalization bounds that are free of model complexity.\nOur first bound works for an arbitrary encoder, and it is the sum of an estimation error bound incurred by fitting a linear probe, and an approximation error bound by RKHS approximation.\nOur second bound specifically addresses the case\nwhere the encoder extracts the top-d eigenspace of a finite-sample-based approximation of the underlying RKHS.\nA key ingredient in our analysis is the *augmentation complexity*,\nwhich we use to quantitatively compare different augmentations and analyze their impact on downstream performance.", "keywords": "Learning Theory;Representation Learning;Self-supervised Learning;Data Augmentation;RKHS Approximation;RKHS Regression", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/4cf1ef9bad03c678ea01d4aade8c0ae725a44312.zip", "author": "Runtian Zhai;Bingbin Liu;Andrej Risteski;J Zico Kolter;Pradeep Kumar Ravikumar", "authorids": "~Runtian_Zhai1;~Bingbin_Liu1;~Andrej_Risteski2;~J_Zico_Kolter1;~Pradeep_Kumar_Ravikumar1", "gender": "M;F;M;M;M", "homepage": "http://www.runtianzhai.com;https://clarabing.github.io/;;http://www.cs.cmu.edu/~pradeepr/;http://www.zicokolter.com", "dblp": "242/8411;222/1554;63/11143;94/3594;67/2526", "google_scholar": "EXd0ES8AAAAJ;2ud06rQAAAAJ;;https://scholar.google.com.tw/citations?user=Q4DTPw4AAAAJ;UXh1I6UAAAAJ", "orcid": "0000-0003-3332-3466;;;;", "linkedin": ";;;;", "or_profile": "~Runtian_Zhai1;~Bingbin_Liu1;~Andrej_Risteski2;~Pradeep_Kumar_Ravikumar1;~Zico_Kolter1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cmu.edu;cmu.edu;cmu.edu;cmu.edu;cmu.edu", "position": "PhD student;PhD student;Assistant Professor;Full 
Professor;Full Professor", "bibtex": "@inproceedings{\nzhai2024understanding,\ntitle={Understanding Augmentation-based Self-Supervised Representation Learning via {RKHS} Approximation and Regression},\nauthor={Runtian Zhai and Bingbin Liu and Andrej Risteski and J Zico Kolter and Pradeep Kumar Ravikumar},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=Ax2yRhCQr1}\n}", "github": "", "project": "", "reviewers": "kPdq;umfH;dUJ6;tXrm", "pdf_size": 650089, "rating": "5;6;8;8", "confidence": "2;3;4;3", "soundness": "2;3;3;3", "contribution": "2;3;3;4", "presentation": "2;2;3;2", "wc_summary": "111;113;234;57", "wc_strengths": "80;79;202;61", "wc_weaknesses": "426;155;474;85", "wc_questions": "48;275;744;576", "wc_review": "665;622;1654;779", "wc_reply_reviewers": "299;403;831;202", "wc_reply_authors": "912;797;1909;1544", "reply_reviewers": "1;1;2;1", "reply_authors": "3;1;5;2", "rating_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 128.75, 64.7857044416436 ], "wc_strengths_avg": [ 105.5, 56.224994441973934 ], "wc_weaknesses_avg": [ 285.0, 167.7065890178439 ], "wc_questions_avg": [ 410.75, 268.5045390677781 ], "wc_review_avg": [ 930.0, 421.9200161167991 ], "wc_reply_reviewers_avg": [ 433.75, 240.11390526164868 ], "wc_reply_authors_avg": [ 1290.5, 456.51095277112466 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 1.479019945774904 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2683750638215426254&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=Ax2yRhCQr1", "pdf": "https://openreview.net/pdf?id=Ax2yRhCQr1", "email": "cmu.edu;cmu.edu;cmu.edu;cmu.edu;cmu.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "Ax9cPWDKkR", "title": "Efficiently Quantifying Individual Agent Importance in Cooperative MARL", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Measuring the contribution of individual agents is challenging in cooperative multi-agent reinforcement learning (MARL). In cooperative MARL, team performance is typically inferred from a single shared global reward. Arguably, among the best current approaches to effectively measure individual agent contributions is to use Shapley values. However, calculating these values is expensive as the computational complexity grows exponentially with respect to the number of agents. In this paper, we adapt difference rewards into an efficient method for quantifying the contribution of individual agents, referred to as Agent Importance, offering a linear computational complexity relative to the number of agents. We show empirically that the computed values are strongly correlated with the true Shapley values, as well as the true underlying individual agent rewards, used as the ground truth in environments where these are available. 
We demonstrate how Agent Importance can be used to help study MARL systems by diagnosing algorithmic failures discovered in prior MARL benchmarking work. Our analysis illustrates Agent Importance as a valuable explainability component for future MARL benchmarks.", "keywords": "Multi-agent reinforcement learning;MARL;Evaluation;Credit assignment;Explainable AI", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/f7abce38ae16f8a8aa7c3a3d9588a41a7159e7b3.zip", "author": "Omayma Mahjoub;Ruan John de Kock;Siddarth Singh;Wiem Khlifi;Abidine Vall;Kale-ab Tessera;Rihab Gorsane;Arnu Pretorius", "authorids": "~Omayma_Mahjoub1;~Ruan_John_de_Kock1;~Siddarth_Singh2;~Wiem_Khlifi1;~Abidine_Vall1;~Kale-ab_Tessera1;~Rihab_Gorsane1;~Arnu_Pretorius1", "gender": "F;M;M;F;M;F;M;M", "homepage": ";;https://www.raillab.org/people#profile;;;;;https://www.kaleabtessera.com/", "dblp": "329/6292;;;;;;188/4368;284/8544", "google_scholar": ";jrYPOrsAAAAJ;RDxZpTwAAAAJ;;;;zZ6ydrAAAAAJ;EB5CtIYAAAAJ", "orcid": ";;0000-0002-3321-4959;;;;;", "linkedin": "omayma-mahjoub-35613b1a6/;ruan-de-kock/;https://za.linkedin.com/in/siddarthsingh1;wiem-khlifi/;abidine-vall-271b23109/;rihabgorsane/;arnupretorius/;kale-ab-tessera-013976101/", "or_profile": "~Omayma_Mahjoub1;~Ruan_John_de_Kock1;~Siddarth_Singh2;~Wiem_Khlifi1;~Abidine_Vall1;~Rihab_Gorsane1;~Arnu_Pretorius1;~Kale-ab_Abebe_Tessera1", "aff": "InstaDeep;InstaDeep;InstaDeep;InstaDeep;;;InstaDeep;University of Edinburgh", "aff_domain": "instadeep.com;instadeep.com;instadeep.com;instadeep.com;;;instadeep.com;ed.ac.uk", "position": "Researcher;Research Engineer;Researcher;Researcher;;;Researcher;PhD student", "bibtex": "@misc{\nmahjoub2024efficiently,\ntitle={Efficiently Quantifying Individual Agent Importance in Cooperative {MARL}},\nauthor={Omayma Mahjoub and Ruan John de Kock and Siddarth Singh and Wiem Khlifi and Abidine Vall and Kale-ab Tessera and Rihab Gorsane and Arnu Pretorius},\nyear={2024},\nurl={https://openreview.net/forum?id=Ax9cPWDKkR}\n}", "github": "", "project": "", "reviewers": "18rV;Vw4r;D2kb;HLQg", "site": "https://openreview.net/forum?id=Ax9cPWDKkR", "pdf_size": 1009679, "rating": "3;5;5;5", "confidence": "4;3;5;4", "soundness": "2;3;2;3", "contribution": "2;2;2;2", "presentation": "4;3;2;3", "wc_summary": "226;47;80;116", "wc_strengths": "113;36;147;74", "wc_weaknesses": "266;145;258;265", "wc_questions": "200;141;39;53", "wc_review": "805;369;524;508", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 117.25, 67.36235966769573 ], "wc_strengths_avg": [ 92.5, 41.608292442733095 ], "wc_weaknesses_avg": [ 233.5, 51.188377587104675 ], "wc_questions_avg": [ 108.25, 65.83834369119563 ], "wc_review_avg": [ 551.5, 158.28534360451695 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=688138280485758688&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "InstaDeep;University of Edinburgh", "aff_unique_dep": ";", "aff_unique_url": 
"https://www.instadeep.com;https://www.ed.ac.uk", "aff_unique_abbr": "InstaDeep;Edinburgh", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "AxYTFpdlvj", "title": "Graph Decoding via Generalized Random Dot Product Graph", "track": "main", "status": "Reject", "tldr": "", "abstract": "Graph Neural Networks (GNNs) have established themselves as the state-of-the-art methodology for a multitude of graph-related tasks, including but not limited to link prediction, node clustering, and classification. Despite their efficacy, the performance of GNNs in encoder-decoder architectures is often constrained by the limitations inherent in traditional decoders, particularly in the reconstruction of adjacency matrices.\n\nIn this paper, we introduce a novel graph decoding approach through the use of the Generalized Random Dot Product Graph (GRDPG) as a generative model for graph decoding. This novel methodology enhances the performance of encoder-decoder architectures across a range of tasks, owing to GRDPG's better capability to capture structures embedded within adjacency matrices.\n\nTo evaluate our approach, we design a benchmark focused on graphs of varying sizes, thereby enriching the diversity of existing benchmarks for link prediction and node clustering tasks. Our experiments span a variety of tasks, encompassing both traditional benchmarks and specialized domains such as molecular graphs.\n\nThe empirical results show the capability of GRDPG on faithfully capturing properties of the original graphs while simultaneously improving the performance metrics of encoder-decoder architectures. By addressing the subtleties involved in adjacency matrix reconstruction, we elevate the overall performance of GNN-based architectures, rendering them more robust and versatile for a wide array of real-world applications, with special regard on molecular graphs.", "keywords": "graph autoencoders;inner dot product decoder;generalized random dot product;link prediction;node clustering;molecular graph", "primary_area": "learning on graphs and other geometries & topologies", "supplementary_material": "", "author": "Alvaro Ciudad Serrano;Zinnera Tariq;Stelina Tarasi;Alexis Molina", "authorids": "~Alvaro_Ciudad_Serrano1;~Zinnera_Tariq1;~Stelina_Tarasi1;~Alexis_Molina1", "gender": "M;F;M;F", "homepage": "https://www.linkedin.com/in/alvaro-ciudad/;;;", "dblp": ";;347/2305;", "google_scholar": ";;moyS0qgAAAAJ;", "orcid": ";;;", "linkedin": ";stelina-tarasi-93334b245;;zinnera-tariq-2a76a1213", "or_profile": "~Alvaro_Ciudad_Serrano1;~Stelina_Tarasi1;~Alexis_Molina1;~Zinnera_Tariq2", "aff": "Nostrum Biodiscovery;;Universidad Polit\u00e9cnica de Cataluna;Universitat Rovira i Virgili", "aff_domain": "nostrumbiodiscovery.com;;upc.edu;urv.cat", "position": "Researcher;;Associate Professor;MS student", "bibtex": "@misc{\nserrano2024graph,\ntitle={Graph Decoding via Generalized Random Dot Product Graph},\nauthor={Alvaro Ciudad Serrano and Zinnera Tariq and Stelina Tarasi and Alexis Molina},\nyear={2024},\nurl={https://openreview.net/forum?id=AxYTFpdlvj}\n}", "github": "", "project": "", "reviewers": "ggnG;AHQZ;hJLN;io5N", "site": "https://openreview.net/forum?id=AxYTFpdlvj", "pdf_size": 212635, "rating": "1;1;3;3", "confidence": "4;5;3;4", "soundness": "2;2;1;2", "contribution": "1;2;2;1", "presentation": "1;1;1;3", "wc_summary": "37;89;47;36", "wc_strengths": "20;26;32;7", "wc_weaknesses": "124;129;128;85", "wc_questions": 
"78;43;57;85", "wc_review": "259;287;264;213", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "127;147;89;391", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 2.0, 1.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 1.75, 0.4330127018922193 ], "contribution_avg": [ 1.5, 0.5 ], "presentation_avg": [ 1.5, 0.8660254037844386 ], "wc_summary_avg": [ 52.25, 21.649191670822262 ], "wc_strengths_avg": [ 21.25, 9.256754290786809 ], "wc_weaknesses_avg": [ 116.5, 18.282505298782223 ], "wc_questions_avg": [ 65.75, 16.69393602479655 ], "wc_review_avg": [ 255.75, 26.845623479442605 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 188.5, 118.75499989474127 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7071067811865475, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:BdzUf5Xj7-QJ:scholar.google.com/&scioq=Graph+Decoding+via+Generalized+Random+Dot+Product+Graph&hl=en&as_sdt=0,10", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "Nostrum Biodiscovery;Universitat Polit\u00e8cnica de Catalunya;Universitat Rovira i Virgili", "aff_unique_dep": ";;", "aff_unique_url": "https://www.nostrumbiodiscovery.com/;https://www.upc.edu;https://www.urv.cat", "aff_unique_abbr": ";UPC;URV", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Spain" }, { "title": "Symmetric Neural-Collapse Representations with Supervised Contrastive Loss: The Impact of ReLU and Batching", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19233", "id": "AyXIDfvYg8", "author_site": "Ganesh Ramachandra Kini, Vala Vakilian, Tina Behnia, Jaidev Gill, Christos Thrampoulidis", "tldr": "", "abstract": "Supervised contrastive loss (SCL) is a competitive and often superior alternative to the cross-entropy loss for classification. While prior studies have demonstrated that both losses yield symmetric training representations under balanced data, this symmetry breaks under class imbalances. This paper presents an intriguing discovery: the introduction of a ReLU activation at the final layer effectively restores the symmetry in SCL-learned representations. We arrive at this finding analytically, by establishing that the global minimizers of an unconstrained features model with SCL loss and entry-wise non-negativity constraints form an orthogonal frame. Extensive experiments conducted across various datasets, architectures, and imbalance scenarios corroborate our finding. Importantly, our experiments reveal that the inclusion of the ReLU activation restores symmetry without compromising test accuracy. This constitutes the first geometry characterization of SCL under imbalances. Additionally, our analysis and experiments underscore the pivotal role of batch selection strategies in representation geometry. 
By proving necessary and sufficient conditions for mini-batch choices that ensure invariant symmetric representations, we introduce batch-binding as an efficient strategy that guarantees these conditions hold.", "keywords": "Supervised contrastive learning;neural collapse;implicit bias;class imbalance", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/4ca8e27c93ca216bf6d3c929a8b4ff265f95c4a2.zip", "author": "Ganesh Ramachandra Kini;Vala Vakilian;Tina Behnia;Jaidev Gill;Christos Thrampoulidis", "authorids": "~Ganesh_Ramachandra_Kini1;~Vala_Vakilian2;~Tina_Behnia1;~Jaidev_Gill1;~Christos_Thrampoulidis1", "gender": "M;M;F;M;", "homepage": "https://sites.google.com/view/ganeshkini/home?authuser=0;;;https://jaidevgill.github.io/;https://sites.google.com/view/cthrampo/home", "dblp": "190/7705;326/5424;323/4405;349/4531;127/6532", "google_scholar": "https://scholar.google.com/citations?hl=en;;https://scholar.google.com/citations?hl=en;JHGm3eoAAAAJ;", "orcid": ";;;;", "linkedin": ";vala-vakilian-7516ab162/;;jaidev-gill/;", "or_profile": "~Ganesh_Ramachandra_Kini1;~Vala_Vakilian2;~Tina_Behnia1;~Jaidev_Gill1;~Christos_Thrampoulidis1", "aff": "University of California, Santa Barbara;University of British Columbia;University of British Columbia;University of British Columbia;University of British Columbia", "aff_domain": "ucsb.edu;ubc.ca;ubc.ca;ubc.ca;ubc.ca", "position": "PhD student;MS student;PhD student;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\nkini2024symmetric,\ntitle={Symmetric Neural-Collapse Representations with Supervised Contrastive Loss: The Impact of Re{LU} and Batching},\nauthor={Ganesh Ramachandra Kini and Vala Vakilian and Tina Behnia and Jaidev Gill and Christos Thrampoulidis},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=AyXIDfvYg8}\n}", "github": "", "project": "", "reviewers": "BEvk;ScWC;q7qd;PL8u", "pdf_size": 1269151, "rating": "5;6;6;6", "confidence": "3;4;4;4", "soundness": "4;3;4;4", "contribution": "2;2;3;2", "presentation": "4;3;3;4", "wc_summary": "525;67;92;96", "wc_strengths": "82;41;64;96", "wc_weaknesses": "530;127;85;209", "wc_questions": "210;4;18;38", "wc_review": "1347;239;259;439", "wc_reply_reviewers": "0;0;150;117", "wc_reply_authors": "1074;525;868;876", "reply_reviewers": "0;0;2;2", "reply_authors": "3;2;3;3", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 195.0, 190.8494170805874 ], "wc_strengths_avg": [ 70.75, 20.58367071248469 ], "wc_weaknesses_avg": [ 237.75, 174.524174543242 ], "wc_questions_avg": [ 67.5, 83.15497579820465 ], "wc_review_avg": [ 571.0, 454.743883961071 ], "wc_reply_reviewers_avg": [ 66.75, 67.76199155869018 ], "wc_reply_authors_avg": [ 835.75, 197.47705562925532 ], "reply_reviewers_avg": [ 1.0, 1.0 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15271411147837020711&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=AyXIDfvYg8", "pdf": "https://openreview.net/pdf?id=AyXIDfvYg8", "email": "ucsb.edu;ubc.ca;ubc.ca;ubc.ca;ubc.ca", 
"author_num": 5, "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "University of California, Santa Barbara;University of British Columbia", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucsb.edu;https://www.ubc.ca", "aff_unique_abbr": "UCSB;UBC", "aff_campus_unique_index": "0", "aff_campus_unique": "Santa Barbara;", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "United States;Canada" }, { "title": "Learning Energy-Based Models by Cooperative Diffusion Recovery Likelihood", "status": "Spotlight", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19232", "id": "AyzkDpuqcl", "author_site": "yaxuan zhu, Jianwen Xie, Yingnian Wu, Ruiqi Gao", "tldr": "", "abstract": "Training energy-based models (EBMs) on high-dimensional data can be both challenging and time-consuming, and there exists a noticeable gap in sample quality between EBMs and other generative frameworks like GANs and diffusion models. To close this gap, inspired by the recent efforts of learning EBMs by maximimizing diffusion recovery likelihood (DRL), we propose cooperative diffusion recovery likelihood (CDRL), an effective approach to tractably learn and sample from a series of EBMs defined on increasingly noisy versons of a dataset, paired with an initializer model for each EBM. At each noise level, the two models are jointly estimated within a cooperative training framework: Samples from the initializer serve as starting points that are refined by a few MCMC sampling steps from the EBM. The EBM is then optimized by maximizing recovery likelihood, while the initializer model is optimized by learning from the difference between the refined samples and the initial samples. In addition, we made several practical designs for EBM training to further improve the sample quality. Combining these advances, we significantly boost the generation performance compared to existing EBM methods on CIFAR-10 and ImageNet 32x32. And we have shown that CDRL has great potential to largely reduce the sampling time. 
We also demonstrate the effectiveness of our models for several downstream tasks, including classifier-free guided generation, compositional generation, image inpainting and out-of-distribution detection.", "keywords": "Energy-based model;recovery-likelihood;cooperative learning", "primary_area": "generative models", "supplementary_material": "", "author": "Yaxuan Zhu;Jianwen Xie;Ying Nian Wu;Ruiqi Gao", "authorids": "~Yaxuan_Zhu1;~Jianwen_Xie1;~Ying_Nian_Wu1;~Ruiqi_Gao1", "gender": "M;;;F", "homepage": ";;;http://www.stat.ucla.edu/~ruiqigao/", "dblp": "289/6018;;;206/7084", "google_scholar": "EptgCGsAAAAJ;;;VdlgOXoAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Yaxuan_Zhu1;~Jianwen_Xie1;~Ying_Nian_Wu1;~Ruiqi_Gao1", "aff": "University of California, Los Angeles;;;Google", "aff_domain": "ucla.edu;;;google.com", "position": "PhD student;;;Researcher", "bibtex": "@inproceedings{\nzhu2024learning,\ntitle={Learning Energy-Based Models by Cooperative Diffusion Recovery Likelihood},\nauthor={Yaxuan Zhu and Jianwen Xie and Ying Nian Wu and Ruiqi Gao},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=AyzkDpuqcl}\n}", "github": "", "project": "", "reviewers": "S2Sh;nfgg;cEiK;KQ36;9dWK", "pdf_size": 23293565, "rating": "6;6;6;8;8", "confidence": "5;4;4;3;5", "soundness": "3;3;3;3;3", "contribution": "2;2;3;3;2", "presentation": "2;3;3;2;3", "wc_summary": "59;81;51;126;41", "wc_strengths": "6;81;63;53;63", "wc_weaknesses": "53;134;222;78;202", "wc_questions": "101;47;133;303;11", "wc_review": "219;343;469;560;317", "wc_reply_reviewers": "10;33;122;128;54", "wc_reply_authors": "1101;877;2174;1472;1055", "reply_reviewers": "1;1;1;1;1", "reply_authors": "4;4;5;3;3", "rating_avg": [ 6.8, 0.9797958971132712 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 71.6, 30.223169919781743 ], "wc_strengths_avg": [ 53.2, 25.26974475533934 ], "wc_weaknesses_avg": [ 137.8, 66.32164051046989 ], "wc_questions_avg": [ 119.0, 101.21659942914502 ], "wc_review_avg": [ 381.6, 119.63878969631881 ], "wc_reply_reviewers_avg": [ 69.4, 47.520942751591114 ], "wc_reply_authors_avg": [ 1335.8, 461.65069045762294 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.8, 0.7483314773547882 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.21821789023599233, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12543544673887222111&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "openreview": "https://openreview.net/forum?id=AyzkDpuqcl", "pdf": "https://openreview.net/pdf?id=AyzkDpuqcl", "email": "ucla.edu;;;google.com", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "University of California, Los Angeles;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.ucla.edu;https://www.google.com", "aff_unique_abbr": "UCLA;Google", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Los Angeles;Mountain View", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "B0OwtVEejJ", "title": "Weight-Entanglement Meets Gradient-Based Neural Architecture Search", "track": "main", "status": "Reject", "tldr": "", "abstract": "Weight sharing is a fundamental concept in neural architecture search (NAS), enabling gradient-based methods to explore cell-based architecture spaces 
significantly faster than traditional blackbox approaches. In parallel, weight entanglement has emerged as a technique for intricate parameter sharing among architectures within macro-level search spaces. Since weight-entanglement poses compatibility challenges for gradient-based NAS methods, these two paradigms have largely developed independently in parallel sub-communities. This paper aims to bridge the gap between these sub-communities by proposing a novel scheme to adapt gradient-based methods for weight-entangled spaces. This enables us to conduct an in-depth comparative assessment and analysis of the performance of gradient-based NAS in weight-entangled search spaces. Our findings reveal that this integration of weight-entanglement and gradient-based NAS brings forth the various benefits of gradient-based methods (enhanced performance, improved supernet training properties and superior any-time performance), while preserving the memory efficiency of weight-entangled spaces. The code for our work is openly accessible [here](https://anonymous.4open.science/r/TangleNAS-527C).", "keywords": "Neural Architecture Search;Transformers;Weight Entanglement", "primary_area": "optimization", "supplementary_material": "/attachment/8435a982dca96e5ef82ad8f0ea502014437a0691.pdf", "author": "Rhea Sanjay Sukthanker;Arjun Krishnakumar;Mahmoud Safari;Frank Hutter", "authorids": "~Rhea_Sanjay_Sukthanker3;~Arjun_Krishnakumar1;~Mahmoud_Safari1;~Frank_Hutter1", "gender": "F;M;M;M", "homepage": "https://rheasukthanker.github.io/;;https://ml.informatik.uni-freiburg.de/profile/safari/;http://ml.informatik.uni-freiburg.de/~hutter/", "dblp": "277/5077;312/6584;280/3542;89/5383", "google_scholar": "OsamqmMAAAAJ;;https://scholar.google.it/citations?user=ntPjyLwAAAAJ;https://scholar.google.de/citations?user=YUrxwrkAAAAJ", "orcid": ";;;0000-0002-2037-3694", "linkedin": "rhea-sukthanker-006502116/;arjun-krishnakumar-10235754/;;frank-hutter-9190b24b/", "or_profile": "~Rhea_Sanjay_Sukthanker3;~Arjun_Krishnakumar1;~Mahmoud_Safari1;~Frank_Hutter1", "aff": "University of Freiburg, Albert-Ludwigs-Universit\u00e4t Freiburg;University of Freiburg, Albert-Ludwigs-Universit\u00e4t Freiburg;Universit\u00e4t Freiburg;Albert-Ludwigs-Universit\u00e4t Freiburg", "aff_domain": "cs.uni-freiburg.de;cs.uni-freiburg.de;uni-freiburg.de;uni-freiburg.de", "position": "PhD student;Research Engineer;Postdoc;Full Professor", "bibtex": "@misc{\nsukthanker2024weightentanglement,\ntitle={Weight-Entanglement Meets Gradient-Based Neural Architecture Search},\nauthor={Rhea Sanjay Sukthanker and Arjun Krishnakumar and Mahmoud Safari and Frank Hutter},\nyear={2024},\nurl={https://openreview.net/forum?id=B0OwtVEejJ}\n}", "github": "", "project": "", "reviewers": "JeZf;wjLK;2nyZ;b7V3", "site": "https://openreview.net/forum?id=B0OwtVEejJ", "pdf_size": 1708412, "rating": "3;3;5;6", "confidence": "5;4;2;5", "soundness": "2;2;3;3", "contribution": "2;2;2;3", "presentation": "2;2;3;3", "wc_summary": "46;82;88;50", "wc_strengths": "27;63;91;33", "wc_weaknesses": "101;49;244;104", "wc_questions": "25;119;25;17", "wc_review": "199;313;448;204", "wc_reply_reviewers": "0;0;0;39", "wc_reply_authors": "738;1450;642;512", "reply_reviewers": "0;0;0;1", "reply_authors": "2;3;2;1", "rating_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 1.224744871391589 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 66.5, 18.6748493969831 ], "wc_strengths_avg": [ 53.5, 
25.588083163847973 ], "wc_weaknesses_avg": [ 124.5, 72.37575560918172 ], "wc_questions_avg": [ 46.5, 41.98511641046146 ], "wc_review_avg": [ 291.0, 101.44703051346549 ], "wc_reply_reviewers_avg": [ 9.75, 16.887495373796554 ], "wc_reply_authors_avg": [ 835.5, 363.7344498394399 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.15713484026367722, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7294994996214458888&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of Freiburg;Albert-Ludwigs-Universit\u00e4t Freiburg", "aff_unique_dep": ";", "aff_unique_url": "https://www.uni-freiburg.de;https://www.uni-freiburg.de", "aff_unique_abbr": "UoF;Albert-Ludwigs-Universit\u00e4t", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Freiburg;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "id": "B0wJ5oCPdB", "title": "Chain-of-Symbol Prompting for Spatial Relationships in Large Language Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "While conventional Chain-of-Thought prompting shows promising performance on various language tasks for LLMs, the spatial scenarios are nearly unexplored. In this paper, we first investigate the performance of LLMs on complex spatial planning and understanding tasks that require LLMs to understand a virtual spatial environment simulated via natural language and act or reason correspondingly in text. By evaluating on classic spatial planning scenarios through natural language descriptions, we found that current popular LLMs such as ChatGPT still lack the ability to handle spatial relationships in texts. This raises a question -- is natural language the best way to represent complex spatial environments for LLMs, or are other alternatives, such as symbolic representations, both more efficient and effective for LLMs? To this end, we propose a novel method called **CoS** (**C**hain-**o**f-**S**ymbol Prompting) that represents the spatial relationships with condensed symbols during the chained intermediate thinking steps. CoS is easy to use and does not need additional training on LLMs. Extensive experiments indicate that CoS clearly surpasses the performance of the Chain-of-Thought (CoT) Prompting described in natural language in all three spatial planning tasks and the existing spatial QA benchmark, with even fewer tokens used in the inputs compared with CoT. The performance gain is strong, by up to 60.8\\% accuracy (from 31.8\\% to 92.6\\%) on Brick World scenarios for ChatGPT.
CoS also reduces the number of tokens in the prompt obviously, by up to 65.8\\% of the tokens (from 407 to 139) for the intermediate steps from demonstrations on the Brick World task.", "keywords": "Large Language Models;Prompting;Spatial Planning;Reasoning", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Hanxu Hu;Hongyuan Lu;Huajian Zhang;Yun-Ze Song;Wai Lam;Yue Zhang", "authorids": "~Hanxu_Hu1;~Hongyuan_Lu2;~Huajian_Zhang1;~Yun-Ze_Song1;~Wai_Lam1;~Yue_Zhang7", "gender": "M;M;M;F;M;M", "homepage": "https://hanxuhu.github.io;https://dblp1.uni-trier.de/pid/139/4326.html;https://hjznlp.github.io/;https://yunzesong.github.io/;http://www.se.cuhk.edu.hk/~textmine;http://frcchang.github.io", "dblp": ";139/4326;;;48/1707;47/722-4", "google_scholar": "https://scholar.google.com.hk/citations?user=r9fCUd8AAAAJ;;niE2uWkAAAAJ;qOQwD7UAAAAJ;ewA4NAcAAAAJ;", "orcid": ";;;;;0000-0002-5214-2268", "linkedin": ";luke-lu-595b68136;huajian-zhang-64b213276/;;;", "or_profile": "~Hanxu_Hu1;~Hongyuan_Lu2;~Huajian_Zhang1;~Yun-Ze_Song1;~Wai_Lam1;~Yue_Zhang7", "aff": "University of Edinburgh;The Chinese University of Hong Kong;University of Edinburgh, University of Edinburgh;;The Chinese University of Hong Kong;Westlake University", "aff_domain": "inf.ed.ac.uk;cuhk.edu.hk;ed.ac.uk;;cuhk.edu.hk;westlake.edu.cn", "position": "MS student;Researcher;Vistor;;Professor;Full Professor", "bibtex": "@misc{\nhu2024chainofsymbol,\ntitle={Chain-of-Symbol Prompting for Spatial Relationships in Large Language Models},\nauthor={Hanxu Hu and Hongyuan Lu and Huajian Zhang and Yun-Ze Song and Wai Lam and Yue Zhang},\nyear={2024},\nurl={https://openreview.net/forum?id=B0wJ5oCPdB}\n}", "github": "", "project": "", "reviewers": "LjHX;c5jB;tveC;Doc8", "site": "https://openreview.net/forum?id=B0wJ5oCPdB", "pdf_size": 405509, "rating": "6;6;6;6", "confidence": "4;3;3;4", "soundness": "3;3;3;2", "contribution": "3;3;2;2", "presentation": "4;3;3;3", "wc_summary": "41;208;95;72", "wc_strengths": "26;30;73;29", "wc_weaknesses": "166;27;144;250", "wc_questions": "1;65;108;21", "wc_review": "234;330;420;372", "wc_reply_reviewers": "0;0;26;79", "wc_reply_authors": "298;262;833;543", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 104.0, 63.027771656627685 ], "wc_strengths_avg": [ 39.5, 19.397164741270824 ], "wc_weaknesses_avg": [ 146.75, 79.65354668814189 ], "wc_questions_avg": [ 48.75, 41.30602256330183 ], "wc_review_avg": [ 339.0, 68.47627326307996 ], "wc_reply_reviewers_avg": [ 26.25, 32.251937926270415 ], "wc_reply_authors_avg": [ 484.0, 228.6711612774991 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13789758302587408119&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;0;1;2", "aff_unique_norm": "University of Edinburgh;Chinese University of Hong Kong;Westlake University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ed.ac.uk;https://www.cuhk.edu.hk;https://www.westlake.edu.cn", "aff_unique_abbr": "Edinburgh;CUHK;WU", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": 
"0;1;0;1;1", "aff_country_unique": "United Kingdom;China" }, { "id": "B1Tl99XWXC", "title": "Efficient Transfer Learning in Diffusion Models via Adversarial Noise", "track": "main", "status": "Reject", "tldr": "", "abstract": "Diffusion Probabilistic Models (DPMs) have demonstrated substantial promise in image generation tasks but heavily rely on the availability of large amounts of training data. Previous works, like GANs, have tackled the limited data problem by transferring pre-trained models learned with sufficient data. However, those methods are hard to be utilized in DPMs since the distinct differences between DPM-based and GAN-based methods, showing in the unique iterative denoising process integral and the need for many timesteps with no-targeted noise in DPMs. In this paper, we propose a novel DPMs-based transfer learning method, TAN, to address the limited data problem. It includes two strategies: similarity-guided training, which boosts transfer with a classifier, and adversarial noise selection which adaptive chooses targeted noise based on the input image. Extensive experiments in the context of few-shot image generation tasks demonstrate that our method is not only efficient but also excels in terms of image quality and diversity when compared to existing GAN-based and DDPM-based methods.", "keywords": "Transfer learning; Few Shot Image Generation; Diffusion model", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "/attachment/143e48c55f7fb3a45063c3662fb88cf75ce8f0d4.zip", "author": "Xiyu Wang;Baijiong Lin;Daochang Liu;Ying-Cong Chen;Chang Xu", "authorids": "~Xiyu_Wang2;~Baijiong_Lin1;~Daochang_Liu1;~Ying-Cong_Chen1;~Chang_Xu4", "gender": "M;M;M;M;", "homepage": ";https://baijiong-lin.github.io/;https://finspire13.github.io;https://www.yingcong.me/;https://sydney.edu.au/engineering/about/our-people/academic-staff/c-xu.html", "dblp": ";279/2950;222/2701;137/6578;97/2966-2", "google_scholar": ";KVdbYTYAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?user=n7j4bJUAAAAJ;N4F_3eoAAAAJ", "orcid": ";0000-0002-4257-0226;;;0000-0002-4756-0609", "linkedin": "%E6%9B%A6%E5%AE%87-%E7%8E%8B-66b6aa1b3/;;;;", "or_profile": "~Xiyu_Wang2;~Baijiong_Lin1;~Daochang_Liu1;~Ying-Cong_Chen1;~Charles_Xu1", "aff": "University of Sydney;The Hong Kong University of Science and Technology (Guangzhou);University of Sydney;Hong Kong University of Science and Technology;University of Sydney", "aff_domain": "usyd.edu.au;connect.hkust-gz.edu.cn;usyd.edu.au;hkust-gz.edu.cn;sydney.eud.au", "position": "PhD student;PhD student;Postdoc;Assistant Professor;Associate Professor", "bibtex": "@misc{\nwang2024efficient,\ntitle={Efficient Transfer Learning in Diffusion Models via Adversarial Noise},\nauthor={Xiyu Wang and Baijiong Lin and Daochang Liu and Ying-Cong Chen and Chang Xu},\nyear={2024},\nurl={https://openreview.net/forum?id=B1Tl99XWXC}\n}", "github": "", "project": "", "reviewers": "dyX4;ft3z;Pd9e;jP4S", "site": "https://openreview.net/forum?id=B1Tl99XWXC", "pdf_size": 14548603, "rating": "3;6;6;6", "confidence": "5;3;3;2", "soundness": "2;3;3;3", "contribution": "1;3;3;3", "presentation": "2;3;3;3", "wc_summary": "60;94;162;60", "wc_strengths": "36;39;142;19", "wc_weaknesses": "180;150;70;50", "wc_questions": "14;16;67;7", "wc_review": "290;299;441;136", "wc_reply_reviewers": "0;0;27;0", "wc_reply_authors": "1100;789;792;489", "reply_reviewers": "0;0;1;0", "reply_authors": "3;2;2;2", "rating_avg": [ 5.25, 
1.299038105676658 ], "confidence_avg": [ 3.25, 1.0897247358851685 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 94.0, 41.641325627314025 ], "wc_strengths_avg": [ 59.0, 48.52319033204639 ], "wc_weaknesses_avg": [ 112.5, 54.025456962435776 ], "wc_questions_avg": [ 26.0, 23.90606617576384 ], "wc_review_avg": [ 291.5, 107.92242584375131 ], "wc_reply_reviewers_avg": [ 6.75, 11.691342951089922 ], "wc_reply_authors_avg": [ 792.5, 216.0329835927838 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.9271726499455307, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11020171793380811836&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;0;1;0", "aff_unique_norm": "University of Sydney;Hong Kong University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.sydney.edu.au;https://www.ust.hk", "aff_unique_abbr": "USYD;HKUST", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Guangzhou;Hong Kong SAR", "aff_country_unique_index": "0;1;0;1;0", "aff_country_unique": "Australia;China" }, { "id": "B1VWS7ZRm6", "title": "On Transferring Expert Knowledge from Tabular Data to Images", "track": "main", "status": "Reject", "tldr": "", "abstract": "Transferring knowledge across modalities has gained considerable attention in machine learning. Expert knowledge in fields like medicine is often represented in tabular form, and transferring this information can enhance the comprehensiveness and accuracy of image-based learning. Unlike general knowledge reuse scenarios, tabular data is divided into numerical and categorical variables, with each column having a unique semantic meaning. In addition, not all columns can be accurately represented in images, making it challenging to determine \"how to reuse\" and \"which subset to reuse\". To address this, we propose a novel method called CHannel tAbulaR alignment with optiMal tranSport (CHARMS) that automatically and effectively transfers relevant tabular knowledge. Specifically, by maximizing the mutual information between a group of channels and tabular features, our method modifies the visual embedding and captures the semantics of tabular knowledge. The alignment between channels and attributes helps select the subset of tabular data which contains knowledge to images. 
Experimental results demonstrate that CHARMS effectively reuses tabular knowledge to improve the performance and interpretability of visual classifiers.", "keywords": "Multimodal Learning;Tabular Data;Missing Modality", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Jun-Peng Jiang;Han-Jia Ye;Leye Wang;Yang Yang;Yuan Jiang;De-Chuan Zhan", "authorids": "~Jun-Peng_Jiang2;~Han-Jia_Ye1;~Leye_Wang1;~Yang_Yang17;~Yuan_Jiang1;~De-Chuan_Zhan1", "gender": ";M;M;M;F;M", "homepage": "http://www.lamda.nju.edu.cn/jiangjp/;http://www.lamda.nju.edu.cn/yehj;https://wangleye.github.io/;http://www.njustkmg.cn/;http://lamda.nju.edu.cn/jiangy;http://www.lamda.nju.edu.cn/zhandc/", "dblp": "266/2867;165/3014;07/8764;48/450-74;;74/498", "google_scholar": "ZZ_7-TQAAAAJ;mgOYhtoAAAAJ;;_6NJip0AAAAJ;;mYJf4TcAAAAJ", "orcid": ";;;0000-0002-5245-3584;;0000-0002-3533-2078", "linkedin": ";;;;;", "or_profile": "~Jun-Peng_Jiang2;~Han-Jia_Ye1;~Leye_Wang1;~Yang_Yang17;~Yuan_Jiang1;~De-Chuan_Zhan1", "aff": "NanJing University;Nanjing University;Peking University;Nanjing University of Science and Technology;Nanjing University;Nanjing University", "aff_domain": "nju.edu.cn;nju.edu.cn;pku.edu.cn;njust.edu.cn;nju.edu.cn;nju.edu.cn", "position": "PhD student;Associate Professor;Assistant Professor;Full Professor;Full Professor;Full Professor", "bibtex": "@misc{\njiang2024on,\ntitle={On Transferring Expert Knowledge from Tabular Data to Images},\nauthor={Jun-Peng Jiang and Han-Jia Ye and Leye Wang and Yang Yang and Yuan Jiang and De-Chuan Zhan},\nyear={2024},\nurl={https://openreview.net/forum?id=B1VWS7ZRm6}\n}", "github": "", "project": "", "reviewers": "QrqR;63tZ;zSmg;UGYo", "site": "https://openreview.net/forum?id=B1VWS7ZRm6", "pdf_size": 2073529, "rating": "5;5;5;6", "confidence": "4;3;4;5", "soundness": "2;3;3;3", "contribution": "2;3;2;3", "presentation": "1;2;2;3", "wc_summary": "64;200;22;100", "wc_strengths": "12;81;66;81", "wc_weaknesses": "300;841;194;139", "wc_questions": "14;94;2;66", "wc_review": "390;1216;284;386", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "723;867;825;863", "reply_reviewers": "0;0;0;0", "reply_authors": "2;2;2;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 96.5, 65.82362797658604 ], "wc_strengths_avg": [ 60.0, 28.38133189263675 ], "wc_weaknesses_avg": [ 368.5, 278.8678002208215 ], "wc_questions_avg": [ 44.0, 37.57658845611187 ], "wc_review_avg": [ 569.0, 375.9534545658545 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 819.5, 58.07538204781782 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2665088422304130128&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1;2;0;0", "aff_unique_norm": "Nanjing University;Peking University;Nanjing University of Science and Technology", "aff_unique_dep": ";;", "aff_unique_url": "http://www.nju.edu.cn;http://www.pku.edu.cn;http://www.nust.edu.cn/", "aff_unique_abbr": "Nanjing U;Peking U;NUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { 
"id": "B21c9hT1D7", "title": "High-dimensional robust regression under heavy-tailed data: Asymptotics and Universality", "track": "main", "status": "Reject", "tldr": "", "abstract": "We investigate the high-dimensional properties of robust regression estimators in the presence of heavy-tailed contamination of both the covariates and response functions. In particular, we provide a sharp asymptotic characterisation of M-estimators trained on a family of elliptical covariate and noise data distributions including cases where second and higher moments do not exist. We show that, despite being consistent, the Huber loss with optimally tuned location parameter $\\delta$ is suboptimal in the high-dimensional regime in the presence of heavy-tailed noise, highlighting the necessity of further regularisation to achieve optimal performance. This result also uncovers the existence of a curious transition in $\\delta$ as a function of the sample complexity and contamination. Moreover, we derive the decay rates for the excess risk of ridge regression. We show that, while it is both optimal and universal for noise distributions with finite second moment, its decay rate can be considerably faster when the covariates' second moment does not exist. Finally, we show that our formulas readily generalise to a richer family of models and data distributions, such as generalised linear estimation with arbitrary convex regularisation trained on mixture models.", "keywords": "High-dimensional statistics;Robust regression;M-estimation;Huber loss;Heavy-tail contamination;Replica trick", "primary_area": "learning theory", "supplementary_material": "/attachment/dd9355e049cfee52697f13694ff2dda6eb552dcd.pdf", "author": "Urte Adomaityte;Leonardo Defilippis;Bruno Loureiro;Gabriele Sicuro", "authorids": "~Urte_Adomaityte1;~Leonardo_Defilippis1;~Bruno_Loureiro1;~Gabriele_Sicuro1", "gender": "F;M;M;M", "homepage": ";;https://brloureiro.github.io/;https://gsicuro.github.io/", "dblp": ";358/3529;207/1834;145/7405", "google_scholar": ";https://scholar.google.fr/citations?user=-df-QMIAAAAJ;DXl3ir8AAAAJ;Lls7QvUAAAAJ", "orcid": "0000-0002-5593-2177;;0000-0002-6327-4688;0000-0002-9258-2436", "linkedin": ";;bruno-loureiro-43183b14a/;", "or_profile": "~Urte_Adomaityte1;~Leonardo_Defilippis1;~Bruno_Loureiro1;~Gabriele_Sicuro1", "aff": "King's College London, University of London;Ecole Normale Sup\u00e9rieure, Ecole Normale Sup\u00e9rieure de Paris;Ecole Normale Sup\u00e9rieure, Ecole Normale Sup\u00e9rieure de Paris;University of Bologna", "aff_domain": "kcl.ac.uk;di.ens.fr;di.ens.fr;unibo.it", "position": "PhD student;PhD student;Researcher;Associate Professor", "bibtex": "@misc{\nadomaityte2024highdimensional,\ntitle={High-dimensional robust regression under heavy-tailed data: Asymptotics and Universality},\nauthor={Urte Adomaityte and Leonardo Defilippis and Bruno Loureiro and Gabriele Sicuro},\nyear={2024},\nurl={https://openreview.net/forum?id=B21c9hT1D7}\n}", "github": "", "project": "", "reviewers": "Fbsx;sjuz;JXsv", "site": "https://openreview.net/forum?id=B21c9hT1D7", "pdf_size": 997354, "rating": "5;6;8", "confidence": "3;3;3", "soundness": "3;3;3", "contribution": "2;2;3", "presentation": "2;2;3", "wc_summary": "35;118;43", "wc_strengths": "11;89;38", "wc_weaknesses": "56;222;29", "wc_questions": "454;473;22", "wc_review": "556;902;132", "wc_reply_reviewers": "116;171;0", "wc_reply_authors": "1112;1010;86", "reply_reviewers": "1;1;0", "reply_authors": "2;2;1", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], 
"confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 65.33333333333333, 37.38389433373088 ], "wc_strengths_avg": [ 46.0, 32.341923257592455 ], "wc_weaknesses_avg": [ 102.33333333333333, 85.33203124006575 ], "wc_questions_avg": [ 316.3333333333333, 208.2695902483659 ], "wc_review_avg": [ 530.0, 314.8883399979534 ], "wc_reply_reviewers_avg": [ 95.66666666666667, 71.27567763425488 ], "wc_reply_authors_avg": [ 736.0, 461.50189598743793 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10697380971436726465&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "King's College London;Ecole Normale Sup\u00e9rieure de Paris;University of Bologna", "aff_unique_dep": ";;", "aff_unique_url": "https://www.kcl.ac.uk;https://www.ens.psl.eu;https://www.unibo.it", "aff_unique_abbr": "KCL;ENS Paris;Unibo", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Paris", "aff_country_unique_index": "0;1;1;2", "aff_country_unique": "United Kingdom;France;Italy" }, { "id": "B37UmlxsaP", "title": "Revealing The Intrinsic Ability of Generative Text Summarizers for Outlier Paragraph Detection", "track": "main", "status": "Reject", "tldr": "", "abstract": "Generative text summarizers are good at content encapsulation but falter when outlier paragraphs disrupt the primary narrative. We categorize these outliers into cross-document outliers that are thematically inconsistent but within the same domain, and cross-domain outliers, originating from distinct domains. Traditional methods lean on word embeddings and specialized classifiers, requiring extensive supervised fine-tuning. Confidence-based strategies, despite bypassing fine-tuning, are ill-suited due to the non-classification essence of summarization. Leveraging the encoder-decoder cross-attention framework, we introduce an approach emphasizing the unique characteristics of infrequent words in detection. We present CODE, a novel outlier detector using a closed-form expression rooted in cross-attention scores. Our experimental results validate the superiority of CODE under different datasets and architectures, e.g., achieving a 5.80\\% FPR at 95\\% TPR vs. 25.63\\% by supervised baselines on T5-Large and Delve domain. 
We further underscore the significance of cross-attention, word frequency normalization and judicious integration of cross-document outliers during pretraining.", "keywords": "Outlier Paragraph Detection;Generative Language Models;Cross Attention", "primary_area": "generative models", "supplementary_material": "", "author": "Qi Li;Lyuwen Wu;Lin Liu;Luoyi Fu;Xinbing Wang;Lei Zhou;Chenghu Zhou;Shiyu Liang", "authorids": "~Qi_Li15;~Lyuwen_Wu1;~Lin_Liu16;~Luoyi_Fu1;~Xinbing_Wang1;~Lei_Zhou3;~Chenghu_Zhou3;~Shiyu_Liang1", "gender": "M;F;M;F;M;M;M;M", "homepage": ";;;http://www.cs.sjtu.edu.cn/~fu-ly/index.html;http://www.cs.sjtu.edu.cn/~wang-xb/;;http://www.igsnrr.cas.cn/gkjj/ysfc/ysfc_zhouchenghu/;", "dblp": ";;;;96/1149.html;72/5749;85/1324.html;", "google_scholar": ";;eoj1VFoAAAAJ;https://scholar.google.com.tw/citations?user=xHs9mCUAAAAJ;https://scholar.google.com.tw/citations?user=CT5yZbwAAAAJ;;;L8r9ox4AAAAJ", "orcid": "0000-0001-8089-8348;0009-0007-2607-7072;;;0000-0002-0357-8356;;;", "linkedin": ";;;;;;;", "or_profile": "~Qi_Li15;~Lyuwen_Wu1;~Lin_Liu16;~Luoyi_Fu1;~Xinbing_Wang1;~Lei_Zhou3;~Chenghu_Zhou3;~Shiyu_Liang1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;National University of Defense Technology;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;IGSNRR, Chinese Academy of Sciences, Beijing, China;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;nudt.edu.cn;sjtu.edu.cn;cs.sjtu.edu.cn;sjtu.edu.cn;lreis.ac.cn;sjtu.edu.cn", "position": "PhD student;MS student;Associate Professor;Associate Professor;Full Professor;Full Professor;Full Professor;Assistant Professor", "bibtex": "@misc{\nli2024revealing,\ntitle={Revealing The Intrinsic Ability of Generative Text Summarizers for Outlier Paragraph Detection},\nauthor={Qi Li and Lyuwen Wu and Lin Liu and Luoyi Fu and Xinbing Wang and Lei Zhou and Chenghu Zhou and Shiyu Liang},\nyear={2024},\nurl={https://openreview.net/forum?id=B37UmlxsaP}\n}", "github": "", "project": "", "reviewers": "tMsa;bZd9;2Dc8;G5oa", "site": "https://openreview.net/forum?id=B37UmlxsaP", "pdf_size": 997499, "rating": "1;3;3;3", "confidence": "4;3;4;3", "soundness": "3;2;2;2", "contribution": "1;1;1;2", "presentation": "2;1;3;2", "wc_summary": "64;29;103;42", "wc_strengths": "12;34;22;35", "wc_weaknesses": "113;137;218;157", "wc_questions": "17;63;132;3", "wc_review": "206;263;475;237", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 2.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 1.25, 0.4330127018922193 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 59.5, 28.0579756931964 ], "wc_strengths_avg": [ 25.75, 9.443913383762052 ], "wc_weaknesses_avg": [ 156.25, 38.90613704802881 ], "wc_questions_avg": [ 53.75, 50.335747734587194 ], "wc_review_avg": [ 295.25, 105.72221857301331 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:PqyAnqwWOFYJ:scholar.google.com/&scioq=Revealing+The+Intrinsic+Ability+of+Generative+Text+Summarizers+for+Outlier+Paragraph+Detection&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;1;0;0;0;2;0", 
"aff_unique_norm": "Shanghai Jiao Tong University;National University of Defense Technology;Chinese Academy of Sciences", "aff_unique_dep": ";;IGSNRR", "aff_unique_url": "https://www.sjtu.edu.cn;http://www.nudt.edu.cn/;http://www.cas.cn", "aff_unique_abbr": "SJTU;NUDT;CAS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "B3E8Y8g9GA", "title": "Data Overfitting for On-Device Super-Resolution with Dynamic Algorithm and Compiler Co-Design", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Deep neural networks (DNNs) are frequently employed in a variety of computer vision applications. Nowadays, an emerging trend in the current video distribution system is to take the advantage of DNNs overfitting property to perform video resolution upscaling. By splitting videos into chunks and applying a super-resolution (SR) model to overfit each chunk, this scheme of SR models plus video chunks is able to replace traditional video transmission to enhance video quality and transmission efficiency. However, many models and chunks are needed to guarantee a high performance, which leads to tremendous overhead on model switching and memory footprints at the user end. To resolve such problems, we propose a Dynamic Deep neural network assisted by a Content-Aware data processing pipeline to reduce the model number down to one (Dy-DCA), which helps promote performance while conserving computational resources. Additionally, to achieve real acceleration on the user end, we design a framework that optimizes dynamic features (e.g., dynamic shapes, sizes, and control flow) in Dy-DCA to enable a series of compilation optimizations, including fused code generation, static execution planning, etc. By employ such techniques, our method achieves better PSNR and real-time performance (33 FPS) on an off-the-shelf mobile phone. 
Meanwhile, assisted by our compilation optimization, we achieve 1.7$\\times$ speedup while saving up to 1.61$\\times$ memory consumption.", "keywords": "video super-resolution; overfitting; compiler optimization", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Gen Li;Zhihao Shu;Jie Ji;Minghai Qin;Fatemeh Afghah;Wei Niu;Xiaolong Ma", "authorids": "~Gen_Li4;~Zhihao_Shu1;~Jie_Ji1;~Minghai_Qin1;~Fatemeh_Afghah1;~Wei_Niu3;~Xiaolong_Ma2", "gender": "M;M;;M;F;M;M", "homepage": "https://coulsonlee.github.io;;;https://sites.google.com/site/minghaiqin/home;https://sites.google.com/g.clemson.edu/is-win-lab/home;https://www.niuwei.info;https://xiaolongma2016.com", "dblp": "28/538-12;369/3951;;;70/8821.html;68/828-2.html;", "google_scholar": ";dkxKQq0AAAAJ;;MSgWKbYAAAAJ;https://scholar.google.com.tw/citations?user=67mA71QAAAAJ;w1RoaOMAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;0000-0002-2315-1173;;0000-0003-3753-7648", "linkedin": ";;;;fatemeh-afghah-57b53816/;;xiaolong-ma-66b98910b/", "or_profile": "~Gen_Li4;~Zhihao_Shu1;~Jie_Ji1;~Minghai_Qin1;~Fatemeh_Afghah1;~Wei_Niu3;~Xiaolong_Ma2", "aff": "Clemson University;University of Georgia;;Western Digital Corporation;Clemson University;University of Georgia;Clemson University", "aff_domain": "clemson.edu;uga.edu;;wdc.com;clemson.edu;uga.edu;clemson.edu", "position": "PhD student;PhD student;;senior technologist;Associate Professor;Assistant Professor;Assistant Professor", "bibtex": "@misc{\nli2024data,\ntitle={Data Overfitting for On-Device Super-Resolution with Dynamic Algorithm and Compiler Co-Design},\nauthor={Gen Li and Zhihao Shu and Jie Ji and Minghai Qin and Fatemeh Afghah and Wei Niu and Xiaolong Ma},\nyear={2024},\nurl={https://openreview.net/forum?id=B3E8Y8g9GA}\n}", "github": "", "project": "", "reviewers": "nwgm;wfAU;J3a9;PF2v", "site": "https://openreview.net/forum?id=B3E8Y8g9GA", "pdf_size": 16068186, "rating": "5;5;5;6", "confidence": "4;3;2;3", "soundness": "2;3;2;3", "contribution": "2;3;3;2", "presentation": "3;2;2;3", "wc_summary": "75;24;97;219", "wc_strengths": "70;14;25;253", "wc_weaknesses": "66;45;22;93", "wc_questions": "4;108;89;25", "wc_review": "215;191;233;590", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "814;557;114;721", "reply_reviewers": "0;0;0;0", "reply_authors": "2;2;2;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 103.75, 71.61485530251387 ], "wc_strengths_avg": [ 90.5, 96.13662153414795 ], "wc_weaknesses_avg": [ 56.5, 26.196373794859472 ], "wc_questions_avg": [ 56.5, 43.176961449365564 ], "wc_review_avg": [ 307.25, 163.92433467914395 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 551.5, 268.82754695157263 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:45YcQKaSy-sJ:scholar.google.com/&scioq=Data+Overfitting+for+On-Device+Super-Resolution+with+Dynamic+Algorithm+and+Compiler+Co-Design&hl=en&as_sdt=0,33", "gs_version_total": 6, "aff_unique_index": "0;1;2;0;1;0", "aff_unique_norm": "Clemson University;University of Georgia;Western Digital Corporation", "aff_unique_dep": ";;", "aff_unique_url": 
"https://www.clemson.edu;https://www.uga.edu;https://www.westerndigital.com", "aff_unique_abbr": "Clemson;UGA;WDC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "B4E2BW27MP", "title": "PromptCoT: Align Prompt Distribution via Adapted Chain-of-Thought", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Diffusion-based generative models have exhibited remarkable capability in the production of high-fidelity visual content such as images and videos. However, their performance is significantly contingent upon the quality of textual inputs, commonly referred to as \"prompts\".\n The process of traditional prompt engineering, while effective, necessitates empirical expertise and poses challenges for inexperienced users.\n In this paper, we introduce PromptCoT, an innovative enhancer that autonomously refines prompts for users.\n The design of PromptCoT is based on the observation that, prompts resembling textual information corresponding to high-quality images within the training set tend to yield superior generation performance.\n As such, we fine-tune the pre-trained Large Language Models (LLM) using a curated text dataset comprising solely of high-quality visual content descriptions. By doing so, the LLM becomes capable of capturing the distribution of high-quality training texts, enabling it to generate aligned continuations and revisions to boost the original texts.\n Nonetheless, one drawback of pre-trained LLMs is their tendency to generate extraneous or irrelevant information. To enhance the alignment between the original text prompts and the refined counterparts, we leverage the Chain-of-Thought (CoT) mechanism. CoT can extract and amalgamate crucial information from the aligned continuation and revision, enabling reasonable inferences based on the contextual cues to produce a more comprehensive and nuanced final output. \n Considering computational efficiency, instead of allocating a dedicated LLM for prompt enhancement to each individual model or dataset, we integrate adapters that facilitate dataset-specific adaptation, leveraging a shared pre-trained LLM as the foundation for this process. \n By fine-tuning these adapters independently, we can adapt PromptCoT to new datasets with minimal increase in training cost and memory usage.\n We assess the performance of PromptCoT on widely-used latent diffusion models for image and video generation to validate the effectiveness. 
The results demonstrate significant improvements in key performance metrics.", "keywords": "text-to-image generative models;prompt engineering;Chain of Thought;parameter efficient adaptation;Large Language Models", "primary_area": "generative models", "supplementary_material": "/attachment/91f80fc6717d44aa9baf63e2157ee385b815414f.pdf", "author": "Junyi Yao;Yijiang Liu;Zhen Dong;Mingfei Guo;Kurt Keutzer;Li Du;Daquan Zhou;Shanghang Zhang", "authorids": "~Junyi_Yao1;~Yijiang_Liu2;~Zhen_Dong3;~Mingfei_Guo1;~Kurt_Keutzer1;~Li_Du5;~Daquan_Zhou1;~Shanghang_Zhang4", "gender": "M;M;M;F;M;M;M;F", "homepage": ";;https://dong-zhen.com/;https://www.linkedin.com/in/mingfeiguo/;https://people.eecs.berkeley.edu/~keutzer/;;https://iscl.nju.edu.cn/main.psp;https://www.shanghangzhang.com/", "dblp": ";;;;k/KurtKeutzer.html;244/9623;;95/11531", "google_scholar": ";uOyz518AAAAJ;czxMUzcAAAAJ;;ID9QePIAAAAJ;DdCAbWwAAAAJ;;voqw10cAAAAJ", "orcid": "0009-0002-1437-2836;0000-0001-5914-1607;;;0000-0003-3868-8501;;0000-0003-2687-6978;", "linkedin": ";;zhen-dong/;;kurtkeutzer/;;;", "or_profile": "~Junyi_Yao1;~Yijiang_Liu2;~Zhen_Dong3;~Mingfei_Guo1;~Kurt_Keutzer1;~Zhou_Daquan1;~LI_DU4;~Shanghang_Zhang1", "aff": "Peking University;Nanjing Universiy;Nexusflow.ai Inc;Stanford University;University of California, Berkeley;Bytedance;Nanjing University;Peking University", "aff_domain": "stu.pku.edu.cn;nju.edu.cn;nexusflow.ai;stanford.edu;berkeley.edu;bytedance.com;nju.edu.cn;pku.edu.cn", "position": "Undergrad student;PhD student;Principal Researcher;MS student;Full Professor;Researcher;Associate Professor;Assistant Professor", "bibtex": "@misc{\nyao2024promptcot,\ntitle={PromptCoT: Align Prompt Distribution via Adapted Chain-of-Thought},\nauthor={Junyi Yao and Yijiang Liu and Zhen Dong and Mingfei Guo and Kurt Keutzer and Li Du and Daquan Zhou and Shanghang Zhang},\nyear={2024},\nurl={https://openreview.net/forum?id=B4E2BW27MP}\n}", "github": "", "project": "", "reviewers": "BjAS;zdHw;YkZW", "site": "https://openreview.net/forum?id=B4E2BW27MP", "pdf_size": 9156801, "rating": "3;3;6", "confidence": "3;3;2", "soundness": "2;2;3", "contribution": "1;2;3", "presentation": "2;2;3", "wc_summary": "115;54;111", "wc_strengths": "38;43;81", "wc_weaknesses": "226;203;2", "wc_questions": "8;4;264", "wc_review": "387;304;458", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 4.0, 1.4142135623730951 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.0, 0.816496580927726 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 93.33333333333333, 27.86076492528915 ], "wc_strengths_avg": [ 54.0, 19.200694431886227 ], "wc_weaknesses_avg": [ 143.66666666666666, 100.6125682451695 ], "wc_questions_avg": [ 92.0, 121.63332876587184 ], "wc_review_avg": [ 383.0, 62.93382768167424 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 3, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2816106117694485180&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;2;3;4;5;1;0", "aff_unique_norm": "Peking University;Nanjing University;Nexusflow.ai;Stanford University;University of California, Berkeley;ByteDance", "aff_unique_dep": 
";;;;;", "aff_unique_url": "http://www.pku.edu.cn;http://www.nju.edu.cn;https://www.nexusflow.ai;https://www.stanford.edu;https://www.berkeley.edu;https://www.bytedance.com", "aff_unique_abbr": "Peking U;Nanjing U;Nexusflow.ai;Stanford;UC Berkeley;Bytedance", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Stanford;Berkeley", "aff_country_unique_index": "0;0;1;1;1;0;0;0", "aff_country_unique": "China;United States" }, { "id": "B4XM9nQ8Ns", "title": "HyperSINDy: Deep Generative Modeling of Nonlinear Stochastic Governing Equations", "track": "main", "status": "Reject", "tldr": "", "abstract": "The discovery of governing differential equations from data is an open frontier in machine learning. The {\\em sparse identification of nonlinear dynamics} (SINDy) \\citep{brunton_discovering_2016} framework enables data-driven discovery of interpretable models in the form of sparse, deterministic governing laws. Recent works have sought to adapt this approach to the stochastic setting, though these adaptations are severely hampered by the curse of dimensionality. On the other hand, Bayesian-inspired deep learning methods have achieved widespread success in high-dimensional probabilistic modeling via computationally efficient approximate inference techniques, suggesting the use of these techniques for efficient stochastic equation discovery. Here, we introduce {\\em HyperSINDy}, a framework for modeling stochastic dynamics via a deep generative model of sparse, nonlinear governing equations whose parametric form is discovered from data. HyperSINDy employs a variational encoder to approximate the distribution of observed states and derivatives. A hypernetwork \\citep{ha_hypernetworks_2016} transforms samples from this distribution into the coefficients of a differential equation whose sparse form is learned simultaneously using a trainable binary mask \\citep{louizos_learning_2018}. Once trained, HyperSINDy generates stochastic dynamics via a differential equation whose coefficients are driven by a Wiener process. In experiments HyperSINDy accurately recovers ground truth stochastic governing equations, with stochasticity scaled to match that of the data. Finally, HyperSINDy provides uncertainty quantification that scales to high-dimensional systems, retaining computational efficiency and interpretability. Taken together, HyperSINDy offers a promising framework for model discovery and uncertainty quantification in real-world systems, integrating sparse equation discovery methods with advances in statistical machine learning and deep generative modeling.", "keywords": "generative modeling;deep learning;equation discovery;system identification;VAE;hypernetwork;SINDy", "primary_area": "probabilistic methods (Bayesian methods, variational inference, sampling, UQ, etc.)", "supplementary_material": "/attachment/41c0846f555c3053af7c564d2f777bd42388eb26.zip", "author": "Mozes Jacobs;Bingni W Brunton;Steven Brunton;J. Nathan Kutz;Ryan V. 
Raut", "authorids": "~Mozes_Jacobs1;~Bingni_W_Brunton1;~Steven_Brunton1;~J._Nathan_Kutz1;~Ryan_V._Raut1", "gender": "M;F;M;M;M", "homepage": "https://mozesjacobs.github.io;https://www.bingbrunton.com;https://eigensteve.com;http://faculty.washington.edu/kutz;https://ryraut.github.io/", "dblp": ";138/7786;;;", "google_scholar": ";UftAYPkAAAAJ;TjzWdigAAAAJ;;fafSHeYAAAAJ", "orcid": ";0000-0002-4831-3466;;0000-0002-6004-2275;0000-0002-8761-1431", "linkedin": ";;;;", "or_profile": "~Mozes_Jacobs1;~Bingni_W_Brunton1;~Steven_Brunton1;~J._Nathan_Kutz1;~Ryan_V._Raut1", "aff": "Harvard University, Harvard University;University of Washington, Seattle;;University of Washington;Allen Institute", "aff_domain": "g.harvard.edu;uw.edu;;u.washington.edu;alleninstitute.org", "position": "PhD student;Associate Professor;;Full Professor;Researcher", "bibtex": "@misc{\njacobs2024hypersindy,\ntitle={Hyper{SIND}y: Deep Generative Modeling of Nonlinear Stochastic Governing Equations},\nauthor={Mozes Jacobs and Bingni W Brunton and Steven Brunton and J. Nathan Kutz and Ryan V. Raut},\nyear={2024},\nurl={https://openreview.net/forum?id=B4XM9nQ8Ns}\n}", "github": "", "project": "", "reviewers": "WJp7;7UM6;Bwz8;UKtm;cj4i;kqcu", "site": "https://openreview.net/forum?id=B4XM9nQ8Ns", "pdf_size": 16711409, "rating": "5;5;6;6;6;8", "confidence": "5;3;5;3;3;3", "soundness": "2;2;3;3;2;4", "contribution": "2;2;3;3;2;4", "presentation": "3;3;3;3;3;4", "wc_summary": "135;58;30;75;78;79", "wc_strengths": "58;32;62;34;48;147", "wc_weaknesses": "68;122;17;55;38;73", "wc_questions": "78;32;441;83;112;100", "wc_review": "339;244;550;247;276;399", "wc_reply_reviewers": "0;47;9;39;0;77", "wc_reply_authors": "715;1489;1405;501;875;1155", "reply_reviewers": "0;1;1;1;0;1", "reply_authors": "1;3;2;1;2;3", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 2.6666666666666665, 0.7453559924999298 ], "contribution_avg": [ 2.6666666666666665, 0.7453559924999298 ], "presentation_avg": [ 3.1666666666666665, 0.3726779962499649 ], "wc_summary_avg": [ 75.83333333333333, 31.450578514375355 ], "wc_strengths_avg": [ 63.5, 38.96045003162395 ], "wc_weaknesses_avg": [ 62.166666666666664, 32.67729418962892 ], "wc_questions_avg": [ 141.0, 136.4648916999045 ], "wc_review_avg": [ 342.5, 107.63015376742709 ], "wc_reply_reviewers_avg": [ 28.666666666666668, 28.311756490114767 ], "wc_reply_authors_avg": [ 1023.3333333333334, 358.17950186401725 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.3535533905932737, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5622348542617702222&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Harvard University;University of Washington;Allen Institute for Artificial Intelligence", "aff_unique_dep": ";;", "aff_unique_url": "https://www.harvard.edu;https://www.washington.edu;https://allenai.org", "aff_unique_abbr": "Harvard;UW;AI2", "aff_campus_unique_index": "1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "B4nhr6OJWI", "title": "Instilling Inductive Biases with Subnetworks", "track": "main", "status": "Reject", "tldr": "", "abstract": "Despite the recent success of artificial neural networks on a variety of tasks, we have little knowledge or control over the 
exact solutions these models implement. Instilling inductive biases \u2014 preferences for some solutions over others \u2014 into these models is one promising path toward understanding and controlling their behavior. Much work has been done to study the inherent inductive biases of models and instill different inductive biases through hand-designed architectures or carefully curated training regimens. In this work, we explore a more mechanistic approach: Subtask Induction. Our method discovers a functional subnetwork that implements a particular subtask within a trained model and uses it to instill inductive biases towards solutions utilizing that subtask. Subtask Induction is flexible and efficient, and we demonstrate its effectiveness with two experiments. First, we show that Subtask Induction significantly reduces the amount of training data required for a model to adopt a specific, generalizable solution to a modular arithmetic task. Second, we demonstrate that Subtask Induction successfully induces a human-like shape bias while increasing data efficiency for convolutional and transformer-based image classification models.", "keywords": "inductive bias;generalization", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Enyan Zhang;Michael A. Lepori;Ellie Pavlick", "authorids": "~Enyan_Zhang1;~Michael_A._Lepori1;~Ellie_Pavlick1", "gender": ";M;F", "homepage": "https://enyanz.com;https://lepori.xyz/;http://cs.brown.edu/people/epavlick/", "dblp": ";262/0162;141/4059", "google_scholar": "qbL4zikAAAAJ;G1fepc8AAAAJ;sFyrSa8AAAAJ", "orcid": ";;", "linkedin": ";michael-lepori-925426124/;", "or_profile": "~Enyan_Zhang1;~Michael_A._Lepori1;~Ellie_Pavlick1", "aff": "Brown University;Brown University;Brown University", "aff_domain": "brown.edu;brown.edu;brown.edu", "position": "Undergrad student;PhD student;Assistant Professor", "bibtex": "@misc{\nzhang2024instilling,\ntitle={Instilling Inductive Biases with Subnetworks},\nauthor={Enyan Zhang and Michael A. 
Lepori and Ellie Pavlick},\nyear={2024},\nurl={https://openreview.net/forum?id=B4nhr6OJWI}\n}", "github": "", "project": "", "reviewers": "wyEr;pnkg;PuHj", "site": "https://openreview.net/forum?id=B4nhr6OJWI", "pdf_size": 1727742, "rating": "6;6;8", "confidence": "3;3;3", "soundness": "3;2;4", "contribution": "3;2;3", "presentation": "4;4;4", "wc_summary": "78;268;116", "wc_strengths": "17;31;116", "wc_weaknesses": "87;154;92", "wc_questions": "349;282;22", "wc_review": "531;735;346", "wc_reply_reviewers": "96;0;0", "wc_reply_authors": "1495;1713;339", "reply_reviewers": "2;0;0", "reply_authors": "4;3;1", "rating_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 4.0, 0.0 ], "wc_summary_avg": [ 154.0, 82.0893821798329 ], "wc_strengths_avg": [ 54.666666666666664, 43.74420596553966 ], "wc_weaknesses_avg": [ 111.0, 30.474032661705056 ], "wc_questions_avg": [ 217.66666666666666, 141.0350626223454 ], "wc_review_avg": [ 537.3333333333334, 158.87171624371098 ], "wc_reply_reviewers_avg": [ 32.0, 45.254833995939045 ], "wc_reply_authors_avg": [ 1182.3333333333333, 602.931357803044 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 2.6666666666666665, 1.247219128924647 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6901432766231011120&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Brown University", "aff_unique_dep": "", "aff_unique_url": "https://www.brown.edu", "aff_unique_abbr": "Brown", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "P2Seg: Pointly-supervised Segmentation via Mutual Distillation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19231", "id": "B4vzu2aokv", "author_site": "Zipeng Wang, Xuehui Yu, Xumeng Han, Wenwen Yu, Zhixun Huang, Jianbin Jiao, Zhenjun Han", "tldr": "", "abstract": "Point-level Supervised Instance Segmentation (PSIS) aims to enhance the applicability and scalability of instance segmentation by utilizing low-cost yet instance-informative annotations. Existing PSIS methods usually rely on positional information to distinguish objects, but predicting precise boundaries remains challenging due to the lack of contour annotations. Nevertheless, weakly supervised semantic segmentation methods are proficient in utilizing intra-class feature consistency to capture the boundary contours of the same semantic regions. In this paper, we design a Mutual Distillation Module (MDM) to leverage the complementary strengths of both instance position and semantic information and achieve accurate instance-level object perception. The MDM consists of Semantic to Instance (S2I) and Instance to Semantic (I2S). S2I is guided by the precise boundaries of semantic regions to learn the association between annotated points and instance contours. I2S leverages discriminative relationships between instances to facilitate the differentiation of various objects within the semantic map. Extensive experiments substantiate the efficacy of MDM in fostering the synergy between instance and semantic information, consequently improving the quality of instance-level object representations. 
Our method achieves 55.7 mAP50 and 17.6 mAP on the PASCAL VOC and MS COCO datasets, significantly outperforming recent PSIS methods and several box-supervised instance segmentation competitors.", "keywords": "Mutual Distillation;Semantic to Instance;Instance to Semantic;Point-level Supervised Instance Segmentation", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/86d4f1caa1f211657373db8c23b9b64e7d261e1c.pdf", "author": "Zipeng Wang;Xuehui Yu;Xumeng Han;Wenwen Yu;Zhixun Huang;Jianbin Jiao;Zhenjun Han", "authorids": "~Zipeng_Wang2;~Xuehui_Yu1;~Xumeng_Han1;~Wenwen_Yu2;~Zhixun_Huang1;~Jianbin_Jiao1;~Zhenjun_Han1", "gender": "M;M;;;M;M;M", "homepage": "https://vision.ucas.ac.cn/;;http://vision.ucas.ac.cn/;;http://lamp.ucas.ac.cn/;https://people.ucas.ac.cn/~hanzhj;https://yinglang.github.io/", "dblp": ";297/3745;;;;11/2938;243/8603", "google_scholar": ";https://scholar.google.cz/citations?user=LWu_FiQAAAAJ;;;;0rK4yTcAAAAJ;WYrxoBEAAAAJ", "orcid": ";0000-0002-1636-463X;;0000-0002-8941-2700;;;", "linkedin": ";;;;;;", "or_profile": "~Zipeng_Wang2;~Xumeng_Han1;~Wenwen_Yu2;~Zhixun_Huang1;~Jianbin_Jiao1;~Zhenjun_Han1;~hui_ying2", "aff": "University of Chinese Academy of Sciences;University of Chinese Academy of Sciences;University of Chinese Academy of Sciences;;University of Chinese Academy of Sciences;University of Chinese Academy of Sciences;University of Chinese Academy of Sciences", "aff_domain": "ucas.ac.cn;ucas.ac.cn;ucas.ac.cn;;ucas.ac.cn;ucas.ac.cn;mails.ucas.ac.cn", "position": "MS student;PhD student;MS student;;Full Professor;Associate Professor;PhD student", "bibtex": "@inproceedings{\nwang2024pseg,\ntitle={P2Seg: Pointly-supervised Segmentation via Mutual Distillation},\nauthor={Zipeng Wang and Xuehui Yu and Xumeng Han and Wenwen Yu and Zhixun Huang and Jianbin Jiao and Zhenjun Han},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=B4vzu2aokv}\n}", "github": "", "project": "", "reviewers": "QcUt;3EDk;ZJkY;enoR;fR5S", "pdf_size": 2915691, "rating": "3;6;6;6;8", "confidence": "4;4;3;4;5", "soundness": "1;3;3;2;3", "contribution": "2;3;3;2;3", "presentation": "2;2;3;1;3", "wc_summary": "45;67;39;104;76", "wc_strengths": "125;65;29;12;95", "wc_weaknesses": "983;189;134;134;164", "wc_questions": "39;57;15;4;9", "wc_review": "1192;378;217;254;344", "wc_reply_reviewers": "58;16;66;28;0", "wc_reply_authors": "2791;1410;1491;1448;1015", "reply_reviewers": "1;1;1;1;0", "reply_authors": "5;2;3;2;2", "rating_avg": [ 5.8, 1.6 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.4, 0.8 ], "contribution_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.2, 0.7483314773547882 ], "wc_summary_avg": [ 66.2, 23.301502097504358 ], "wc_strengths_avg": [ 65.2, 41.48445491988535 ], "wc_weaknesses_avg": [ 320.8, 331.73929523045655 ], "wc_questions_avg": [ 24.8, 20.08382433701311 ], "wc_review_avg": [ 477.0, 362.2275527896794 ], "wc_reply_reviewers_avg": [ 33.6, 24.96076921891631 ], "wc_reply_authors_avg": [ 1631.0, 604.4809343560804 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 2.8, 1.16619037896906 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.3952847075210474, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7416667966264732466&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "openreview": 
"https://openreview.net/forum?id=B4vzu2aokv", "pdf": "https://openreview.net/pdf?id=B4vzu2aokv", "email": "ucas.ac.cn;ucas.ac.cn;ucas.ac.cn;;ucas.ac.cn;ucas.ac.cn;mails.ucas.ac.cn", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "University of Chinese Academy of Sciences", "aff_unique_dep": "", "aff_unique_url": "http://www.ucas.ac.cn", "aff_unique_abbr": "UCAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "B5CgCJY2po", "title": "Flood and Echo: Algorithmic Alignment of GNNs with Distributed Computing", "track": "main", "status": "Reject", "tldr": "", "abstract": "Graph Neural Networks are a natural fit for learning algorithms. They can directly represent tasks through an abstract but versatile graph structure and handle inputs of different sizes. This opens up the possibility for scaling and extrapolation to larger graphs, one of the most important advantages of an algorithm. However, this raises two core questions i) How can we enable nodes to gather the required information in a given graph ($\\textit{information exchange}$), even if is far away and ii) How can we design an execution framework which enables this information exchange for extrapolation to larger graph sizes ($\\textit{algorithmic alignment for extrapolation}$). We propose a new execution framework that is inspired by the design principles of distributed algorithms: Flood and Echo Net. It propagates messages through the entire graph in a wave like activation pattern, which naturally generalizes to larger instances. Through its sparse but parallel activations it is provably more efficient in terms of message complexity. We study the proposed model and provide both empirical evidence and theoretical insights in terms of its expressiveness, efficiency, information exchange and ability to extrapolate.", "keywords": "GNN;Extrapolation;Algorithm Learning;Distributed Computing", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Jo\u00ebl Mathys;Florian Gr\u00f6tschla;Kalyan Varma Nadimpalli;Roger Wattenhofer", "authorids": "~Jo\u00ebl_Mathys1;~Florian_Gr\u00f6tschla1;~Kalyan_Varma_Nadimpalli1;~Roger_Wattenhofer1", "gender": ";M;M;Not Specified", "homepage": ";https://disco.ethz.ch/members/fgroetschla;;https://disco.ethz.ch/members/wroger", "dblp": ";334/1811;;w/RogerWattenhofer", "google_scholar": ";;5fONIZkAAAAJ;https://scholar.google.ch/citations?user=EG3VPm4AAAAJ", "orcid": ";;;", "linkedin": ";;;roger-wattenhofer-4466731/", "or_profile": "~Jo\u00ebl_Mathys1;~Florian_Gr\u00f6tschla1;~Kalyan_Varma_Nadimpalli1;~Roger_Wattenhofer1", "aff": ";Oracle Labs;Department of Computer Science, Indian Institute of Technology, Madras, Indian Institute of Technology, Madras;Swiss Federal Institute of Technology", "aff_domain": ";oracle.com;cse.iitm.ac.in;ethz.ch", "position": ";Intern;Intern;Full Professor", "bibtex": "@misc{\nmathys2024flood,\ntitle={Flood and Echo: Algorithmic Alignment of {GNN}s with Distributed Computing},\nauthor={Jo{\\\"e}l Mathys and Florian Gr{\\\"o}tschla and Kalyan Varma Nadimpalli and Roger Wattenhofer},\nyear={2024},\nurl={https://openreview.net/forum?id=B5CgCJY2po}\n}", "github": "", "project": "", "reviewers": "gPuo;VsT5;hXqD;W7g1", "site": "https://openreview.net/forum?id=B5CgCJY2po", "pdf_size": 952795, "rating": "3;3;5;8", "confidence": "4;4;4;3", "soundness": "2;2;1;4", "contribution": "2;1;1;4", "presentation": "2;3;1;4", 
"wc_summary": "62;79;139;29", "wc_strengths": "67;71;48;46", "wc_weaknesses": "260;128;238;45", "wc_questions": "113;78;94;49", "wc_review": "502;356;519;169", "wc_reply_reviewers": "157;125;0;0", "wc_reply_authors": "1071;1557;935;383", "reply_reviewers": "1;1;0;0", "reply_authors": "2;3;2;1", "rating_avg": [ 4.75, 2.0463381929681126 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.25, 1.0897247358851685 ], "contribution_avg": [ 2.0, 1.224744871391589 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 77.25, 39.927277643235335 ], "wc_strengths_avg": [ 58.0, 11.113055385446435 ], "wc_weaknesses_avg": [ 167.75, 86.7363101590101 ], "wc_questions_avg": [ 83.5, 23.4574082114798 ], "wc_review_avg": [ 386.5, 140.65294166849125 ], "wc_reply_reviewers_avg": [ 70.5, 71.40203078344481 ], "wc_reply_authors_avg": [ 986.5, 418.1731100871982 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9169493006161777, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:kCCViVDaxREJ:scholar.google.com/&scioq=Flood+and+Echo:+Algorithmic+Alignment+of+GNNs+with+Distributed+Computing&hl=en&as_sdt=0,47", "gs_version_total": 2, "aff_unique_index": "0;1;2", "aff_unique_norm": "Oracle Corporation;Indian Institute of Technology, Madras;Swiss Federal Institute of Technology", "aff_unique_dep": "Oracle Labs;Department of Computer Science;", "aff_unique_url": "https://labs.oracle.com;https://www.iitm.ac.in;https://www.ethz.ch", "aff_unique_abbr": "Oracle Labs;IIT Madras;ETH Zurich", "aff_campus_unique_index": "1", "aff_campus_unique": ";Madras", "aff_country_unique_index": "0;1;2", "aff_country_unique": "United States;India;Switzerland" }, { "id": "B5Tp4WwZl8", "title": "Error Feedback Shines when Features are Rare", "track": "main", "status": "Reject", "tldr": "", "abstract": "We provide the first proof that gradient descent $\\left({\\color{green}\\sf GD}\\right)$ with greedy sparsification $\\left({\\color{green}\\sf TopK}\\right)$ and error feedback $\\left({\\color{green}\\sf EF}\\right)$ can obtain better communication complexity than vanilla ${\\color{green}\\sf GD}$ when solving the distributed optimization problem $\\min_{x\\in \\mathbb{R}^d} {f(x)=\\frac{1}{n}\\sum_{i=1}^n f_i(x)}$, where $n$ = # of clients, $d$ = # of features, and $f_1,\\dots,f_n$ are smooth nonconvex functions. Despite intensive research since 2014 when ${\\color{green}\\sf EF}$ was first proposed by Seide et al., this problem remained open until now. Perhaps surprisingly, we show that ${\\color{green}\\sf EF}$ shines in the regime when features are rare, i.e., when each feature is present in the data owned by a small number of clients only. 
To illustrate our main result, we show that in order to find a random vector $\\hat{x}$ such that $\\lVert {\\nabla f(\\hat{x})} \\rVert^2 \\leq \\varepsilon$ in expectation, ${\\color{green}\\sf GD}$ with the ${\\color{green}\\sf Top1}$ sparsifier and ${\\color{green}\\sf EF}$ requires ${\\cal O} \\left( \\left( L +\n{\\color{blue}r} \\sqrt{ \\frac{{\\color{red}c}}{n} \\min \\left( \\frac{{\\color{red}c}}{n} \\max_i L_i^2, \\frac{1}{n}\\sum_{i=1}^n L_i^2 \\right) }\n\\right) \\frac{1}{\\varepsilon} \\right)$ bits to be communicated by each worker to the server only, where $L$ is the smoothness constant of $f$, $L_i$ is the smoothness constant of $f_i$, ${\\color{red}c}$ is the maximal number of clients owning any feature ($1\\leq {\\color{red}c} \\leq n$), and ${\\color{blue}r}$ is the maximal number of features owned by any client ($1\\leq {\\color{blue}r} \\leq d$). Clearly, the communication complexity improves as ${\\color{red}c}$ decreases (i.e., as features become more rare), and can be much better than the ${\\cal O}({\\color{blue}r} L \\frac{1}{\\varepsilon})$ communication complexity of ${\\color{green}\\sf GD}$ in the same regime.", "keywords": "error feedback;greedy sparsification;distributed optimization;communication complexity", "primary_area": "optimization", "supplementary_material": "/attachment/6584da25926a6f7cc21f5f7b17e343403d8decca.zip", "author": "Peter Richt\u00e1rik;Elnur Gasanov;Konstantin Pavlovich Burlachenko", "authorids": "~Peter_Richt\u00e1rik1;~Elnur_Gasanov1;~Konstantin_Pavlovich_Burlachenko1", "gender": "M;M;M", "homepage": "https://elnurgasanov.com;https://burlachenkok.github.io/;https://richtarik.org", "dblp": "231/7651;;62/8001", "google_scholar": ";3pA-LoQAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0003-4380-5848", "linkedin": ";burlachenkok/;richtarik/", "or_profile": "~Elnur_Gasanov1;~Konstantin_Pavlovich_Konstantin_Burlachenko1;~Peter_Richtarik1", "aff": "KAUST;;King Abdullah University of Science and Technology (KAUST)", "aff_domain": "kaust.edu.sa;;kaust.edu.sa", "position": "PhD student;;Full Professor", "bibtex": "@misc{\nricht{\\'a}rik2024error,\ntitle={Error Feedback Shines when Features are Rare},\nauthor={Peter Richt{\\'a}rik and Elnur Gasanov and Konstantin Pavlovich Burlachenko},\nyear={2024},\nurl={https://openreview.net/forum?id=B5Tp4WwZl8}\n}", "github": "", "project": "", "reviewers": "XpPb;h2wk;CuB9;xzAm", "site": "https://openreview.net/forum?id=B5Tp4WwZl8", "pdf_size": 1608153, "rating": "3;6;8;8", "confidence": "4;3;3;4", "soundness": "2;3;4;4", "contribution": "2;3;3;3", "presentation": "2;3;4;4", "wc_summary": "64;91;63;48", "wc_strengths": "41;30;19;39", "wc_weaknesses": "345;39;15;84", "wc_questions": "3;23;59;3", "wc_review": "453;183;156;174", "wc_reply_reviewers": "0;0;0;6", "wc_reply_authors": "2091;190;12;460", "reply_reviewers": "0;0;0;1", "reply_authors": "6;1;2;2", "rating_avg": [ 6.25, 2.0463381929681126 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 66.5, 15.5 ], "wc_strengths_avg": [ 32.25, 8.699856320652657 ], "wc_weaknesses_avg": [ 120.75, 131.81876763192713 ], "wc_questions_avg": [ 22.0, 22.869193252058544 ], "wc_review_avg": [ 241.5, 122.49591829934579 ], "wc_reply_reviewers_avg": [ 1.5, 2.598076211353316 ], "wc_reply_authors_avg": [ 688.25, 825.4351503903865 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 
2.75, 1.920286436967152 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3665083330689157, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:FQtWzFELb4MJ:scholar.google.com/&scioq=Error+Feedback+Shines+when+Features+are+Rare&hl=en&as_sdt=0,33", "gs_version_total": 7, "aff_unique_index": "0;0", "aff_unique_norm": "King Abdullah University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaust.edu.sa", "aff_unique_abbr": "KAUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Saudi Arabia" }, { "id": "B5kAfAC7hO", "title": "Provable Representation with Efficient Planning for Partially Observable Reinforcement Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "In real-world reinforcement learning problems, the state information is often only partially observable, which breaks the basic assumption in Markov decision processes, and thus, leads to inferior performance. Partially Observable Markov Decision Processes have been introduced to explicitly take the issue into account for learning, exploration, and planning, but present significant computational and statistical challenges. To address these difficulties, we exploit the representation view, which leads to a coherent design framework for a practically tractable reinforcement learning algorithm upon partial observations. We provide a theoretical analysis for justifying the statistical efficiency of the proposed algorithm. We also empirically demonstrate that the proposed algorithm can surpass state-of-the-art performance with partial observations across various benchmarks, thereby pushing reliable reinforcement learning towards more practical applications.", "keywords": "reinforcement learning;partial observability;representation learning", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/fc5c4ac9bbd40be966c5edeae8b4fb523f38941d.pdf", "author": "Hongming Zhang;Tongzheng Ren;Chenjun Xiao;Dale Schuurmans;Bo Dai", "authorids": "~Hongming_Zhang3;~Tongzheng_Ren1;~Chenjun_Xiao1;~Dale_Schuurmans1;~Bo_Dai1", "gender": "M;M;;;", "homepage": "https://github.com/initial-h;https://www.cs.utexas.edu/~tzren/;https://chenjun-x.github.io/;;https://bo-dai.github.io/", "dblp": ";211/8004;178/8641;;64/2903", "google_scholar": "https://scholar.google.ca/citations?user=mwbsY3AAAAAJ;VgNDYeYAAAAJ;;;TIKl_foAAAAJ", "orcid": ";;0000-0002-5493-1500;;0009-0002-8070-574X", "linkedin": ";;;;", "or_profile": "~Hongming_Zhang3;~Tongzheng_Ren1;~Chenjun_Xiao1;~Dale_Schuurmans1;~Bo_Dai1", "aff": "University of Alberta;University of Texas, Austin;Huawei Technologies Ltd.;;Google Brain", "aff_domain": "ualberta.ca;utexas.edu;huawei.com;;google.com", "position": "PhD student;PhD student;Researcher;;Research Scientist", "bibtex": "@misc{\nzhang2024provable,\ntitle={Provable Representation with Efficient Planning for Partially Observable Reinforcement Learning},\nauthor={Hongming Zhang and Tongzheng Ren and Chenjun Xiao and Dale Schuurmans and Bo Dai},\nyear={2024},\nurl={https://openreview.net/forum?id=B5kAfAC7hO}\n}", "github": "", "project": "", "reviewers": "rUpE;wQNF;cvkF", "site": "https://openreview.net/forum?id=B5kAfAC7hO", "pdf_size": 1082436, "rating": "5;5;6", "confidence": "3;3;3", "soundness": "3;2;2", "contribution": "2;3;2", "presentation": "1;1;2", "wc_summary": "99;48;72", "wc_strengths": "58;63;142", "wc_weaknesses": "172;280;195", 
"wc_questions": "120;63;24", "wc_review": "449;454;433", "wc_reply_reviewers": "1246;0;0", "wc_reply_authors": "2250;479;457", "reply_reviewers": "3;0;0", "reply_authors": "5;1;1", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 1.3333333333333333, 0.4714045207910317 ], "wc_summary_avg": [ 73.0, 20.83266665599966 ], "wc_strengths_avg": [ 87.66666666666667, 38.473656210740124 ], "wc_weaknesses_avg": [ 215.66666666666666, 46.449494674921446 ], "wc_questions_avg": [ 69.0, 39.42080668885405 ], "wc_review_avg": [ 445.3333333333333, 8.9566858950296 ], "wc_reply_reviewers_avg": [ 415.3333333333333, 587.3700329056255 ], "wc_reply_authors_avg": [ 1062.0, 840.0908681009851 ], "reply_reviewers_avg": [ 1.0, 1.4142135623730951 ], "reply_authors_avg": [ 2.3333333333333335, 1.8856180831641267 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5187570997639685783&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of Alberta;University of Texas at Austin;Huawei;Google", "aff_unique_dep": ";;Huawei Technologies;Google Brain", "aff_unique_url": "https://www.ualberta.ca;https://www.utexas.edu;https://www.huawei.com;https://brain.google.com", "aff_unique_abbr": "UAlberta;UT Austin;Huawei;Google Brain", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Austin;Mountain View", "aff_country_unique_index": "0;1;2;1", "aff_country_unique": "Canada;United States;China" }, { "title": "ToolChain*: Efficient Action Space Navigation in Large Language Models with A* Search", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19230", "id": "B6pQxqUcT8", "author_site": "Yuchen Zhuang, Xiang Chen, Tong Yu, Saayan Mitra, Victor Bursztyn, Ryan Rossi, Somdeb Sarkhel, Chao Zhang", "tldr": "", "abstract": "Large language models (LLMs) have demonstrated powerful decision-making and planning capabilities in solving complicated real-world problems. LLM-based autonomous agents can interact with diverse tools (e.g., functional APIs) and generate solution plans that execute a series of API function calls in a step-by-step manner. The multitude of candidate API function calls significantly expands the action space, amplifying the critical need for efficient action space navigation. However, existing methods either struggle with unidirectional exploration in expansive action spaces, trapped into a locally optimal solution, or suffer from exhaustively traversing all potential actions, causing inefficient navigation. To address these issues, we propose ToolChain*, an efficient tree search-based planning algorithm for LLM-based agents. It formulates the entire action space as a decision tree, where each node represents a possible API function call involved in a solution plan. By incorporating the A$^*$ search algorithm with task-specific cost function design, it efficiently prunes high-cost branches that may involve incorrect actions, identifying the most low-cost valid path as the solution. Extensive experiments on multiple tool-use and reasoning tasks demonstrate that ToolChain* efficiently balances exploration and exploitation within an expansive action space. 
It outperforms state-of-the-art baselines on planning and reasoning tasks by 3.1% and 3.5% on average while requiring 7.35x and 2.31x less time, respectively.", "keywords": "Large Language Model;Tool Use;Tree Search;A* Search", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Yuchen Zhuang;Xiang Chen;Tong Yu;Saayan Mitra;Victor Bursztyn;Ryan A. Rossi;Somdeb Sarkhel;Chao Zhang", "authorids": "~Yuchen_Zhuang1;~Xiang_Chen9;~Tong_Yu3;~Saayan_Mitra1;~Victor_Bursztyn1;~Ryan_A._Rossi2;~Somdeb_Sarkhel2;~Chao_Zhang15", "gender": "M;M;;;M;;M;", "homepage": "https://night-chen.github.io/;;https://www.linkedin.com/in/tong-yu-42790744;;https://vbursztyn.github.io/;;http://www.utdallas.edu/~somdeb.sarkhel/;http://chaozhang.org/", "dblp": "191/5231.html;;32/1593-1;;154/7800.html;;138/5583;94/3019-14", "google_scholar": "T-f6XlEAAAAJ;aPq10m4AAAAJ;https://scholar.google.com/citations?hl=en;;HRx3epUAAAAJ;;ZrJWmUoAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0002-5991-2050;;;;0009-0006-2055-9647;0000-0003-3009-598X", "linkedin": ";;tong-yu-42790744;;;;somdebsarkhel;", "or_profile": "~Yuchen_Zhuang1;~Xiang_Chen9;~Tong_Yu3;~Saayan_Mitra1;~Victor_Bursztyn1;~Ryan_A._Rossi2;~Somdeb_Sarkhel2;~Chao_Zhang15", "aff": "Georgia Institute of Technology;Adobe Systems;Adobe Research;;Adobe Systems;;Adobe Research;Georgia Institute of Technology", "aff_domain": "gatech.edu;adobe.com;adobe.com;;adobe.com;;adobe.com;gatech.edu", "position": "PhD student;Researcher;Senior Research Scientist;;Researcher;;Research Scientist;Assistant Professor", "bibtex": "@inproceedings{\nzhuang2024toolchain,\ntitle={ToolChain*: Efficient Action Space Navigation in Large Language Models with A* Search},\nauthor={Yuchen Zhuang and Xiang Chen and Tong Yu and Saayan Mitra and Victor Bursztyn and Ryan A. 
Rossi and Somdeb Sarkhel and Chao Zhang},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=B6pQxqUcT8}\n}", "github": "", "project": "", "reviewers": "tHXf;7Kjr;52p8;gdT8", "pdf_size": 809133, "rating": "6;8;8;8", "confidence": "4;5;3;3", "soundness": "3;4;3;4", "contribution": "2;4;3;4", "presentation": "4;4;3;4", "wc_summary": "65;84;141;97", "wc_strengths": "27;124;101;58", "wc_weaknesses": "17;26;184;78", "wc_questions": "133;93;232;96", "wc_review": "242;327;658;329", "wc_reply_reviewers": "91;23;0;13", "wc_reply_authors": "680;471;2983;1632", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;6;4", "rating_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "contribution_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 96.75, 27.9676152004421 ], "wc_strengths_avg": [ 77.5, 37.566607512523674 ], "wc_weaknesses_avg": [ 76.25, 66.42429901775404 ], "wc_questions_avg": [ 138.5, 56.233886580957574 ], "wc_review_avg": [ 389.0, 159.2278242016765 ], "wc_reply_reviewers_avg": [ 31.75, 35.16656793035112 ], "wc_reply_authors_avg": [ 1441.5, 991.7490862108218 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.5, 1.6583123951777 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 52, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1777782046065029064&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=B6pQxqUcT8", "pdf": "https://openreview.net/pdf?id=B6pQxqUcT8", "email": "gatech.edu;adobe.com;adobe.com;;adobe.com;;adobe.com;gatech.edu", "author_num": 8, "aff_unique_index": "0;1;1;1;1;0", "aff_unique_norm": "Georgia Institute of Technology;Adobe", "aff_unique_dep": ";Adobe Systems Incorporated", "aff_unique_url": "https://www.gatech.edu;https://www.adobe.com", "aff_unique_abbr": "Georgia Tech;Adobe", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "B6t5wy6g5a", "title": "Aligning Large Multimodal Models with Factually Augmented RLHF", "track": "main", "status": "Reject", "tldr": "", "abstract": "Large Multimodal Models (LMM) are built across modalities and the misalignment between two modalities can result in ``hallucination'', generating textual outputs that are not grounded by the multimodal information in context. To address the multimodal misalignment issue, we adapt the Reinforcement Learning from Human Feedback (RLHF) from the text domain to the vision-language alignment, where human annotators are asked to compare two responses and pinpoint the more hallucinated one, and the vision-language model is trained to maximize the simulated human rewards. We propose a new alignment algorithm called Factually Augmented RLHF that augments the reward model with additional factual information such as image captions and ground-truth multi-choice options, which alleviates the reward hacking phenomenon in RLHF and further improves the performance. We also enhance the GPT-4-generated training data (for vision instruction tuning) with previously available human-written image-text pairs to improve the general capabilities of our model. 
To evaluate the proposed approach in real-world scenarios, we develop a new evaluation benchmark MMHAL-BENCH with a special focus on penalizing hallucinations. As the first LMM trained with RLHF, our approach achieves remarkable improvement on the LLaVA-Bench\ndataset with the 96% performance level of the text-only GPT-4 (while previous best methods can only achieve the 87% level), and an improvement of 60% on MMHAL-BENCH over other baselines", "keywords": "AI Alignment;Large Multimodal Models;RLHF", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/6d4dd4d9d307ffd69f22ae293cbcf93dd5b96315.zip", "author": "Zhiqing Sun;Sheng Shen;Shengcao Cao;Haotian Liu;Chunyuan Li;Yikang Shen;Chuang Gan;Liangyan Gui;Yu-Xiong Wang;Yiming Yang;Kurt Keutzer;Trevor Darrell", "authorids": "~Zhiqing_Sun1;~Sheng_Shen2;~Shengcao_Cao1;~Haotian_Liu1;~Chunyuan_Li1;~Yikang_Shen1;~Chuang_Gan1;~Liangyan_Gui1;~Yu-Xiong_Wang1;~Yiming_Yang1;~Kurt_Keutzer1;~Trevor_Darrell2", "gender": "M;M;M;;;M;M;F;;F;M;M", "homepage": "https://www.cs.cmu.edu/~zhiqings/;https://sincerass.github.io;https://shengcao-cao.github.io/;https://hliu.cc;http://chunyuan.li/;;http://people.csail.mit.edu/ganchuang/;;https://yxw.cs.illinois.edu/;http://www.cs.cmu.edu/~yiming/;https://people.eecs.berkeley.edu/~keutzer/;https://people.eecs.berkeley.edu/~trevor/", "dblp": "211/7692;138/5764-1.html;236/4681;66/10511;64/9590;152/8226;139/6993;155/5055;35/10700;25/1666;k/KurtKeutzer.html;d/TrevorDarrell", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;yMYTz3AAAAAJ;Xo6wfnQAAAAJ;Zd7WmXUAAAAJ;qff5rRYAAAAJ;PTeSCbIAAAAJ;3aE0r9QAAAAJ;T_Q-xDkAAAAJ;MlZq4XwAAAAJ;ID9QePIAAAAJ;https://scholar.google.com.tw/citations?user=bh-uRFMAAAAJ", "orcid": ";;;;;;;;;0000-0001-8322-607X;0000-0003-3868-8501;", "linkedin": "zhiqing-sun-5781b3100/;sheng-s-ab198a174/;;;;;;;;yiming-yang-24100924/;kurtkeutzer/;", "or_profile": "~Zhiqing_Sun1;~Sheng_Shen2;~Shengcao_Cao1;~Haotian_Liu1;~Chunyuan_Li1;~Yikang_Shen1;~Chuang_Gan1;~Liangyan_Gui1;~Yu-Xiong_Wang1;~Yiming_Yang1;~Kurt_Keutzer1;~trevor_darrell1", "aff": "Carnegie Mellon University;University of California, Berkeley;Adobe Systems;Department of Computer Science, University of Wisconsin - Madison;Microsoft Research;International Business Machines;University of Massachusetts at Amherst;UIUC;Department of Computer Science, University of Illinois Urbana-Champaign;School of Computer Science, Carnegie Mellon University;University of California, Berkeley;Electrical Engineering & Computer Science Department", "aff_domain": "cs.cmu.edu;berkeley.edu;adobe.com;cs.wisc.edu;microsoft.com;ibm.com;umass.edu;cs.illinois.edu;cs.illinois.edu;cs.cmu.edu;berkeley.edu;eecs.berkeley.edu", "position": "PhD student;PhD student;Intern;PhD student;Principal Researcher;Researcher;Assistant Professor;Assistant Professor;Assistant Professor;Full Professor;Full Professor;Professor", "bibtex": "@misc{\nsun2024aligning,\ntitle={Aligning Large Multimodal Models with Factually Augmented {RLHF}},\nauthor={Zhiqing Sun and Sheng Shen and Shengcao Cao and Haotian Liu and Chunyuan Li and Yikang Shen and Chuang Gan and Liangyan Gui and Yu-Xiong Wang and Yiming Yang and Kurt Keutzer and Trevor Darrell},\nyear={2024},\nurl={https://openreview.net/forum?id=B6t5wy6g5a}\n}", "github": "", "project": "", "reviewers": "Kjps;Y7nK;XL3H;vv7w", "site": "https://openreview.net/forum?id=B6t5wy6g5a", "pdf_size": 4291360, "rating": "3;5;6;6", 
"confidence": "3;3;4;4", "soundness": "3;2;3;3", "contribution": "2;2;3;3", "presentation": "1;3;2;3", "wc_summary": "247;181;46;46", "wc_strengths": "45;49;80;55", "wc_weaknesses": "45;193;74;113", "wc_questions": "71;4;2;42", "wc_review": "408;427;202;256", "wc_reply_reviewers": "0;137;0;0", "wc_reply_authors": "266;1215;271;355", "reply_reviewers": "0;1;0;0", "reply_authors": "1;2;1;1", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 130.0, 87.18084652032235 ], "wc_strengths_avg": [ 57.25, 13.608361400256829 ], "wc_weaknesses_avg": [ 106.25, 55.59395200918891 ], "wc_questions_avg": [ 29.75, 28.656369274560934 ], "wc_review_avg": [ 323.25, 96.39858660789587 ], "wc_reply_reviewers_avg": [ 34.25, 59.322740159234044 ], "wc_reply_authors_avg": [ 526.75, 398.93130674340415 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": 0.8164965809277259, "gs_citation": 310, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17054470781093797244&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 10, "aff_unique_index": "0;1;2;3;4;5;6;7;7;0;1;8", "aff_unique_norm": "Carnegie Mellon University;University of California, Berkeley;Adobe;University of Wisconsin-Madison;Microsoft;International Business Machines Corporation;University of Massachusetts Amherst;University of Illinois Urbana-Champaign;Electrical Engineering & Computer Science Department", "aff_unique_dep": ";;Adobe Systems Incorporated;Department of Computer Science;Microsoft Research;;;;Electrical Engineering & Computer Science", "aff_unique_url": "https://www.cmu.edu;https://www.berkeley.edu;https://www.adobe.com;https://www.wisc.edu;https://www.microsoft.com/en-us/research;https://www.ibm.com;https://www.umass.edu;https://www illinois.edu;", "aff_unique_abbr": "CMU;UC Berkeley;Adobe;UW-Madison;MSR;IBM;UMass Amherst;UIUC;", "aff_campus_unique_index": "1;2;3;4;4;5;1", "aff_campus_unique": ";Berkeley;Madison;Amherst;Urbana-Champaign;Pittsburgh", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States;" }, { "id": "B8FA2ixkPN", "title": "GML-NeRF: Gate-guided Mutual Learning Framework for Neural Rendering", "track": "main", "status": "Reject", "tldr": "", "abstract": "Although the neural radiance field (NeRF) exhibits high-fidelity visualization on the rendering task, it still suffers from rendering defects in complex scenes. One of the primary reasons is the limited model capacity. However, directly increasing the network's width and depth cannot significantly improve the rendering quality. To address this issue, existing work adopts scene partitioning and assigns different 3D points to different network parameters. However, a 3D point may be invisible to some rays due to occlusions in complex scenes. On such a point, training with those rays that do not contain valid information about the point might interfere with the NeRF training. Based on the above intuition, we allocate model parameters in the ray dimension and propose a Gate-guided Mutual Learning framework for neural rendering (GML-NeRF). Specifically, we construct an ensemble of sub-NeRFs and train a soft gate module to assign the gating scores to these sub-NeRFs based on specific rays. 
The gate module is jointly optimized with the sub-NeRF ensemble, enabling it to learn the preference of sub-NeRFs for different rays automatically. Furthermore, we introduce depth-based mutual learning to enhance the rendering consistency among multiple sub-NeRFs and mitigate the depth ambiguity. Experiments on five diverse datasets demonstrate that GML-NeRF can enhance the rendering performance across a wide range of scene types compared with existing single-NeRF and multi-NeRF methods.", "keywords": "Neural rendering field;Mutual learning;Novel view synthesis;Soft gate module;Complex scenes with occlusions", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/b7bf1ebeba43dc63e6dbfdf5967837783eadc7d7.zip", "author": "Lidong Guo;Xuefei Ning;Yonggan Fu;Tianchen Zhao;Zhuoliang Kang;Jincheng Yu;Yingyan Celine Lin;Yu Wang", "authorids": "~Lidong_Guo1;~Xuefei_Ning1;~Yonggan_Fu1;~Tianchen_Zhao2;~Zhuoliang_Kang3;~Jincheng_Yu2;~Yingyan_Celine_Lin1;~Yu_Wang3", "gender": "M;Not Specified;M;M;M;M;M;F", "homepage": ";https://nics-effalg.com/ningxuefei/;https://www.yongganfu.com/;https://nicsefc.ee.tsinghua.edu.cn/people/tianchen-zhao/;https://zhuoliang.me/;http://nicsefc.ee.tsinghua.edu.cn/people/JinchengYu;https://nicsefc.ee.tsinghua.edu.cn;https://eiclab.scs.gatech.edu/", "dblp": "233/2101;202/9525;244/8166;217/2471;;;w/YuWang2.html;120/6981", "google_scholar": ";oVslpJsAAAAJ;https://scholar.google.com/citations?hl=en;;W1ZXjMkAAAAJ;1UDGpucAAAAJ;https://scholar.google.com.hk/citations?user=j8JGVvoAAAAJ;dio8IesAAAAJ", "orcid": "0000-0003-4162-6360;;;;;;0000-0001-6108-5157;", "linkedin": ";;yonggan-fu-b211831b0;;;;;yingyan-celine-lin-a281211a/", "or_profile": "~Lidong_Guo1;~Xuefei_Ning1;~Yonggan_Fu1;~Tianchen_Zhao2;~Zhuoliang_Kang3;~Jincheng_Yu2;~Yu_Wang3;~Yingyan_Lin1", "aff": "Tsinghua University;Tsinghua University;Georgia Institute of Technology;Infinigence;Meituan;;Tsinghua University;Georgia Institute of Technology", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;gatech.edu;infini-ai.com;meituan.com;;tsinghua.edu.cn;gatech.edu", "position": "PhD student;Research Assistant Professor;PhD student;Intern;Researcher;;Full Professor;Associate Professor", "bibtex": "@misc{\nguo2024gmlnerf,\ntitle={{GML}-Ne{RF}: Gate-guided Mutual Learning Framework for Neural Rendering},\nauthor={Lidong Guo and Xuefei Ning and Yonggan Fu and Tianchen Zhao and Zhuoliang Kang and Jincheng Yu and Yingyan Celine Lin and Yu Wang},\nyear={2024},\nurl={https://openreview.net/forum?id=B8FA2ixkPN}\n}", "github": "", "project": "", "reviewers": "Vfdc;3Q2w;PcY6;MyQ8", "site": "https://openreview.net/forum?id=B8FA2ixkPN", "pdf_size": 7938395, "rating": "5;5;5;5", "confidence": "4;2;3;5", "soundness": "2;2;3;2", "contribution": "2;3;2;2", "presentation": "3;2;2;3", "wc_summary": "109;51;109;93", "wc_strengths": "68;57;12;79", "wc_weaknesses": "124;425;331;329", "wc_questions": "111;6;72;7", "wc_review": "412;539;524;508", "wc_reply_reviewers": "0;62;0;0", "wc_reply_authors": "762;1385;1145;1027", "reply_reviewers": "0;1;0;0", "reply_authors": "3;5;4;4", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 90.5, 23.722352328552915 ], "wc_strengths_avg": [ 54.0, 25.465663156493687 ], "wc_weaknesses_avg": [ 302.25, 109.98039598037461 ], "wc_questions_avg": [ 49.0, 44.68221122549778 ], "wc_review_avg": [ 495.75, 
49.58011194017214 ], "wc_reply_reviewers_avg": [ 15.5, 26.846787517317598 ], "wc_reply_authors_avg": [ 1079.75, 224.2670004704214 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 4.0, 0.7071067811865476 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:kLGKA6GXkFwJ:scholar.google.com/&scioq=GML-NeRF:+Gate-guided+Mutual+Learning+Framework+for+Neural+Rendering&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;1;2;3;0;1", "aff_unique_norm": "Tsinghua University;Georgia Institute of Technology;Infinigence;Meituan", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.gatech.edu;;https://www.meituan.com", "aff_unique_abbr": "THU;Georgia Tech;;Meituan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;1", "aff_country_unique": "China;United States;" }, { "title": "Compressing LLMs: The Truth is Rarely Pure and Never Simple", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19229", "id": "B9klVS7Ddk", "author_site": "AJAY JAISWAL, Zhe Gan, Xianzhi Du, Bowen Zhang, Zhangyang Wang, Yinfei Yang", "tldr": "", "abstract": "Despite their remarkable achievements, modern Large Language Models (LLMs) encounter exorbitant computational and memory footprints. Recently, several works have shown significant success in *training-free* and *data-free* compression (pruning and quantization) of LLMs achieving 50-60\\% sparsity and reducing the bit-width down to 3 or 4 bits per weight, with negligible perplexity degradation over the uncompressed baseline. As recent research efforts are focused on developing increasingly sophisticated compression methods, our work takes a step back, and re-evaluates the effectiveness of existing SoTA compression methods, which rely on a fairly simple and widely questioned metric, perplexity (even for dense LLMs). We introduce **K**nowledge-**I**ntensive **C**ompressed LLM Benchmar**K** **(LLM-KICK)**, a collection of carefully-curated tasks to re-define the evaluation protocol for compressed LLMs, which have significant alignment with their dense counterparts, and perplexity fail to capture subtle change in their true capabilities. LLM-KICK unveils many favorable merits and unfortunate plights of current SoTA compression methods: all pruning methods suffer significant performance degradation, sometimes at trivial sparsity ratios (*e.g.*, 25-30\\%), and fail for N:M sparsity on knowledge-intensive tasks; current quantization methods are more successful than pruning; yet, pruned LLMs even at $\\geq 50$\\% sparsity are robust in-context retrieval and summarization systems; among others. LLM-KICK is designed to holistically access compressed LLMs' ability for language understanding, reasoning, generation, in-context retrieval, in-context summarization, *etc.* We hope our study can foster the development of better LLM compression methods. 
The reproduced codes are available at https://github.com/VITA-Group/llm-kick.", "keywords": "Compression;Large Language Models;Pruning;Quantization", "primary_area": "datasets and benchmarks", "supplementary_material": "", "author": "AJAY KUMAR JAISWAL;Zhe Gan;Xianzhi Du;Bowen Zhang;Zhangyang Wang;Yinfei Yang", "authorids": "~AJAY_KUMAR_JAISWAL1;~Zhe_Gan1;~Xianzhi_Du4;~Bowen_Zhang2;~Zhangyang_Wang1;~Yinfei_Yang1", "gender": "M;M;M;M;M;", "homepage": "https://ajay1994.github.io/;http://zhegan27.github.io/;;https://zbwglory.github.io;https://vita-group.github.io;", "dblp": "30/9707;41/7845;;85/7433-2;119/4026;117/4082", "google_scholar": "I783HxYAAAAJ;E64XWyMAAAAJ;l1hP40AAAAAJ;nI3cKV8AAAAJ;pxFyKAIAAAAJ;kvDbu90AAAAJ", "orcid": ";;;;;", "linkedin": ";zhe-gan-a2229a78/;xianzhi-du-1b128934/;;;", "or_profile": "~AJAY_KUMAR_JAISWAL1;~Zhe_Gan1;~Xianzhi_Du4;~Bowen_Zhang2;~Zhangyang_Wang1;~Yinfei_Yang1", "aff": "University of Texas, Austin;Apple;Apple;Apple;University of Texas at Austin;Apple", "aff_domain": "utexas.edu;apple.com;apple.com;apple.com;utexas.edu;apple.com", "position": "PhD student;Principal Researcher;Researcher;Research Scientist;Associate Professor;Researcher", "bibtex": "@inproceedings{\njaiswal2024compressing,\ntitle={Compressing {LLM}s: The Truth is Rarely Pure and Never Simple},\nauthor={AJAY KUMAR JAISWAL and Zhe Gan and Xianzhi Du and Bowen Zhang and Zhangyang Wang and Yinfei Yang},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=B9klVS7Ddk}\n}", "github": "", "project": "", "reviewers": "PUep;X7p4;b73d;QryF", "pdf_size": 1165142, "rating": "5;6;8;8", "confidence": "4;4;4;4", "soundness": "3;3;3;3", "contribution": "3;2;3;3", "presentation": "3;4;3;3", "wc_summary": "16;27;100;54", "wc_strengths": "25;89;71;61", "wc_weaknesses": "119;68;83;13", "wc_questions": "10;37;22;1", "wc_review": "170;221;276;129", "wc_reply_reviewers": "81;0;0;0", "wc_reply_authors": "873;770;504;119", "reply_reviewers": "1;0;0;0", "reply_authors": "3;2;1;1", "rating_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 49.25, 32.39888115352134 ], "wc_strengths_avg": [ 61.5, 23.339880033967614 ], "wc_weaknesses_avg": [ 70.75, 38.14691992808856 ], "wc_questions_avg": [ 17.5, 13.5 ], "wc_review_avg": [ 199.0, 55.122590650295095 ], "wc_reply_reviewers_avg": [ 20.25, 35.074028853269766 ], "wc_reply_authors_avg": [ 566.5, 291.34043660295424 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 40, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15743144799258029889&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=B9klVS7Ddk", "pdf": "https://openreview.net/pdf?id=B9klVS7Ddk", "email": "utexas.edu;apple.com;apple.com;apple.com;utexas.edu;apple.com", "author_num": 6, "aff_unique_index": "0;1;1;1;0;1", "aff_unique_norm": "University of Texas at Austin;Apple", "aff_unique_dep": ";Apple Inc.", "aff_unique_url": "https://www.utexas.edu;https://www.apple.com", "aff_unique_abbr": "UT Austin;Apple", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": 
"BAX3NXJ6vU", "title": "Escaping Saddle Point Efficiently in Minimax and Bilevel Optimizations", "track": "main", "status": "Reject", "tldr": "", "abstract": "Hierarchical optimization (including minimax optimization and bilevel optimization) is attracting significant attentions as it can be broadly applied to many machine learning tasks such as adversarial training, policy optimization, meta-learning and hyperparameter optimization. Recently, many algorithms have been studied to improve the theoretical analysis results of minimax and bilevel optimizations. Among these works, one of the most crucial issues is to escape saddle point and find local minimum, which is also of importance in conventional nonconvex optimization. In this paper, thus, we focus on investigating the methods to achieve second-order stationary point for nonconvex-strongly-concave minimax optimization and nonconvex-strongly-convex bilevel optimization. Specifically, we propose a new algorithm named PRGDA via perturbed stochastic gradient which does not require the computation of second order derivatives. In stochastic nonconvex-strongly-concave minimax optimization, we prove that our algorithm can find an $O(\\epsilon, \\sqrt{\\rho_{\\Phi} \\epsilon})$ second-order stationary point within gradient complexity of $\\tilde{O} (\\kappa^3 \\epsilon^{-3})$, which matches state-of-the-art to find first-order stationary point. To our best knowledge, our algorithm is the first stochastic algorithm that is guaranteed to obtain the second-order stationary point for nonconvex minimax problems. Besides, in stochastic nonconvex-strongly-convex bilevel optimization, our method also achieves better gradient complexity of $Gc(f, \\epsilon) = \\tilde{O}(\\kappa^3 \\epsilon^{-3})$ and $Gc(g, \\epsilon) = \\tilde{O}(\\kappa^7 \\epsilon^{-3})$ to find local minimum. 
Finally, we conduct a numerical experiment to validate the performance of our new method.", "keywords": "saddle point;minimax optimization;bilevel optimization", "primary_area": "optimization", "supplementary_material": "/attachment/ff6c97761f73a1c540d5d504c7604545bc0b51b1.zip", "author": "Wenhan Xian;Feihu Huang;Heng Huang", "authorids": "~Wenhan_Xian1;~Feihu_Huang1;~Heng_Huang1", "gender": "M;M;M", "homepage": ";;https://www.cs.umd.edu/~heng/", "dblp": "246/3134;169/6247;03/281", "google_scholar": ";tRQwlHUAAAAJ;4OqLaDwAAAAJ", "orcid": ";0000-0003-0806-6074;", "linkedin": "wenhan-xian-3392ba170;;", "or_profile": "~Wenhan_Xian1;~Feihu_Huang1;~Heng_Huang1", "aff": "University of Maryland, College Park;Nanjing University of Aeronautics and Astronautics;Department of Computer Science, University of Maryland, College Park", "aff_domain": "umd.edu;nuaa.edu.cn;cs.umd.edu", "position": "PhD student;Full Professor;Full Professor", "bibtex": "@misc{\nxian2024escaping,\ntitle={Escaping Saddle Point Efficiently in Minimax and Bilevel Optimizations},\nauthor={Wenhan Xian and Feihu Huang and Heng Huang},\nyear={2024},\nurl={https://openreview.net/forum?id=BAX3NXJ6vU}\n}", "github": "", "project": "", "reviewers": "tUHe;JNob;5LK2", "site": "https://openreview.net/forum?id=BAX3NXJ6vU", "pdf_size": 1039506, "rating": "5;5;6", "confidence": "3;3;3", "soundness": "3;2;3", "contribution": "2;2;3", "presentation": "3;1;2", "wc_summary": "112;51;57", "wc_strengths": "46;20;49", "wc_weaknesses": "475;409;167", "wc_questions": "9;5;246", "wc_review": "642;485;519", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "553;411;197", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.0, 0.816496580927726 ], "wc_summary_avg": [ 73.33333333333333, 27.450966386551052 ], "wc_strengths_avg": [ 38.333333333333336, 13.021349989749739 ], "wc_weaknesses_avg": [ 350.3333333333333, 132.4067806252971 ], "wc_questions_avg": [ 86.66666666666667, 112.67751427069298 ], "wc_review_avg": [ 548.6666666666666, 67.44050876307371 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 387.0, 146.3238417574753 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:GrriULR_qzMJ:scholar.google.com/&scioq=Escaping+Saddle+Point+Efficiently+in+Minimax+and+Bilevel+Optimizations&hl=en&as_sdt=0,5", "gs_version_total": 2, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Maryland;Nanjing University of Aeronautics and Astronautics;University of Maryland, College Park", "aff_unique_dep": ";;Department of Computer Science", "aff_unique_url": "https://www.umd.edu;http://www.nuaa.edu.cn;https://www.umd.edu", "aff_unique_abbr": "UMD;NUAA;UMD", "aff_campus_unique_index": "0;0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;China" }, { "id": "BBD4cFDKxQ", "title": "AdaProj: Adaptively Scaled Angular Margin Subspace Projections for Anomaly Detection with Auxiliary Classification Tasks", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "One of the state-of-the-art approaches for semi-supervised anomaly detection is to first learn an
embedding space and then estimate the distribution of normal data. This can be done by using one-class losses or by using auxiliary classification tasks based on meta information or self-supervised learning. Angular margin losses are a popular training objective because they increase intra-class similarity and avoid learning trivial solutions by reducing inter-class similarity. In this work, AdaProj a novel loss function that generalizes upon angular margin losses is presented. In contrast to angular margin losses, which project data of each class as close as possible to their corresponding class centers, AdaProj learns to project data onto class-specific subspaces. By doing so, the resulting distributions of embeddings belonging to normal data are not required to be as restrictive as other loss functions allowing a more detailed view on the data. This enables a system to more accurately detect anomalous samples during testing. In experiments conducted on the DCASE2022 and DCASE2023 datasets, it is shown that using AdaProj to learn an embedding space significantly outperforms other commonly used loss functions achieving a new state-of-the-art performance on the DCASE2023 dataset.", "keywords": "representation learning;anomaly detection;semi-supervised learning;angular margin loss", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Kevin Wilkinghoff", "authorids": "~Kevin_Wilkinghoff1", "gender": "M", "homepage": "https://wilkinghoff.com/", "dblp": "207/9559", "google_scholar": "https://scholar.google.de/citations?user=UKE_q8wAAAAJ", "orcid": "0000-0003-4200-9129", "linkedin": "kevin-wilkinghoff/", "or_profile": "~Kevin_Wilkinghoff1", "aff": "Fraunhofer FKIE", "aff_domain": "fkie.fraunhofer.de", "position": "Researcher", "bibtex": "@misc{\nwilkinghoff2024adaproj,\ntitle={AdaProj: Adaptively Scaled Angular Margin Subspace Projections for Anomaly Detection with Auxiliary Classification Tasks},\nauthor={Kevin Wilkinghoff},\nyear={2024},\nurl={https://openreview.net/forum?id=BBD4cFDKxQ}\n}", "github": "", "project": "", "reviewers": "3pzd;RkC3;PZSP;t49Z", "site": "https://openreview.net/forum?id=BBD4cFDKxQ", "pdf_size": 214829, "rating": "3;3;5;6", "confidence": "4;4;3;4", "soundness": "2;2;2;2", "contribution": "1;2;2;2", "presentation": "2;1;2;2", "wc_summary": "118;53;42;137", "wc_strengths": "49;19;32;86", "wc_weaknesses": "166;203;102;102", "wc_questions": "32;2;7;2", "wc_review": "365;277;183;327", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.0, 0.0 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_avg": [ 87.5, 40.74616546375867 ], "wc_strengths_avg": [ 46.5, 25.16445906432324 ], "wc_weaknesses_avg": [ 143.25, 43.27455950093542 ], "wc_questions_avg": [ 10.75, 12.43734296383275 ], "wc_review_avg": [ 288.0, 68.18357573492314 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9065947214425446785&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0", 
"aff_unique_norm": "Fraunhofer Institute for Communication, Information Processing and Ergonomics", "aff_unique_dep": "", "aff_unique_url": "https://www.fkie.fraunhofer.de/", "aff_unique_abbr": "FKIE", "aff_country_unique_index": "0", "aff_country_unique": "Germany" }, { "title": "Hybrid Directional Graph Neural Network for Molecules", "status": "Spotlight", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19228", "id": "BBD6KXIGJL", "author_site": "Junyi An, Chao Qu, Zhipeng Zhou, Fenglei Cao, Xu Yinghui, Yuan Qi, Furao Shen", "tldr": "", "abstract": "Equivariant message passing neural networks have emerged as the prevailing approach for predicting chemical properties of molecules due to their ability to leverage translation and rotation symmetries, resulting in a strong inductive bias. However, the equivariant operations in each layer can impose excessive constraints on the function form and network flexibility. To address these challenges, we introduce a novel network called the Hybrid Directional Graph Neural Network (HDGNN), which effectively combines strictly equivariant operations with learnable modules. We evaluate the performance of HDGNN on the QM9 dataset and the IS2RE dataset of OC20, demonstrating its state-of-the-art performance on several tasks and competitive performance on others. Our code is anonymously released on https://github.com/ajy112/HDGNN.", "keywords": "Graph Neural Networks; Equivariance; Molecular model", "primary_area": "learning on graphs and other geometries & topologies", "supplementary_material": "/attachment/bc98d38347f0c395dece9bd36609fe3ae80bf6a0.pdf", "author": "Junyi An;Chao Qu;Zhipeng Zhou;Fenglei Cao;Xu Yinghui;Yuan Qi;Furao Shen", "authorids": "~Junyi_An1;~Chao_Qu3;~Zhipeng_Zhou3;~Fenglei_Cao1;~Xu_Yinghui3;~Yuan_Qi2;~Furao_Shen1", "gender": "M;M;M;M;M;M;M", "homepage": ";;;https://www.infotech.ai;;https://cs.nju.edu.cn/58/05/c2639a153605/page.htm;https://www.google.com.hk/webhp?hl=en&sa=X&ved=0ahUKEwjBm6iTvd7tAhUhqlkKHT10DS0QPAgI", "dblp": "254/1449.html;;;;;80/4685;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;DI2NyPsAAAAJ;Ot0PPAcAAAAJ;;;https://scholar.google.com.tw/citations?user=bjSi-dIAAAAJ;https://scholar.google.com.hk/citations?user=CTdXJYwAAAAJ", "orcid": ";;;;;;", "linkedin": ";;;;yuan-alan-qi-30ba1b4/;;", "or_profile": "~Junyi_An1;~Chao_Qu3;~Zhipeng_Zhou3;~Fenglei_Cao1;~Yuan_Qi2;~Shen_Furao1;~Xu_Yinghui2", "aff": "Nanjing University;Inftech;INF (Shanghai) Technology Co., LTD;;Fudan University;Nanjing University;Fudan University", "aff_domain": "nju.edu.cn;inftech.ai;inftech.ai;;fudan.edu.cn;nju.edu.cn;fudan.edu.cn", "position": "PhD student;Researcher;Researcher;;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nan2024hybrid,\ntitle={Hybrid Directional Graph Neural Network for Molecules},\nauthor={Junyi An and Chao Qu and Zhipeng Zhou and Fenglei Cao and Xu Yinghui and Yuan Qi and Furao Shen},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=BBD6KXIGJL}\n}", "github": "", "project": "", "reviewers": "Liwe;yQB9;rs3C", "pdf_size": 578867, "rating": "6;8;8", "confidence": "2;2;4", "soundness": "2;3;2", "contribution": "2;3;3", "presentation": "3;3;3", "wc_summary": "63;39;177", "wc_strengths": "18;14;45", "wc_weaknesses": "42;1;1569", "wc_questions": "58;1;22", "wc_review": "181;55;1813", "wc_reply_reviewers": "0;0;1424", "wc_reply_authors": "635;23;4491", "reply_reviewers": "0;0;6", "reply_authors": 
"1;1;9", "rating_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 2.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 93.0, 60.199667773169644 ], "wc_strengths_avg": [ 25.666666666666668, 13.767917618708921 ], "wc_weaknesses_avg": [ 537.3333333333334, 729.6904975551088 ], "wc_questions_avg": [ 27.0, 23.53720459187964 ], "wc_review_avg": [ 683.0, 800.6847069852153 ], "wc_reply_reviewers_avg": [ 474.6666666666667, 671.2800376064291 ], "wc_reply_authors_avg": [ 1716.3333333333333, 1977.8300117946324 ], "reply_reviewers_avg": [ 2.0, 2.8284271247461903 ], "reply_authors_avg": [ 3.6666666666666665, 3.7712361663282534 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14475517015581943774&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 2, "openreview": "https://openreview.net/forum?id=BBD6KXIGJL", "pdf": "https://openreview.net/pdf?id=BBD6KXIGJL", "email": "nju.edu.cn;inftech.ai;inftech.ai;;fudan.edu.cn;nju.edu.cn;fudan.edu.cn", "author_num": 7, "aff_unique_index": "0;1;2;3;0;3", "aff_unique_norm": "Nanjing University;Inftech;INF Technology Co., LTD;Fudan University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.nju.edu.cn;;;https://www.fudan.edu.cn", "aff_unique_abbr": "Nanjing U;;;Fudan", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shanghai", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China;" }, { "id": "BC4AUywMow", "title": "Zero-Level-Set Encoder for Neural Distance Fields", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Neural shape representation generally refers to representing 3D geometry using neural networks, e.g., to compute a signed distance or occupancy value at a specific spatial position. Previous methods tend to rely on the auto-decoder paradigm, which often requires densely-sampled and accurate signed distances to be known during training and testing, as well as an additional optimization loop during inference. This introduces a lot of computational overhead, in addition to having to compute signed distances analytically, even during testing. In this paper, we present a novel encoder-decoder neural network for embedding 3D shapes in a single forward pass. Our architecture is based on a multi-scale hybrid system incorporating graph-based and voxel-based components, as well as a continuously differentiable decoder. Furthermore, the network is trained to solve the Eikonal equation and only requires knowledge of the zero-level set for training and inference. Additional volumetric samples can be generated on-the-fly, and incorporated in an unsupervised manner. This means that in contrast to most previous work, our network is able to output valid signed distance fields without explicit prior knowledge of non-zero distance values or shape occupancy. In other words, our network computes approximate solutions to the boundary-valued Eikonal equation. It also requires only a single forward pass during inference, instead of the common latent code optimization. We further propose a modification of the loss function in case that surface normals are not well defined, e.g., in the context of non-watertight surface-meshes and non-manifold geometry. 
Overall, this can help reduce the computational overhead of training and evaluating neural distance fields, as well as enabling the application to difficult shapes. We finally demonstrate the efficacy, generalizability and scalability of our method on datasets consisting of deforming 3D shapes, single class encoding and multiclass encoding, showcasing a wide range of possible applications.", "keywords": "neural shape representation;neural distance fields;eikonal equation;surface mesh;encoder-decoder", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/1facf82bd05b0f7cd3809292f07b333b2e390880.zip", "author": "Stefan Rhys Jeske;Jonathan Klein;Dominik Michels;Jan Bender", "authorids": "~Stefan_Rhys_Jeske1;~Jonathan_Klein2;~Dominik_Michels1;~Jan_Bender1", "gender": "M;M;M;M", "homepage": "https://srjeske.de;https://jonathank.de/research/;https://www.kaust.edu.sa/en/study/faculty/dominik-michels;https://www.animation.rwth-aachen.de/", "dblp": ";22/5462;131/3147;63/2863.html", "google_scholar": ";wzejV1EAAAAJ;;https://scholar.google.com.tw/citations?user=POEoFagAAAAJ", "orcid": "0000-0003-3920-7765;0000-0001-6560-0988;;", "linkedin": ";;;", "or_profile": "~Stefan_Rhys_Jeske1;~Jonathan_Klein2;~Dominik_Michels1;~Jan_Bender1", "aff": "Rheinisch Westf\u00e4lische Technische Hochschule Aachen;King Abdullah University of Science and Technology;KAUST;RWTH Aachen University", "aff_domain": "rwth-aachen.de;kaust.edu.sa;kaust.edu.sa;rwth-aachen.de", "position": "PhD student;Researcher;Associate Professor;Full Professor", "bibtex": "@misc{\njeske2024zerolevelset,\ntitle={Zero-Level-Set Encoder for Neural Distance Fields},\nauthor={Stefan Rhys Jeske and Jonathan Klein and Dominik Michels and Jan Bender},\nyear={2024},\nurl={https://openreview.net/forum?id=BC4AUywMow}\n}", "github": "", "project": "", "reviewers": "ysCV;Msao;ruBo", "site": "https://openreview.net/forum?id=BC4AUywMow", "pdf_size": 22366387, "rating": "3;5;5", "confidence": "5;4;4", "soundness": "3;2;3", "contribution": "1;2;2", "presentation": "3;3;3", "wc_summary": "49;176;79", "wc_strengths": "106;96;77", "wc_weaknesses": "297;469;112", "wc_questions": "45;96;84", "wc_review": "497;837;352", "wc_reply_reviewers": "0;165;35", "wc_reply_authors": "119;1339;44", "reply_reviewers": "0;2;1", "reply_authors": "1;3;1", "rating_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 1.6666666666666667, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 101.33333333333333, 54.19922098661157 ], "wc_strengths_avg": [ 93.0, 12.027745701779143 ], "wc_weaknesses_avg": [ 292.6666666666667, 145.7768462029855 ], "wc_questions_avg": [ 75.0, 21.77154105707724 ], "wc_review_avg": [ 562.0, 203.26501584548842 ], "wc_reply_reviewers_avg": [ 66.66666666666667, 70.98513146348951 ], "wc_reply_authors_avg": [ 500.6666666666667, 593.58140881339 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9999999999999997, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "RWTH Aachen University;King Abdullah University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": 
"https://www.rwth-aachen.de;https://www.kast.kau.edu.sa", "aff_unique_abbr": "RWTH;KAUST", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Aachen;", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "Germany;Saudi Arabia" }, { "id": "BCRZq5nNZu", "title": "Chunking: Forgetting Matters in Continual Learning even without Changing Tasks", "track": "main", "status": "Reject", "tldr": "", "abstract": "Work on continual learning (CL) has largely focused on the problems arising from the dynamically-changing data distribution. However, CL can be decomposed into two sub-problems: (a) shifts in the data distribution, and (b) dealing with the fact that the data is split into chunks and so only a part of the data is available to be trained on at any point in time. In this work, we look at the latter sub-problem---the chunking of data---and note that previous analysis of chunking in the CL literature is sparse. We show that chunking is an important part of CL, accounting for around half of the performance drop from offline learning in our experiments. Furthermore, our results reveal that current CL algorithms do not address the chunking sub-problem, only performing as well as plain SGD training when there is no shift in the data distribution. We analyse why performance drops when learning occurs on chunks of data, and find that forgetting, which is often seen to be a problem due to distribution shift, still arises and is a significant problem. Motivated by an analysis of the linear case, we show that per-chunk weight averaging improves performance in the chunking setting and that this performance transfers to the full CL setting. Hence, we argue that work on chunking can help advance CL in general.", "keywords": "Continual Learning", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "/attachment/c4eb9b6dd0ad9985103367e42348e5ecaebbd7fe.zip", "author": "Thomas L Lee;Amos Storkey", "authorids": "~Thomas_L_Lee1;~Amos_Storkey1", "gender": ";Not Specified", "homepage": "https://tlee43.github.io/;http://homepages.inf.ed.ac.uk/amos/", "dblp": ";", "google_scholar": "pRcPv_cAAAAJ;", "orcid": ";", "linkedin": "thomas-lee-aa27a9176/;", "or_profile": "~Thomas_L_Lee1;~Amos_Storkey1", "aff": "Huawei Technologies Ltd.;University of Edinburgh", "aff_domain": "huawei.com;ed.ac.uk", "position": "Intern;Full Professor", "bibtex": "@misc{\nlee2024chunking,\ntitle={Chunking: Forgetting Matters in Continual Learning even without Changing Tasks},\nauthor={Thomas L Lee and Amos Storkey},\nyear={2024},\nurl={https://openreview.net/forum?id=BCRZq5nNZu}\n}", "github": "", "project": "", "reviewers": "BU9q;4UBT;w2jf;T7s5", "site": "https://openreview.net/forum?id=BCRZq5nNZu", "pdf_size": 845347, "rating": "3;3;3;5", "confidence": "5;4;4;4", "soundness": "2;2;2;2", "contribution": "2;2;3;2", "presentation": "3;2;3;3", "wc_summary": "104;37;94;117", "wc_strengths": "100;1;148;105", "wc_weaknesses": "556;550;326;503", "wc_questions": "10;4;223;29", "wc_review": "770;592;791;754", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "745;947;805;725", "reply_reviewers": "0;0;0;0", "reply_authors": "1;2;1;1", "rating_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.0, 0.0 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 88.0, 30.553232234904378 ], "wc_strengths_avg": [ 88.5, 53.85396921304873 ], "wc_weaknesses_avg": [ 483.75, 
93.3605243130093 ], "wc_questions_avg": [ 66.5, 90.82538191496913 ], "wc_review_avg": [ 726.75, 78.89668877715971 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 805.5, 86.83749190297932 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3591496541474882982&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Huawei;University of Edinburgh", "aff_unique_dep": "Huawei Technologies;", "aff_unique_url": "https://www.huawei.com;https://www.ed.ac.uk", "aff_unique_abbr": "Huawei;Edinburgh", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "China;United Kingdom" }, { "id": "BCe9ut1s7i", "title": "On the Importance of Backbone to the Adversarial Robustness of Object Detectors", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Object detection is a critical component of various security-sensitive applications, such as autonomous driving and video surveillance. However, existing object detectors are vulnerable to adversarial attacks, which poses a significant challenge to their reliability and safety.\nThrough experiments, first, we found that existing works on improving the adversarial robustness of object detectors give a false sense of security. Second, we found that using adversarially pre-trained backbone networks was essential for enhancing the adversarial robustness of object detectors. We then proposed a simple yet effective recipe for fast adversarial fine-tuning on object detectors with adversarially pre-trained backbones. Without any modifications to the structure of object detectors, our recipe achieved significantly better adversarial robustness than previous works. Finally, we explored the potential of different modern object detectors to improve adversarial robustness using our recipe and demonstrated interesting findings, which inspired us to design several state-of-the-art (SOTA) robust detectors with faster inference speed. Our empirical results set a new milestone for adversarially robust object detection. 
Code and trained checkpoints will be publicly available.", "keywords": "adversarial robustness;object detection", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "", "author": "Xiao Li;Hang Chen;Xiaolin Hu", "authorids": "~Xiao_Li16;~Hang_Chen4;~Xiaolin_Hu1", "gender": "M;M;M", "homepage": ";;http://www.xlhu.cn/", "dblp": "66/2069-28;;60/6028-1", "google_scholar": "Is24dqwAAAAJ;WZbgD9oAAAAJ;PksdgoUAAAAJ", "orcid": "0000-0001-8992-4944;;0000-0002-4907-7354", "linkedin": ";;", "or_profile": "~Xiao_Li16;~Hang_Chen4;~Xiaolin_Hu1", "aff": "Computer Science, Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "mails.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@misc{\nli2024on,\ntitle={On the Importance of Backbone to the Adversarial Robustness of Object Detectors},\nauthor={Xiao Li and Hang Chen and Xiaolin Hu},\nyear={2024},\nurl={https://openreview.net/forum?id=BCe9ut1s7i}\n}", "github": "", "project": "", "reviewers": "xgd4;fyi7;wtFb", "site": "https://openreview.net/forum?id=BCe9ut1s7i", "pdf_size": 1449805, "rating": "3;5;5", "confidence": "5;3;5", "soundness": "2;3;3", "contribution": "1;3;2", "presentation": "2;3;4", "wc_summary": "36;73;59", "wc_strengths": "55;23;74", "wc_weaknesses": "207;233;299", "wc_questions": "105;16;6", "wc_review": "403;345;438", "wc_reply_reviewers": "175;0;0", "wc_reply_authors": "1109;896;1127", "reply_reviewers": "1;0;0", "reply_authors": "2;2;2", "rating_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.9428090415820634 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.0, 0.816496580927726 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 56.0, 15.253414918196734 ], "wc_strengths_avg": [ 50.666666666666664, 21.044925490219462 ], "wc_weaknesses_avg": [ 246.33333333333334, 38.72409528388695 ], "wc_questions_avg": [ 42.333333333333336, 44.4996878890428 ], "wc_review_avg": [ 395.3333333333333, 38.35216928530756 ], "wc_reply_reviewers_avg": [ 58.333333333333336, 82.49579113843053 ], "wc_reply_authors_avg": [ 1044.0, 104.90948479522717 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.49999999999999983, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12470143202775810721&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "Computer Science", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "BCocsAF7MY", "title": "Fine-tune Language Models to Approximate Unbiased In-context Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "In-context learning (ICL) is an astonishing emergent ability of large language models (LLMs). By presenting a prompt that includes multiple input-output pairs as examples and introducing a new query input, models can generate the corresponding output. However, the performance of models heavily relies on the quality of the input prompt when implementing in-context learning. 
Biased or imbalanced input prompts can significantly degrade the performance of language models. To address this issue, we introduce a reweighted algorithm called RICL (Reweighted In-context Learning). This algorithm fine-tunes language models using an unbiased validation set to determine the optimal weight for each input-output example to approximate unbiased in-context learning. Furthermore, we also introduce a low-cost reweighted algorithm, a linear optimal weight approximation algorithm called LARICL (Linear Approximation of Reweighted In-context Learning). This algorithm requires minimal training cost while providing effective results. We prove the convergence of our algorithm and validate its performance through experiments conducted on a numerical dataset. The experimental findings reveal a substantial improvement in comparison to benchmarks including the performance of casual prompt-based in-context learning and the performance of a classic fine-tuning method.", "keywords": "fine-tune;in-context learning", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/3ab7a88a102f875d28b3b9f90cf192215c15dd4c.pdf", "author": "Timothy Zer-An Chu;Zhao Song;Chiwun Yang", "authorids": "~Timothy_Zer-An_Chu1;~Zhao_Song3;~Chiwun_Yang1", "gender": "M;M;M", "homepage": ";https://www.youtube.com/@zhaosong2031;https://christianyang37.github.io/", "dblp": ";76/4051-2;355/2807", "google_scholar": "https://scholar.google.com/citations?hl=en;yDZct7UAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;", "linkedin": ";;", "or_profile": "~Timothy_Zer-An_Chu1;~Zhao_Song3;~Chiwun_Yang1", "aff": "AAAS;Adobe;SUN YAT-SEN UNIVERSITY", "aff_domain": "aaas.org;adobe.com;sysu.edu.cn", "position": "Researcher;Researcher;Undergrad student", "bibtex": "@misc{\nchu2024finetune,\ntitle={Fine-tune Language Models to Approximate Unbiased In-context Learning},\nauthor={Timothy Zer-An Chu and Zhao Song and Chiwun Yang},\nyear={2024},\nurl={https://openreview.net/forum?id=BCocsAF7MY}\n}", "github": "", "project": "", "reviewers": "NqVg;A8do;sUUz", "site": "https://openreview.net/forum?id=BCocsAF7MY", "pdf_size": 379670, "rating": "3;3;5", "confidence": "3;3;3", "soundness": "2;2;3", "contribution": "2;2;3", "presentation": "2;1;3", "wc_summary": "68;40;58", "wc_strengths": "68;77;59", "wc_weaknesses": "145;220;133", "wc_questions": "75;2;53", "wc_review": "356;339;303", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.0, 0.816496580927726 ], "wc_summary_avg": [ 55.333333333333336, 11.585431464655176 ], "wc_strengths_avg": [ 68.0, 7.3484692283495345 ], "wc_weaknesses_avg": [ 166.0, 38.49675310984031 ], "wc_questions_avg": [ 43.333333333333336, 30.575952787916336 ], "wc_review_avg": [ 332.6666666666667, 22.095751225568733 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5893653897567298517&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "American Association for the 
Advancement of Science;Adobe;Sun Yat-sen University", "aff_unique_dep": ";Adobe Inc.;", "aff_unique_url": "https://www.aaas.org;https://www.adobe.com;http://www.sysu.edu.cn", "aff_unique_abbr": "AAAS;Adobe;SYSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;China" }, { "title": "A Unified and General Framework for Continual Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19227", "id": "BE5aK0ETbp", "author_site": "Zhenyi Wang, Yan Li, Li Shen, Heng Huang", "tldr": "", "abstract": "Continual Learning (CL) focuses on learning from dynamic and changing data distributions while retaining previously acquired knowledge. Various methods have been developed to address the challenge of catastrophic forgetting, including regularization-based, Bayesian-based, and memory-replay-based techniques. However, these methods lack a unified framework and common terminology for describing their approaches. This research aims to bridge this gap by introducing a comprehensive and overarching framework that encompasses and reconciles these existing methodologies. Notably, this new framework is capable of encompassing established CL approaches as special instances within a unified and general optimization objective.\nAn intriguing finding is that despite their diverse origins, these methods share common mathematical structures. This observation highlights the compatibility of these seemingly distinct techniques, revealing their interconnectedness through a shared underlying optimization objective. Moreover, the proposed general framework introduces an innovative concept called *refresh learning*, specifically designed to enhance the CL performance. This novel approach draws inspiration from neuroscience, where the human brain often sheds outdated information to improve the retention of crucial knowledge and facilitate the acquisition of new information. In essence, *refresh learning* operates by initially unlearning current data and subsequently relearning it. It serves as a versatile plug-in that seamlessly integrates with existing CL methods, offering an adaptable and effective enhancement to the learning process. 
Extensive experiments on CL benchmarks and theoretical analysis demonstrate the effectiveness of the proposed *refresh learning*.", "keywords": "Continual Learning", "primary_area": "transfer learning, meta learning, and lifelong learning", "supplementary_material": "", "author": "Zhenyi Wang;Yan Li;Li Shen;Heng Huang", "authorids": "~Zhenyi_Wang1;~Yan_Li18;~Li_Shen1;~Heng_Huang1", "gender": ";;M;M", "homepage": ";;https://sites.google.com/site/mathshenli/home;https://www.cs.umd.edu/~heng/", "dblp": ";;91/3680-8;03/281", "google_scholar": ";;yVhgENIAAAAJ;4OqLaDwAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Zhenyi_Wang1;~Yan_Li18;~Li_Shen1;~Heng_Huang1", "aff": ";;JD Explore Academy;Department of Computer Science, University of Maryland, College Park", "aff_domain": ";;jd.com;cs.umd.edu", "position": ";;Researcher;Full Professor", "bibtex": "@inproceedings{\nwang2024a,\ntitle={A Unified and General Framework for Continual Learning},\nauthor={Zhenyi Wang and Yan Li and Li Shen and Heng Huang},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=BE5aK0ETbp}\n}", "github": "", "project": "", "reviewers": "tVq3;KmRA;Dw6m;ngze", "pdf_size": 389005, "rating": "3;6;6;6", "confidence": "4;3;2;3", "soundness": "1;2;3;3", "contribution": "2;3;3;2", "presentation": "2;3;3;2", "wc_summary": "43;201;45;81", "wc_strengths": "1;109;45;51", "wc_weaknesses": "187;231;70;208", "wc_questions": "5;55;46;49", "wc_review": "236;596;206;389", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1170;1094;601;1720", "reply_reviewers": "0;0;0;0", "reply_authors": "4;3;2;4", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 92.5, 64.44183423832689 ], "wc_strengths_avg": [ 51.5, 38.40247387864485 ], "wc_weaknesses_avg": [ 174.0, 62.028219384406 ], "wc_questions_avg": [ 38.75, 19.753164303473 ], "wc_review_avg": [ 356.75, 154.5855345755223 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1146.25, 396.7936333914646 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 3.25, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12568688008196456109&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=BE5aK0ETbp", "pdf": "https://openreview.net/pdf?id=BE5aK0ETbp", "email": ";;jd.com;cs.umd.edu", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "JD;University of Maryland, College Park", "aff_unique_dep": "JD Explore Academy;Department of Computer Science", "aff_unique_url": ";https://www.umd.edu", "aff_unique_abbr": ";UMD", "aff_campus_unique_index": "1", "aff_campus_unique": ";College Park", "aff_country_unique_index": "1", "aff_country_unique": ";United States" }, { "title": "Pre-training Sequence, Structure, and Surface Features for Comprehensive Protein Representation Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19226", "id": "BEH4mGo7zP", "author_site": "Youhan Lee, Hasun Yu, Jaemyung Lee, Jaehoon Kim", "tldr": "", "abstract": "Proteins can be represented in various ways, including their sequences, 3D structures, and surfaces.
While recent studies have successfully employed sequence- or structure-based representations to address multiple tasks in protein science, there has been significant oversight in incorporating protein surface information, a critical factor for protein function. In this paper, we present a pre-training strategy that incorporates information from protein sequences, 3D structures, and surfaces to improve protein representation learning. Specifically, we utilize Implicit Neural Representations (INRs) for learning surface characteristics, and name it ProteinINR. We confirm that ProteinINR successfully reconstructs protein surfaces, and integrate this surface learning into the existing pre-training strategy of sequences and structures. Our results demonstrate that our approach can enhance performance in various downstream tasks, thereby underscoring the importance of including surface attributes in protein representation learning. These findings underline the importance of understanding protein surfaces for generating effective protein representations.", "keywords": "Protein representation learning;self-supervised learning;implicit neural representation", "primary_area": "applications to physical sciences (physics, chemistry, biology, etc.)", "supplementary_material": "", "author": "Youhan Lee;Hasun Yu;Jaemyung Lee;Jaehoon Kim", "authorids": "~Youhan_Lee1;~Hasun_Yu2;~Jaemyung_Lee1;~Jaehoon_Kim1", "gender": "M;M;M;M", "homepage": ";;https://www.linkedin.com/in/jaemyung-lee-5576b4119/;", "dblp": "190/1819;153/5409.html;;", "google_scholar": "https://scholar.google.co.kr/citations?user=EFNg9UcAAAAJ;https://scholar.google.co.kr/citations?user=CvbGPQYAAAAJ;UNoy5N8AAAAJ;", "orcid": ";;;0000-0001-8598-3429", "linkedin": "youhanlee/;https://kr.linkedin.com/in/hasun-yu-733291119;;", "or_profile": "~Youhan_Lee1;~Hasun_Yu2;~Jaemyung_Lee1;~Jaehoon_Kim1", "aff": "Kakao Brain Corp;Kakao Brain;Kakao Brain Corp;Kakaobrain", "aff_domain": "kakaobrain.com;kakaobrain.com;kakaobrain.com;kakaobrain.com", "position": "Researcher;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nlee2024pretraining,\ntitle={Pre-training Sequence, Structure, and Surface Features for Comprehensive Protein Representation Learning},\nauthor={Youhan Lee and Hasun Yu and Jaemyung Lee and Jaehoon Kim},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=BEH4mGo7zP}\n}", "github": "", "project": "", "reviewers": "rs2T;VAn9;AyLE;1KWf", "pdf_size": 1966808, "rating": "5;6;6;6", "confidence": "5;3;2;4", "soundness": "2;3;3;3", "contribution": "3;3;3;3", "presentation": "2;3;2;3", "wc_summary": "64;120;62;61", "wc_strengths": "30;97;60;81", "wc_weaknesses": "376;332;70;39", "wc_questions": "173;75;40;75", "wc_review": "643;624;232;256", "wc_reply_reviewers": "55;182;68;0", "wc_reply_authors": "1387;1198;641;518", "reply_reviewers": "1;1;1;0", "reply_authors": "3;3;2;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 76.75, 24.993749218554626 ], "wc_strengths_avg": [ 67.0, 25.06990227344335 ], "wc_weaknesses_avg": [ 204.25, 150.95425631627614 ], "wc_questions_avg": [ 90.75, 49.59019560356664 ], "wc_review_avg": [ 438.75, 195.05047423679852 ], "wc_reply_reviewers_avg": [ 76.25, 66.17542973037652 ], "wc_reply_authors_avg": [ 936.0, 365.30603608481476 ], "reply_reviewers_avg": [ 0.75, 
0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7745966692414834, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10063730667276780299&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=BEH4mGo7zP", "pdf": "https://openreview.net/pdf?id=BEH4mGo7zP", "email": "kakaobrain.com;kakaobrain.com;kakaobrain.com;kakaobrain.com", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Kakao Brain", "aff_unique_dep": "Corp", "aff_unique_url": "https://www.kakaobrain.com", "aff_unique_abbr": "Kakao Brain", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "DP-SGD Without Clipping: The Lipschitz Neural Network Way", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19225", "id": "BEyEziZ4R6", "author_site": "Louis B\u00e9thune, Thomas Massena, Thibaut Boissin, Aur\u00e9lien Bellet, Franck Mamalet, Yannick Prudent, Corentin Friedrich, Mathieu Serrurier, David Vigouroux", "tldr": "", "abstract": "State-of-the-art approaches for training Differentially Private (DP) Deep Neural Networks (DNN) face difficulties to estimate tight bounds on the sensitivity of the network's layers, and instead rely on a process of per-sample gradient clipping. This clipping process not only biases the direction of gradients but also proves costly both in memory consumption and in computation. To provide sensitivity bounds and bypass the drawbacks of the clipping process, we propose to rely on Lipschitz constrained networks. Our theoretical analysis reveals an unexplored link between the Lipschitz constant with respect to their input and the one with respect to their parameters. By bounding the Lipschitz constant of each layer with respect to its parameters, we prove that we can train these networks with privacy guarantees. Our analysis not only allows the computation of the aforementioned sensitivities at scale, but also provides guidance on how to maximize the gradient-to-noise ratio for fixed privacy guarantees. 
To facilitate the application of Lipschitz networks and foster robust and certifiable learning under privacy guarantees, we provide a Python package that implements building blocks allowing the construction and private training of such networks.", "keywords": "lipschitz neural networks;dp-sgd;privacy;robustness", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "", "author": "Louis B\u00e9thune;Thomas Massena;Thibaut Boissin;Aur\u00e9lien Bellet;Franck Mamalet;Yannick Prudent;Corentin Friedrich;Mathieu Serrurier;David Vigouroux", "authorids": "~Louis_B\u00e9thune1;~Thomas_Massena1;~Thibaut_Boissin1;~Aur\u00e9lien_Bellet1;~Franck_Mamalet2;~Yannick_Prudent1;~Corentin_Friedrich1;~Mathieu_Serrurier1;~David_Vigouroux1", "gender": "M;M;M;;M;M;M;M;", "homepage": "https://louis-bethune.fr/;;;http://researchers.lille.inria.fr/abellet/;https://www.researchgate.net/profile/Franck-Mamalet;;;;", "dblp": "270/0797;;;61/8017;15/6625;;258/6442;30/2092;", "google_scholar": "1zvpCDcAAAAJ;n09aacYAAAAJ;zC-MstIAAAAJ;https://scholar.google.fr/citations?user=j8svx3IAAAAJ;https://scholar.google.fr/citations?user=5C5p0osAAAAJ;;;https://scholar.google.com/scholar?scilib=1;", "orcid": "0000-0003-1498-8251;;;0000-0003-3440-1251;;;;;", "linkedin": ";thomas-mass%C3%A9na-9240b5223/;;;franck-mamalet-0453a91b;yannick-prudent/;corentin-friedrich/;;", "or_profile": "~Louis_B\u00e9thune1;~Thomas_Massena1;~Thibaut_Boissin1;~Aur\u00e9lien_Bellet1;~Franck_Mamalet2;~Yannick_Prudent1;~Corentin_Friedrich1;~Mathieu_Serrurier1;~David_Vigouroux1", "aff": "Apple ;IRIT / SNCF DTIPG;IRT Saint exup\u00e9ry;INRIA;IRT Saint Exupery;IRT Saint-Exup\u00e9ry;IRT Saint Exup\u00e9ry;university Paul Sabatier;", "aff_domain": "apple.com;irit.fr;irt-saintexupery.com;inria.fr;irt-saintexupery.com;irt-saintexupery.com;irt-saintexupery.com;irit.fr;", "position": "Researcher;PhD student;Researcher;Tenured researcher;Researcher;Researcher;Researcher;Assistant Professor;", "bibtex": "@inproceedings{\nb{\\'e}thune2024dpsgd,\ntitle={{DP}-{SGD} Without Clipping: The Lipschitz Neural Network Way},\nauthor={Louis B{\\'e}thune and Thomas Massena and Thibaut Boissin and Aur{\\'e}lien Bellet and Franck Mamalet and Yannick Prudent and Corentin Friedrich and Mathieu Serrurier and David Vigouroux},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=BEyEziZ4R6}\n}", "github": "", "project": "", "reviewers": "GMKM;ah9n;L744", "pdf_size": 7397919, "rating": "5;6;8", "confidence": "4;4;3", "soundness": "3;3;3", "contribution": "2;3;3", "presentation": "2;2;4", "wc_summary": "52;24;106", "wc_strengths": "21;27;360", "wc_weaknesses": "323;18;212", "wc_questions": "2;29;311", "wc_review": "398;98;989", "wc_reply_reviewers": "70;0;21", "wc_reply_authors": "844;60;214", "reply_reviewers": "1;0;1", "reply_authors": "3;1;1", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.9428090415820634 ], "wc_summary_avg": [ 60.666666666666664, 34.03266404826725 ], "wc_strengths_avg": [ 136.0, 158.41085821369697 ], "wc_weaknesses_avg": [ 184.33333333333334, 126.04320246998205 ], "wc_questions_avg": [ 114.0, 139.73546436034053 ], "wc_review_avg": [ 495.0, 370.159425113018 ], "wc_reply_reviewers_avg": [ 30.333333333333332, 29.32954520994525 ], 
"wc_reply_authors_avg": [ 372.6666666666667, 339.16105646465695 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.944911182523068, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1811895221198899715&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 32, "openreview": "https://openreview.net/forum?id=BEyEziZ4R6", "pdf": "https://openreview.net/pdf?id=BEyEziZ4R6", "email": "apple.com;irit.fr;irt-saintexupery.com;inria.fr;irt-saintexupery.com;irt-saintexupery.com;irt-saintexupery.com;irit.fr;", "author_num": 9, "aff_unique_index": "0;1;2;3;4;5;2;6", "aff_unique_norm": "Apple;Institut de Recherche en Informatique de Toulouse;IRT Saint Exup\u00e9ry;INRIA;IRT Saint Exupery;IRT Saint-Exup\u00e9ry;Paul Sabatier University", "aff_unique_dep": "Apple Inc.;;;;;;", "aff_unique_url": "https://www.apple.com;https://www.irit.fr;;https://www.inria.fr;;https://www.irt-saintexupery.com;https://www.univ-toulouse1.fr", "aff_unique_abbr": "Apple;IRIT;;INRIA;;;UT1", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;1;1;1", "aff_country_unique": "United States;France" }, { "title": "A 2-Dimensional State Space Layer for Spatial Inductive Bias", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19224", "id": "BGkqypmGvm", "author_site": "Ethan Baron, Itamar Zimerman, Lior Wolf", "tldr": "", "abstract": "A central objective in computer vision is to design models with appropriate 2-D inductive bias. Desiderata for 2-D inductive bias include two-dimensional position awareness, dynamic spatial locality, and translation and permutation invariance. To address these goals, we leverage an expressive variation of the multidimensional State Space Model (SSM). Our approach introduces efficient parameterization, accelerated computation, and a suitable normalization scheme. Empirically, we observe that incorporating our layer at the beginning of each transformer block of Vision Transformers (ViT), as well as when replacing the Conv2D filters of ConvNeXT with our proposed layers significantly enhances performance for multiple backbones and across multiple datasets. The new layer is effective even with a negligible amount of additional parameters and inference time. Ablation studies and visualizations demonstrate that the layer has a strong 2-D inductive bias. For example, vision transformers equipped with our layer exhibit effective performance even without positional encoding. 
Our code is attached as supplementary.", "keywords": "SSM;Dimensional State Spaces;Spatial Representation", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/0f8c60423e0dd1a58c4d7b84907f5b05bbdf998b.zip", "author": "Ethan Baron;Itamar Zimerman;Lior Wolf", "authorids": "~Ethan_Baron1;~Itamar_Zimerman1;~Lior_Wolf1", "gender": "M;M;M", "homepage": ";;http://www.cs.tau.ac.il/~wolf", "dblp": ";294/8621;83/4103", "google_scholar": "02O0z30AAAAJ;01s_DpwAAAAJ;UbFrXTsAAAAJ", "orcid": ";0000-0001-8321-0609;0000-0001-5578-8892", "linkedin": "ethan-baron-a423a112a/;;", "or_profile": "~Ethan_Baron1;~Itamar_Zimerman1;~Lior_Wolf1", "aff": "Tel Aviv University;International Business Machines;Tel Aviv University", "aff_domain": "tau.ac.il;ibm.com;tau.ac.il", "position": "MS student;Researcher;Full Professor", "bibtex": "@inproceedings{\nbaron2024a,\ntitle={A 2-Dimensional State Space Layer for Spatial Inductive Bias},\nauthor={Ethan Baron and Itamar Zimerman and Lior Wolf},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=BGkqypmGvm}\n}", "github": "", "project": "", "reviewers": "SZ7R;mKTe;ZC1E;CN5B", "pdf_size": 3876440, "rating": "6;6;6;6", "confidence": "2;3;3;2", "soundness": "3;3;4;3", "contribution": "3;3;3;3", "presentation": "2;2;3;3", "wc_summary": "95;55;86;40", "wc_strengths": "56;110;174;40", "wc_weaknesses": "120;427;207;70", "wc_questions": "104;59;5;32", "wc_review": "375;651;472;182", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "559;932;382;358", "reply_reviewers": "0;0;0;0", "reply_authors": "1;2;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 69.0, 22.371857321197094 ], "wc_strengths_avg": [ 95.0, 52.46903848937962 ], "wc_weaknesses_avg": [ 206.0, 136.68760002282576 ], "wc_questions_avg": [ 50.0, 36.55817282086182 ], "wc_review_avg": [ 420.0, 169.3620382494259 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 557.75, 229.59352669446062 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5637507426116350084&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "openreview": "https://openreview.net/forum?id=BGkqypmGvm", "pdf": "https://openreview.net/pdf?id=BGkqypmGvm", "email": "tau.ac.il;ibm.com;tau.ac.il", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Tel Aviv University;International Business Machines Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.tau.ac.il;https://www.ibm.com", "aff_unique_abbr": "TAU;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Israel;United States" }, { "title": "A Multi-Level Framework for Accelerating Training Transformer Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19223", "id": "BI1N3lTWtn", "author_site": "Longwei Zou, Han Zhang, Yangdong Deng", "tldr": "", "abstract": "The fast growing capabilities of large-scale deep learning models, such as Bert, GPT and ViT, are revolutionizing the landscape of NLP, CV and many other domains. 
Training such models, however, poses an unprecedented demand for computing power, which incurs exponentially increasing energy cost and carbon dioxide emissions. It is thus critical to develop efficient training solutions to reduce the training costs. Motivated by a set of key observations of inter- and intra-layer similarities among feature maps and attentions that can be identified from typical training processes, we propose a multi-level framework for training acceleration. Specifically, the framework is based on three basic operators, Coalescing, De-coalescing and Interpolation, which can be orchestrated to build a multi-level training framework. The framework consists of a V-cycle training process, which progressively down- and up-scales the model size and projects the parameters between adjacent levels of models via coalescing and de-coalescing. The key idea is that a smaller model can be trained for fast convergence, and the trained parameters provide high-quality intermediate solutions for the next-level larger network. The interpolation operator is designed to break the symmetry of neurons incurred by de-coalescing for better convergence performance. Our experiments on transformer-based language models (e.g. Bert, GPT) as well as a vision model (e.g. DeiT) prove that the proposed framework reduces the computational cost by about 20% on training BERT/GPT-Base models and up to 51.6% on training the BERT-Large model while preserving the performance.", "keywords": "Large Model;Transformer;Multi-Level;Training Acceleration", "primary_area": "optimization", "supplementary_material": "", "author": "Longwei Zou;Han Zhang;Yangdong Deng", "authorids": "~Longwei_Zou1;~Han_Zhang22;~Yangdong_Deng1", "gender": "M;M;M", "homepage": "https://photooon.github.io/;https://github.com/explorerZH;http://www.thss.tsinghua.edu.cn/publish/soften/3131/2014/20140115102144786540201/20140115102144786540201_.html", "dblp": "375/3580;;90/5987", "google_scholar": "TaG_k80AAAAJ/;;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Longwei_Zou1;~Han_Zhang22;~Yangdong_Deng1", "aff": "Tsinghua University;University of Washington;Tsinghua University", "aff_domain": "tsinghua.edu.cn;uw.edu;tsinghua.edu.cn", "position": "MEng student;MS student;Associate Professor", "bibtex": "@inproceedings{\nzou2024a,\ntitle={A Multi-Level Framework for Accelerating Training Transformer Models},\nauthor={Longwei Zou and Han Zhang and Yangdong Deng},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=BI1N3lTWtn}\n}", "github": "", "project": "", "reviewers": "subK;Csxk;d5rc;2jMP", "pdf_size": 653625, "rating": "5;6;6;6", "confidence": "4;5;2;2", "soundness": "3;3;3;3", "contribution": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "179;69;155;112", "wc_strengths": "18;215;46;63", "wc_weaknesses": "179;132;54;63", "wc_questions": "247;2;47;31", "wc_review": "623;418;302;269", "wc_reply_reviewers": "121;0;27;0", "wc_reply_authors": "1899;538;269;870", "reply_reviewers": "1;0;1;0", "reply_authors": "3;1;1;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 1.299038105676658 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 128.75, 42.02603359823527 ], "wc_strengths_avg": [ 85.5, 76.4738517403171 ], "wc_weaknesses_avg": [ 107.0, 51.36633138545131 ], "wc_questions_avg": [ 81.75, 96.76098128894725 ], "wc_review_avg": [ 403.0, 138.5478256776338 ],
"wc_reply_reviewers_avg": [ 37.0, 49.73429400323282 ], "wc_reply_authors_avg": [ 894.0, 618.0538002471953 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12765167249535678442&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=BI1N3lTWtn", "pdf": "https://openreview.net/pdf?id=BI1N3lTWtn", "email": "tsinghua.edu.cn;uw.edu;tsinghua.edu.cn", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Tsinghua University;University of Washington", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.washington.edu", "aff_unique_abbr": "THU;UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;United States" }, { "id": "BIglOUjfXX", "title": "Forked Diffusion for Conditional Graph Generation", "track": "main", "status": "Reject", "tldr": "", "abstract": "We introduce a novel score-based diffusion framework that incorporates forking for conditional generation. In this framework, a single parent diffusion process is associated with a primary variable (e.g., structure), while multiple child diffusion processes are employed, each dedicated to a dependent variable (e.g., property). The parent process guides the co-evolution of its child processes towards segregated representation spaces. This approach allows our models to manage conditional information flow effectively, uncover intricate interactions and dependencies, and ultimately unlock new generative capabilities. Our experimental results demonstrate the significant superiority of our method over contemporary baselines in the context of conditional graph generation, highlighting the potential of forking diffusion for enhancing conditional generation tasks and inverse molecular design tasks.", "keywords": "conditional generative model;graph neural network;score-based diffusion", "primary_area": "generative models", "supplementary_material": "", "author": "Giangiacomo Mercatali;Yogesh Verma;Andre Freitas;Vikas Garg", "authorids": "~Giangiacomo_Mercatali1;~Yogesh_Verma1;~Andre_Freitas1;~Vikas_Garg2", "gender": ";M;;", "homepage": ";https://yoverma.github.io/yoerma.github.io/;http://andrefreitas.org;", "dblp": ";284/2155;47/9409.html;", "google_scholar": ";9W9u4owAAAAJ;ExmHmMoAAAAJ;", "orcid": ";;;", "linkedin": ";yogeshverma1998/;andrefreitas/;", "or_profile": "~Giangiacomo_Mercatali1;~Yogesh_Verma1;~Andre_Freitas1;~Vikas_Garg2", "aff": ";Aalto University;University of Manchester;", "aff_domain": ";aalto.fi;manchester.ac.uk;", "position": ";PhD student;Associate Professor;", "bibtex": "@misc{\nmercatali2024forked,\ntitle={Forked Diffusion for Conditional Graph Generation},\nauthor={Giangiacomo Mercatali and Yogesh Verma and Andre Freitas and Vikas Garg},\nyear={2024},\nurl={https://openreview.net/forum?id=BIglOUjfXX}\n}", "github": "", "project": "", "reviewers": "hTgA;GVsE;qecv;FTyY", "site": "https://openreview.net/forum?id=BIglOUjfXX", "pdf_size": 561353, "rating": "3;3;5;5", "confidence": "4;4;3;3", "soundness": "2;2;3;2", "contribution": "2;1;2;2", "presentation": "1;3;2;2", "wc_summary": "85;70;66;60", "wc_strengths": "27;54;51;10", "wc_weaknesses": "281;208;347;211", "wc_questions": "42;31;52;34", "wc_review": "435;363;516;315", "wc_reply_reviewers": "125;26;0;22", 
"wc_reply_authors": "690;519;643;448", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 70.25, 9.229707470987366 ], "wc_strengths_avg": [ 35.5, 18.062391868188442 ], "wc_weaknesses_avg": [ 261.75, 57.23362211148269 ], "wc_questions_avg": [ 39.75, 8.13557004763649 ], "wc_review_avg": [ 407.25, 75.93541663808791 ], "wc_reply_reviewers_avg": [ 43.25, 48.225382320931374 ], "wc_reply_authors_avg": [ 575.0, 96.32497080196806 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:5C871pxEaKQJ:scholar.google.com/&scioq=Forked+Diffusion+for+Conditional+Graph+Generation&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Aalto University;University of Manchester", "aff_unique_dep": ";", "aff_unique_url": "https://www.aalto.fi;https://www.manchester.ac.uk", "aff_unique_abbr": "Aalto;UoM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Finland;United Kingdom" }, { "title": "Equivariant Scalar Fields for Molecular Docking with Fast Fourier Transforms", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19222", "id": "BIveOmD1Nh", "author_site": "Bowen Jing, Tommi Jaakkola, Bonnie Berger", "tldr": "", "abstract": "Molecular docking is critical to structure-based virtual screening, yet the throughput of such workflows is limited by the expensive optimization of scoring functions involved in most docking algorithms. We explore how machine learning can accelerate this process by learning a scoring function with a functional form that allows for more rapid optimization. Specifically, we define the scoring function to be the cross-correlation of multi-channel ligand and protein scalar fields parameterized by equivariant graph neural networks, enabling rapid optimization over rigid-body degrees of freedom with fast Fourier transforms. The runtime of our approach can be amortized at several levels of abstraction, and is particularly favorable for virtual screening settings with a common binding pocket. We benchmark our scoring functions on two simplified docking-related tasks: decoy pose scoring and rigid conformer docking. Our method attains similar but faster performance on crystal structures compared to the widely-used Vina and Gnina scoring functions, and is more robust on computationally predicted structures. Code is available at https://github.com/bjing2016/scalar-fields.", "keywords": "protein structure;structural biology;drug discovery;molecular docking", "primary_area": "applications to physical sciences (physics, chemistry, biology, etc.)", "supplementary_material": "", "author": "Bowen Jing;Tommi S. 
Jaakkola;Bonnie Berger", "authorids": "~Bowen_Jing1;~Tommi_S._Jaakkola1;~Bonnie_Berger1", "gender": ";;F", "homepage": ";;https://people.csail.mit.edu/bab/", "dblp": ";;b/BonnieBerger", "google_scholar": ";;bYjKaowAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Bowen_Jing1;~Tommi_S._Jaakkola1;~Bonnie_Berger1", "aff": ";;Massachusetts Institute of Technology", "aff_domain": ";;mit.edu", "position": ";;Full Professor", "bibtex": "@inproceedings{\njing2024equivariant,\ntitle={Equivariant Scalar Fields for Molecular Docking with Fast Fourier Transforms},\nauthor={Bowen Jing and Tommi S. Jaakkola and Bonnie Berger},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=BIveOmD1Nh}\n}", "github": "", "project": "", "reviewers": "BpA5;ncny;9kCY", "pdf_size": 2238837, "rating": "5;6;8", "confidence": "3;3;4", "soundness": "3;4;4", "contribution": "2;3;4", "presentation": "3;4;4", "wc_summary": "37;109;55", "wc_strengths": "25;126;55", "wc_weaknesses": "39;107;55", "wc_questions": "21;170;61", "wc_review": "122;512;226", "wc_reply_reviewers": "0;50;8", "wc_reply_authors": "1087;815;540", "reply_reviewers": "0;1;1", "reply_authors": "3;3;1", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 67.0, 30.59411708155671 ], "wc_strengths_avg": [ 68.66666666666667, 42.35039026450117 ], "wc_weaknesses_avg": [ 67.0, 29.028721409436322 ], "wc_questions_avg": [ 84.0, 62.96559907335645 ], "wc_review_avg": [ 286.6666666666667, 164.89457911715056 ], "wc_reply_reviewers_avg": [ 19.333333333333332, 21.9291789378647 ], "wc_reply_authors_avg": [ 814.0, 223.31293439177827 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.9428090415820634 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.944911182523068, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9739782478513044686&as_sdt=8005&sciodt=0,7&hl=en", "gs_version_total": 8, "openreview": "https://openreview.net/forum?id=BIveOmD1Nh", "pdf": "https://openreview.net/pdf?id=BIveOmD1Nh", "email": ";;mit.edu", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "BJ4WgPgFqJ", "title": "PQ-VAE: Learning Hierarchical Discrete Representations with Progressive Quantization", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Variational auto-encoders (VAEs) are widely used in generative modeling and representation learning, with applications ranging from image generation to data compression. However, conventional VAEs face challenges in balancing the tradeoff between compactness and informativeness of the learned latent codes. In this work, we propose Progressive Quantization VAE (PQ-VAE), which aims to learn a progressive sequential structure for data representation that maximizes the mutual information between the latent representations and the original data in a limited description length. 
The resulting representations provide a global, compact, and hierarchical understanding of the data semantics, making it suitable for high-level tasks while achieving high compression rates. The proposed model offers an effective solution for generative modeling and data compression while enabling improved performance in high-level tasks such as image understanding and generation.", "keywords": "representation learning;deep generative models;variational autoencoders;VQ-VAE", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Lun Huang;Qiang Qiu;Guillermo Sapiro", "authorids": "~Lun_Huang1;~Qiang_Qiu1;~Guillermo_Sapiro1", "gender": ";;", "homepage": ";https://web.ics.purdue.edu/~qqiu/;", "dblp": "84/7455;97/360;82/5175", "google_scholar": "https://scholar.google.com/citations?hl=en;jdLtt_YAAAAJ;https://scholar.google.co.il/citations?user=ISRNX3gAAAAJ", "orcid": "0000-0001-5121-0460;;", "linkedin": ";;", "or_profile": "~Lun_Huang1;~Qiang_Qiu1;~Guillermo_Sapiro1", "aff": "Duke University;Purdue University;Duke University", "aff_domain": "duke.edu;purdue.edu;duke.edu", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@misc{\nhuang2024pqvae,\ntitle={{PQ}-{VAE}: Learning Hierarchical Discrete Representations with Progressive Quantization},\nauthor={Lun Huang and Qiang Qiu and Guillermo Sapiro},\nyear={2024},\nurl={https://openreview.net/forum?id=BJ4WgPgFqJ}\n}", "github": "", "project": "", "reviewers": "L7SW;JP89;bWca", "site": "https://openreview.net/forum?id=BJ4WgPgFqJ", "pdf_size": 2645559, "rating": "1;3;3", "confidence": "4;4;4", "soundness": "1;2;2", "contribution": "1;2;3", "presentation": "1;2;1", "wc_summary": "49;36;128", "wc_strengths": "1;27;25", "wc_weaknesses": "533;286;87", "wc_questions": "22;2;298", "wc_review": "605;351;538", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 2.3333333333333335, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 1.6666666666666667, 0.4714045207910317 ], "contribution_avg": [ 2.0, 0.816496580927726 ], "presentation_avg": [ 1.3333333333333333, 0.4714045207910317 ], "wc_summary_avg": [ 71.0, 40.65300316909769 ], "wc_strengths_avg": [ 17.666666666666668, 11.8133634311129 ], "wc_weaknesses_avg": [ 302.0, 182.4298952109184 ], "wc_questions_avg": [ 107.33333333333333, 135.06870679604347 ], "wc_review_avg": [ 498.0, 107.48333204114331 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 3, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3676146220517968528&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;0", "aff_unique_norm": "Duke University;Purdue University", "aff_unique_dep": ";", "aff_unique_url": "https://www.duke.edu;https://www.purdue.edu", "aff_unique_abbr": "Duke;Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "CellPLM: Pre-training of Cell Language Model Beyond Single Cells", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19221", "id": "BKXvPDekud", "author_site": "Hongzhi Wen, Wenzhuo Tang, Xinnan Dai, Jiayuan Ding, Wei Jin, Yuying Xie, Jiliang Tang", "tldr": 
"", "abstract": "The current state-of-the-art single-cell pre-trained models are greatly inspired by the success of large language models. They trained transformers by treating genes as tokens and cells as sentences. However, three fundamental differences between single-cell data and natural language data are overlooked: (1) scRNA-seq data are presented as bag-of-genes instead of sequences of RNAs; (2) Cell-cell relations are more intricate and important than inter-sentence relations; and (3) The quantity of single-cell data is considerably inferior to text data, and they are very noisy. In light of these characteristics, we propose a new pre-trained model, $\\textit{CellPLM}$, which takes cells as tokens and tissues as sentences. In addition, we leverage spatially-resolved transcriptomic data in pre-training to facilitate learning cell-cell relationships and introduce a Gaussian prior distribution as an additional inductive bias to overcome data limitations. $\\textit{CellPLM}$ is the first single-cell pre-trained transformer that encodes cell-cell relations and it consistently outperforms existing pre-trained and non-pre-trained models in diverse downstream tasks, with 100 times higher inference speed on generating cell embeddings than previous pre-trained models.", "keywords": "Single-cell analysis;Pretrained models;AI for science", "primary_area": "applications to physical sciences (physics, chemistry, biology, etc.)", "supplementary_material": "/attachment/565910ede0800a17a38788810da38efffdc30a34.zip", "author": "Hongzhi Wen;Wenzhuo Tang;Xinnan Dai;Jiayuan Ding;Wei Jin;Yuying Xie;Jiliang Tang", "authorids": "~Hongzhi_Wen1;~Wenzhuo_Tang1;~Xinnan_Dai1;~Jiayuan_Ding1;~Wei_Jin4;~Yuying_Xie1;~Jiliang_Tang1", "gender": "M;M;F;M;;M;M", "homepage": "https://www.cse.msu.edu/~wenhongz/;;;;http://www.cs.emory.edu/~wjin30/;https://cmse.msu.edu/directory/faculty/yuying-xie/;https://www.cse.msu.edu/~tangjili/", "dblp": "179/0477;;;197/1055;66/2173-9;24/2813-1;64/10812", "google_scholar": ";;LGKDd2AAAAAJ;7lwkXGEAAAAJ;eWow24EAAAAJ;https://scholar.google.com/citations?hl=en;WtzKMWAAAAAJ", "orcid": "0000-0003-0775-8538;;;;;0000-0002-1049-2219;0000-0001-7125-3898", "linkedin": ";wenzhuo-tang-66b757207;;jiayuand/;;yuying-xie-b754bb17/;", "or_profile": "~Hongzhi_Wen1;~Wenzhuo_Tang1;~Xinnan_Dai1;~Jiayuan_Ding1;~Wei_Jin4;~Yuying_Xie1;~Jiliang_Tang1", "aff": "Michigan State University;Michigan State University;Michigan State University;Michigan State University;Emory University;Michigan State University;Michigan State University", "aff_domain": "msu.edu;msu.edu;msu.edu;msu.edu;emory.edu;msu.edu;msu.edu", "position": "PhD student;PhD student;PhD student;PhD student;Assistant Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nwen2024cellplm,\ntitle={Cell{PLM}: Pre-training of Cell Language Model Beyond Single Cells},\nauthor={Hongzhi Wen and Wenzhuo Tang and Xinnan Dai and Jiayuan Ding and Wei Jin and Yuying Xie and Jiliang Tang},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=BKXvPDekud}\n}", "github": "", "project": "", "reviewers": "C7dG;1rfQ;QJ4u;3Haz", "pdf_size": 6685800, "rating": "6;6;6;8", "confidence": "2;2;4;4", "soundness": "3;3;3;4", "contribution": "3;3;3;3", "presentation": "3;3;3;4", "wc_summary": "18;70;137;62", "wc_strengths": "33;25;90;69", "wc_weaknesses": "234;72;378;95", "wc_questions": "5;2;3;96", "wc_review": "290;169;608;322", "wc_reply_reviewers": "30;0;0;0", "wc_reply_authors": 
"1175;773;2274;1325", "reply_reviewers": "2;0;0;0", "reply_authors": "4;3;7;3", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 1.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 71.75, 42.558048592481306 ], "wc_strengths_avg": [ 54.25, 26.47050245084139 ], "wc_weaknesses_avg": [ 194.75, 122.61601649050584 ], "wc_questions_avg": [ 26.5, 40.1403786728526 ], "wc_review_avg": [ 347.25, 160.9959238614444 ], "wc_reply_reviewers_avg": [ 7.5, 12.99038105676658 ], "wc_reply_authors_avg": [ 1386.75, 550.5798647789437 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 4.25, 1.6393596310755 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15887212670928548591&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=BKXvPDekud", "pdf": "https://openreview.net/pdf?id=BKXvPDekud", "email": "msu.edu;msu.edu;msu.edu;msu.edu;emory.edu;msu.edu;msu.edu", "author_num": 7, "aff_unique_index": "0;0;0;0;1;0;0", "aff_unique_norm": "Michigan State University;Emory University", "aff_unique_dep": ";", "aff_unique_url": "https://www.msu.edu;https://www.emory.edu", "aff_unique_abbr": "MSU;Emory", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "BKinRUoBN9", "title": "Investigating the Impact of Data Distribution Shifts on Cross-Modal Knowledge Distillation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Cross-modal knowledge distillation (KD) has expanded the traditional KD approach to encompass multimodal learning, achieving notable success in various applications. However, in cases where there is a considerable shift in data distribution during cross-modal KD, even a more accurate teacher model may not effectively instruct the student model. In this paper, we conduct a comprehensive analysis and evaluation of the effectiveness of cross-modal KD, focusing on its dependence on the distribution shifts in multimodal data. We initially view cross-modal KD as training a maximum entropy model using pseudo-labels and establish conditions under which it outperforms unimodal KD. Subsequently, we introduced the hypothesis of solution space divergence, which unveils the crucial factor influencing the efficacy of cross-modal KD. Our key observation is that the accuracy of the teacher model is not the primary determinant of the student model's accuracy; instead, the data distribution shifts play a more significant role. We demonstrate that as the data distribution shifts decrease, the effectiveness of cross-modal KD improves, and vice versa. Finally, to address significant data distribution differences, we propose a method called the ``perceptual solution space mask'' to enhance the effectiveness of cross-modal KD. Through experimental results on four multimodal datasets, we validate our assumptions and provide directions for future enhancements in cross-modal knowledge transfer. 
Notably, our enhanced KD method demonstrated an approximate 2\\% improvement in \\emph{mIoU} compared to the Baseline on the SemanticKITTI dataset.", "keywords": "Cross-Modal Knowledge Distillation; Data distribution shifts", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/671da5f37467b3eba247f38a874b6f5c82bb5e95.zip", "author": "Yilong Chen;Zongyi Xu;Xiaoshui Huang;Xinbo Gao", "authorids": "~Yilong_Chen1;~Zongyi_Xu1;~Xiaoshui_Huang1;~Xinbo_Gao5", "gender": "F;Not Specified;M;M", "homepage": ";https://xiaoshuihuang.github.io/;https://faculty.cqupt.edu.cn/gaoxinbo/zh_CN/index.htm;", "dblp": "125/3642;167/9599;;", "google_scholar": "PUseiVAAAAAJ;https://scholar.google.ca/citations?user=rp7mYNsAAAAJ;https://scholar.google.com/citations?hl=zh-CN;DsNNLTwAAAAJ", "orcid": ";;0000-0002-7985-0037;0000-0002-3649-3519", "linkedin": ";;xinbo-gao-151a2224/;", "or_profile": "~Zongyi_Xu1;~Xiaoshui_Huang1;~Xinbo_Gao5;~Elon_Chen1", "aff": "Chongqing University of Post and Telecommunications;Shanghai AI Laboratory;Chongqing University of Post and Telecommunications;Chongqing University of Post and Telecommunications", "aff_domain": "cqupt.edu.cn;pjlab.org.cn;cqupt.edu.cn;cqupt.edu.cn", "position": "Associate Professor;Research Fellow;Full Professor;PhD student", "bibtex": "@misc{\nchen2024investigating,\ntitle={Investigating the Impact of Data Distribution Shifts on Cross-Modal Knowledge Distillation},\nauthor={Yilong Chen and Zongyi Xu and Xiaoshui Huang and Xinbo Gao},\nyear={2024},\nurl={https://openreview.net/forum?id=BKinRUoBN9}\n}", "github": "", "project": "", "reviewers": "Pm6d;voCm;GxY4;NCD3", "site": "https://openreview.net/forum?id=BKinRUoBN9", "pdf_size": 24818612, "rating": "5;5;6;6", "confidence": "4;3;3;3", "soundness": "3;3;3;3", "contribution": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "67;60;19;66", "wc_strengths": "46;70;21;104", "wc_weaknesses": "147;52;57;104", "wc_questions": "44;42;49;5", "wc_review": "304;224;146;279", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "554;340;544;367", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 53.0, 19.81161275615895 ], "wc_strengths_avg": [ 60.25, 30.629846555280032 ], "wc_weaknesses_avg": [ 90.0, 38.658763560155414 ], "wc_questions_avg": [ 35.0, 17.507141400011598 ], "wc_review_avg": [ 238.25, 60.615076507416866 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 451.25, 98.2786217852082 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:5Tm2TZStsRkJ:scholar.google.com/&scioq=Investigating+the+Impact+of+Data+Distribution+Shifts+on+Cross-Modal+Knowledge+Distillation&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Chongqing University of Post and Telecommunications;Shanghai AI Laboratory", "aff_unique_dep": ";", "aff_unique_url": "http://www.cqupt.edu.cn;https://www.shanghai-ai-lab.com", "aff_unique_abbr": "CQUPT;SAIL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { 
"title": "LogicMP: A Neuro-symbolic Approach for Encoding First-order Logic Constraints", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19220", "id": "BLGQ3oqldb", "author_site": "Weidi Xu, Jingwei Wang, Lele Xie, Jianshan He, Hongting Zhou, Taifeng Wang, Xiaopei Wan, Jingdong Chen, Chao Qu, Wei Chu", "tldr": "", "abstract": "Integrating first-order logic constraints (FOLCs) with neural networks is a crucial but challenging problem since it involves modeling intricate correlations to satisfy the constraints. This paper proposes a novel neural layer, LogicMP, which performs mean-field variational inference over a Markov Logic Network (MLN). It can be plugged into any off-the-shelf neural network to encode FOLCs while retaining modularity and efficiency. By exploiting the structure and symmetries in MLNs, we theoretically demonstrate that our well-designed, efficient mean-field iterations greatly mitigate the difficulty of MLN inference, reducing the inference from sequential calculation to a series of parallel tensor operations. Empirical results in three kinds of tasks over images, graphs, and text show that LogicMP outperforms advanced competitors in both performance and efficiency.", "keywords": "Variational Inference", "primary_area": "probabilistic methods (Bayesian methods, variational inference, sampling, UQ, etc.)", "supplementary_material": "/attachment/78afeaa0ecb9c0bbfd077557c85242e1ea59347b.zip", "author": "Weidi Xu;Jingwei Wang;Lele Xie;Jianshan He;Hongting Zhou;Taifeng Wang;Xiaopei Wan;Jingdong Chen;Chao Qu;Wei Chu", "authorids": "~Weidi_Xu1;~Jingwei_Wang1;~Lele_Xie1;~Jianshan_He1;~Hongting_Zhou1;~Taifeng_Wang2;~Xiaopei_Wan2;~Jingdong_Chen1;~Chao_Qu3;~Wei_Chu1", "gender": "M;;M;M;F;M;M;M;M;M", "homepage": ";;;;https://github.com/Nurikol;https://scholar.google.com/citations?user=aMNBEk0AAAAJ&hl=zh-CN;;;;http://weichu.github.io", "dblp": "00/11534;;214/0068;225/5402;253/0325;01/1483;;33/5656;;", "google_scholar": ";;vH97Cd4AAAAJ;https://scholar.google.com.hk/citations?user=0bq6rAkAAAAJ;;aMNBEk0AAAAJ;zU9TT-AAAAAJ;8SCEv-YAAAAJ;DI2NyPsAAAAJ;3J4zb7gAAAAJ", "orcid": "0000-0002-7279-9339;;;;;;;0000-0002-1872-2592;;", "linkedin": ";;;;%E8%99%B9%E5%BB%B7-%E5%91%A8-6524b6149/;;;;;", "or_profile": "~Weidi_Xu1;~Jingwei_Wang1;~Lele_Xie1;~Jianshan_He1;~Hongting_Zhou1;~Taifeng_Wang2;~Xiaopei_Wan2;~Jingdong_Chen1;~Chao_Qu3;~Wei_Chu1", "aff": "Infly Technology;;Ant Group;Ant Group;Antgroup;BioMap;;Ant Group;Inftech;Inf Tech", "aff_domain": "inftech.ai;;antgroup.com;antgroup.com;antgroup.com;biomap.com;;antgroup.com;inftech.ai;inftech.ai", "position": "Researcher;;Researcher;Researcher;Researcher;Principal Researcher;;Senior Staff Algorithm Engineer;Researcher;Researcher", "bibtex": "@inproceedings{\nxu2024logicmp,\ntitle={Logic{MP}: A Neuro-symbolic Approach for Encoding First-order Logic Constraints},\nauthor={Weidi Xu and Jingwei Wang and Lele Xie and Jianshan He and Hongting Zhou and Taifeng Wang and Xiaopei Wan and Jingdong Chen and Chao Qu and Wei Chu},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=BLGQ3oqldb}\n}", "github": "", "project": "", "reviewers": "RzRh;svJm;9vvz;XeZX", "pdf_size": 2972462, "rating": "5;5;6;8", "confidence": "3;2;4;4", "soundness": "3;2;2;3", "contribution": "1;2;2;3", "presentation": "2;1;3;3", "wc_summary": "35;26;67;94", "wc_strengths": "13;106;37;48", "wc_weaknesses": "133;177;126;154", "wc_questions": "72;2;131;96", "wc_review": 
"253;311;361;392", "wc_reply_reviewers": "0;0;89;4", "wc_reply_authors": "743;1175;1788;649", "reply_reviewers": "0;0;1;1", "reply_authors": "1;2;4;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 55.5, 26.949025956423732 ], "wc_strengths_avg": [ 51.0, 34.18332927027442 ], "wc_weaknesses_avg": [ 147.5, 19.90602923739438 ], "wc_questions_avg": [ 75.25, 47.20897690058534 ], "wc_review_avg": [ 329.25, 52.661062465544695 ], "wc_reply_reviewers_avg": [ 23.25, 37.995887935406905 ], "wc_reply_authors_avg": [ 1088.75, 449.80905671184524 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.7385489458759963, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:DjV_uspSItUJ:scholar.google.com/&scioq=LogicMP:+A+Neuro-symbolic+Approach+for+Encoding+First-order+Logic+Constraints&hl=en&as_sdt=0,5", "gs_version_total": 6, "openreview": "https://openreview.net/forum?id=BLGQ3oqldb", "pdf": "https://openreview.net/pdf?id=BLGQ3oqldb", "email": "inftech.ai;;antgroup.com;antgroup.com;antgroup.com;biomap.com;;antgroup.com;inftech.ai;inftech.ai", "author_num": 10, "aff_unique_index": "0;1;1;2;3;1;4;5", "aff_unique_norm": "Infly Technology;Ant Group;Antgroup;BioMap;Inftech;Information Technology", "aff_unique_dep": ";;;;;", "aff_unique_url": ";https://www.antgroup.com;https://www.antgroup.com;;;", "aff_unique_abbr": ";Ant Group;Antgroup;;;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1;1;1;1", "aff_country_unique": ";China" }, { "id": "BMZYh3IyAU", "title": "Provably Doubly Accelerated Federated Learning: The First Theoretically Successful Combination of Local Training and Communication Compression", "track": "main", "status": "Desk Reject", "tldr": "", "abstract": "In federated learning, a large number of users collaborate to learn a global model. They alternate local computations and two-way communication with a distant server. Communication, which can be slow and costly, is the main bottleneck in this setting. To reduce the communication load and therefore accelerate distributed gradient descent, two strategies are popular: 1) communicate less frequently; that is, perform several iterations of local computations between the communication rounds; and 2) communicate compressed information instead of full-dimensional vectors. 
We propose the first algorithm for distributed optimization and federated learning, which harnesses these two strategies jointly and converges linearly to an exact solution in the strongly convex setting, with a doubly accelerated rate: our algorithm benefits from the two acceleration mechanisms provided by local training and compression, namely a better dependency on the condition number of the functions and on the dimension of the model, respectively.", "keywords": "Federated learning;local training;compression;communication;optimization", "primary_area": "optimization", "supplementary_material": "/attachment/8c0d96120d9d802906205931fc08aa1f095fa4d6.zip", "author": "Laurent Condat;Ivan Agarsk\u00fd;Peter Richt\u00e1rik", "authorids": "~Laurent_Condat1;~Ivan_Agarsk\u00fd1;~Peter_Richt\u00e1rik1", "gender": "M;M;M", "homepage": "https://lcondat.github.io/;;https://richtarik.org", "dblp": "88/1335;331/8298;62/8001", "google_scholar": "PixYHyEAAAAJ;534mR8QAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0001-7087-1002;0000-0003-2007-881X;0000-0003-4380-5848", "linkedin": "laurent-condat-40291720b/;agarsky/;richtarik/", "or_profile": "~Laurent_Condat1;~Ivan_Agarsk\u00fd1;~Peter_Richtarik1", "aff": "KAUST;Kempelen Institute of Intelligent Technologies;King Abdullah University of Science and Technology (KAUST)", "aff_domain": "kaust.edu.sa;kinit.sk;kaust.edu.sa", "position": "research scientist;PhD student;Full Professor", "bibtex": "@misc{\ncondat2024provably,\ntitle={Provably Doubly Accelerated Federated Learning: The First Theoretically Successful Combination of Local Training and Communication Compression},\nauthor={Laurent Condat and Ivan Agarsk{\\'y} and Peter Richt{\\'a}rik},\nyear={2024},\nurl={https://openreview.net/forum?id=BMZYh3IyAU}\n}", "github": "", "project": "", "reviewers": "Va6u;5itr", "site": "https://openreview.net/forum?id=BMZYh3IyAU", "pdf_size": 1077068, "rating": "6;6", "confidence": "3;3", "soundness": "3;3", "contribution": "3;3", "presentation": "2;4", "wc_summary": "49;56", "wc_strengths": "48;48", "wc_weaknesses": "57;181", "wc_questions": "278;103", "wc_review": "432;388", "wc_reply_reviewers": "0;22", "wc_reply_authors": "965;742", "reply_reviewers": "0;1", "reply_authors": "2;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "contribution_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 1.0 ], "wc_summary_avg": [ 52.5, 3.5 ], "wc_strengths_avg": [ 48.0, 0.0 ], "wc_weaknesses_avg": [ 119.0, 62.0 ], "wc_questions_avg": [ 190.5, 87.5 ], "wc_review_avg": [ 410.0, 22.0 ], "wc_reply_reviewers_avg": [ 11.0, 11.0 ], "wc_reply_authors_avg": [ 853.5, 111.5 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7249144093151301792&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;0", "aff_unique_norm": "King Abdullah University of Science and Technology;Kempelen Institute of Intelligent Technologies", "aff_unique_dep": ";", "aff_unique_url": "https://www.kaust.edu.sa;http://www.kempeleninstitute.com", "aff_unique_abbr": "KAUST;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Saudi Arabia;Hungary" }, { "id": "BMw4Cm0gGO", "title": "C-MCTS: Safe Planning with Monte Carlo Tree Search", "track": "main", "status": "Reject", "tldr": "", 
"abstract": "The Constrained Markov Decision Process (CMDP) allows to solve safety-critical decision making tasks that are subject to constraints. \nWhile CMDPs have been extensively studied in the Reinforcement Learning literature, little attention has been given to sampling-based planning algorithms such as MCTS for solving them. Previous approaches perform conservatively with respect to costs as they avoid constraint violations by using Monte Carlo cost estimates that suffer from high variance. We propose Constrained MCTS (C-MCTS), which estimates cost using a safety critic that is trained with Temporal Difference learning in an offline phase prior to agent deployment. The critic limits exploration by pruning unsafe trajectories within MCTS during deployment. C-MCTS satisfies cost constraints but operates closer to the constraint boundary, achieving higher rewards than previous work. As a nice byproduct, the planner is more efficient w.r.t. planning steps. Most importantly, under model mismatch between the planner and the real world, C-MCTS is less susceptible to cost violations than previous work.", "keywords": "Monte Carlo Tree Search;Safety Critic;Safe Reinforcement Learning;Reinforcement Learning;RL", "primary_area": "reinforcement learning", "supplementary_material": "/attachment/e2339d1062b46e11afc46da672c9047490f63e34.zip", "author": "Dinesh Parthasarathy;Georgios Kontes;Axel Plinge;Christopher Mutschler", "authorids": "~Dinesh_Parthasarathy1;~Georgios_Kontes1;~Axel_Plinge1;~Christopher_Mutschler1", "gender": "M;;M;M", "homepage": ";;https://www.iis.fraunhofer.de/;https://www.cmutschler.de", "dblp": ";;36/1064;118/7748", "google_scholar": ";;TLskmTcAAAAJ;https://scholar.google.de/citations?user=gKDSp8YAAAAJ", "orcid": ";;0000-0001-7757-2953;0000-0001-8108-0230", "linkedin": "dineshkumar-93/;;aplinge/;christopher-mutschler-28431576/", "or_profile": "~Dinesh_Parthasarathy1;~Georgios_Kontes1;~Axel_Plinge1;~Christopher_Mutschler1", "aff": "Lawrence Livermore National Labs;;;Fraunhofer IIS", "aff_domain": "llnl.gov;;;fraunhofer.de", "position": "Intern;;;Principal Researcher", "bibtex": "@misc{\nparthasarathy2024cmcts,\ntitle={C-{MCTS}: Safe Planning with Monte Carlo Tree Search},\nauthor={Dinesh Parthasarathy and Georgios Kontes and Axel Plinge and Christopher Mutschler},\nyear={2024},\nurl={https://openreview.net/forum?id=BMw4Cm0gGO}\n}", "github": "", "project": "", "reviewers": "DU49;q4t7;EfYF;q5zX", "site": "https://openreview.net/forum?id=BMw4Cm0gGO", "pdf_size": 483528, "rating": "3;5;5;5", "confidence": "5;3;3;3", "soundness": "2;2;1;3", "contribution": "2;2;2;2", "presentation": "3;3;2;3", "wc_summary": "49;95;41;35", "wc_strengths": "66;59;33;17", "wc_weaknesses": "64;135;178;29", "wc_questions": "88;353;52;226", "wc_review": "267;642;304;307", "wc_reply_reviewers": "17;25;31;17", "wc_reply_authors": "619;2040;1010;1081", "reply_reviewers": "1;1;1;1", "reply_authors": "1;3;2;2", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.0, 0.7071067811865476 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 55.0, 23.62202362203543 ], "wc_strengths_avg": [ 43.75, 19.74050404624968 ], "wc_weaknesses_avg": [ 101.5, 58.38878316937252 ], "wc_questions_avg": [ 179.75, 119.2610057814372 ], "wc_review_avg": [ 380.0, 152.08385844658204 ], "wc_reply_reviewers_avg": [ 22.5, 5.894913061275798 ], "wc_reply_authors_avg": [ 1187.5, 522.6846563655757 ], "reply_reviewers_avg": [ 1.0, 
0.0 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15509113104036491848&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Lawrence Livermore National Laboratory;Fraunhofer Institute for Integrated Circuits", "aff_unique_dep": ";", "aff_unique_url": "https://www.llnl.gov;https://www.iis.fraunhofer.de/", "aff_unique_abbr": "LLNL;Fraunhofer IIS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;Germany" }, { "id": "BO3aRwGzq0", "title": "DINAR: Fine-Grained Privacy Preserving Federated Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Federated Learning (FL) enables collaborative model training among several participants, while keeping local data private at the participants' premises.However, despite its merits, FL remains vulnerable to privacy attacks, and in particular, to membership inference attacks that allow adversaries to deduce confidential information about participants' training data.\nIn this paper, we propose DINAR, a novel privacy-preserving FL method. DINAR follows a fine-grained approach that specifically tackles FL neural network layers that leak more private information than other layers, thus, efficiently protecting the FL model against membership inference attacks in a non-intrusive way. And in order to compensate for any potential loss in the accuracy of the protected model, DINAR combines the proposed fine-grained approach with adaptive gradient descent.The paper presents our extensive empirical evaluation of DINAR, conducted with six widely used datasets, four neural networks, and comparing against three state-of-the-art FL privacy protection mechanisms.The evaluation results show that DINAR reduces the membership inference attack success rate to reach its optimal value, without hurting model accuracy, and without inducing computational overhead. 
In contrast, existing FL defense mechanisms incur an overhead of up to +36% and +3,000% on respectively FL client-side and FL server-side computation times, and up to +168% on memory usage.", "keywords": "Federetad Learning;Privacy;Membership Inference Attacks;Cross-Silo Federated Learning", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "", "author": "C\u00e9dric BOSCHER;Fatima Elhattab;Sara Bouchenak", "authorids": "~C\u00e9dric_BOSCHER1;~Fatima_Elhattab1;~Sara_Bouchenak1", "gender": "M;F;F", "homepage": "https://liris.cnrs.fr/page-membre/cedric-boscher;https://liris.cnrs.fr/page-membre/fatima-el-hattab;https://perso.liris.cnrs.fr/sara.bouchenak/", "dblp": ";;", "google_scholar": ";;", "orcid": ";;", "linkedin": ";;sara-bouchenak-587bbb24/", "or_profile": "~C\u00e9dric_BOSCHER1;~Fatima_Elhattab1;~Sara_Bouchenak1", "aff": "Institut National des Sciences Appliqu\u00e9es de Lyon;;Institut National des Sciences Appliqu\u00e9es de Lyon", "aff_domain": "insa-lyon.fr;;insa-lyon.fr", "position": "PhD student;;Full Professor", "bibtex": "@misc{\nboscher2024dinar,\ntitle={{DINAR}: Fine-Grained Privacy Preserving Federated Learning},\nauthor={C{\\'e}dric BOSCHER and Fatima Elhattab and Sara Bouchenak},\nyear={2024},\nurl={https://openreview.net/forum?id=BO3aRwGzq0}\n}", "github": "", "project": "", "reviewers": "nS4z;AcVa;uPn1;Y9fY", "site": "https://openreview.net/forum?id=BO3aRwGzq0", "pdf_size": 1213686, "rating": "5;5;5;5", "confidence": "3;4;4;4", "soundness": "2;2;3;3", "contribution": "2;3;2;3", "presentation": "2;3;3;3", "wc_summary": "73;71;120;65", "wc_strengths": "16;42;107;46", "wc_weaknesses": "29;129;702;100", "wc_questions": "175;4;6;110", "wc_review": "293;246;935;321", "wc_reply_reviewers": "88;13;11;0", "wc_reply_authors": "454;519;704;624", "reply_reviewers": "2;1;1;0", "reply_authors": "3;2;2;2", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 82.25, 21.992896580487074 ], "wc_strengths_avg": [ 52.75, 33.37195679009548 ], "wc_weaknesses_avg": [ 240.0, 269.20531198325193 ], "wc_questions_avg": [ 73.75, 72.49267204345554 ], "wc_review_avg": [ 448.75, 282.0127435063884 ], "wc_reply_reviewers_avg": [ 28.0, 34.99285641384538 ], "wc_reply_authors_avg": [ 575.25, 95.94106263743382 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:0N-qxFgLJj8J:scholar.google.com/&scioq=DINAR:+Fine-Grained+Privacy+Preserving+Federated+Learning&hl=en&as_sdt=0,48", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Institut National des Sciences Appliqu\u00e9es", "aff_unique_dep": "", "aff_unique_url": "https://www.insa-lyon.fr", "aff_unique_abbr": "INSA Lyon", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Lyon", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "title": "LMSYS-Chat-1M: A Large-Scale Real-World LLM Conversation Dataset", "status": "Spotlight", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19219", "id": "BOfDKxfwt0", "author_site": "Lianmin Zheng, Wei-Lin Chiang, Ying Sheng, Tianle Li, Siyuan Zhuang, Zhanghao Wu, Yonghao Zhuang, Zhuohan Li, Zi Lin, Eric Xing, Joseph E Gonzalez, Ion Stoica, 
Hao Zhang", "tldr": "", "abstract": "Studying how people interact with large language models (LLMs) in real-world scenarios is increasingly important due to their widespread use in various applications. In this paper, we introduce LMSYS-Chat-1M, a large-scale dataset containing one million real-world conversations with 25 state-of-the-art LLMs. This dataset is collected from 210K unique IP addresses in the wild on our Vicuna demo and Chatbot Arena website. We offer an overview of the dataset's content, including its curation process, basic statistics, and topic distribution, highlighting its diversity, originality, and scale. We demonstrate its versatility through four use cases: developing content moderation models that perform similarly to GPT-4, building a safety benchmark, training instruction-following models that perform similarly to Vicuna, and creating challenging benchmark questions. We believe that this dataset will serve as a valuable resource for understanding and advancing LLM capabilities. The dataset is publicly available at https://huggingface.co/datasets/lmsys/lmsys-chat-1m.", "keywords": "large language models;dataset;conversation;safety;benchmark", "primary_area": "datasets and benchmarks", "supplementary_material": "", "author": "Lianmin Zheng;Wei-Lin Chiang;Ying Sheng;Tianle Li;Siyuan Zhuang;Zhanghao Wu;Yonghao Zhuang;Zhuohan Li;Zi Lin;Eric Xing;Joseph E. Gonzalez;Ion Stoica;Hao Zhang", "authorids": "~Lianmin_Zheng2;~Wei-Lin_Chiang1;~Ying_Sheng1;~Tianle_Li2;~Siyuan_Zhuang1;~Zhanghao_Wu1;~Yonghao_Zhuang1;~Zhuohan_Li1;~Zi_Lin1;~Eric_Xing1;~Joseph_E._Gonzalez1;~Ion_Stoica1;~Hao_Zhang2", "gender": "M;;F;M;M;M;M;M;F;M;M;M;M", "homepage": "http://lmzheng.net/;https://infwinston.github.io/;https://sites.google.com/view/yingsheng;;https://suquark.github.io/;https://zhanghaowu.me;https://zyhowell.github.io/;http://zhuohan.li;https://zi-lin.com/;http://www.cs.cmu.edu/~epxing/;http://eecs.berkeley.edu/~jegonzal;http://people.eecs.berkeley.edu/~istoica/;https://cseweb.ucsd.edu/~haozhang/", "dblp": "211/7027;174/2148;262/6232.html;;;;;;81/2999;36/3855;61/8262;s/IonStoica;55/2270-25", "google_scholar": "_7Q8uIYAAAAJ;https://scholar.google.com/citations?hl=en;xMhGYpgAAAAJ;1M79iLwAAAAJ;KSZmI5EAAAAJ;YfyMDFgAAAAJ;oh297TsAAAAJ;;kgZYttUAAAAJ;https://scholar.google.com.tw/citations?user=5pKTRxEAAAAJ;https://scholar.google.com.tw/citations?user=gM2WW9UAAAAJ;vN-is70AAAAJ;H1d4BS8AAAAJ", "orcid": ";;0000-0002-1883-2126;;0009-0007-3787-0316;;;;;;0000-0003-2921-956X;;", "linkedin": ";;;tianleli/;siyuanzhuang;;;;zi-lin/;;;ionstoica;", "or_profile": "~Lianmin_Zheng2;~Wei-Lin_Chiang1;~Ying_Sheng1;~Tianle_Li2;~Siyuan_Zhuang1;~Zhanghao_Wu1;~Yonghao_Zhuang1;~Zhuohan_Li1;~Zi_Lin1;~Eric_Xing1;~Joseph_E._Gonzalez1;~Ion_Stoica1;~Hao_Zhang2", "aff": "University of California, Berkeley;University of California, Berkeley;Stanford University;University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;Carnegie Mellon University;University of California, Berkeley;University of California, San Diego;School of Computer Science, Carnegie Mellon University;University of California, Berkeley;University of California, Berkeley;Carnegie Mellon University", "aff_domain": "berkeley.edu;berkeley.edu;stanford.edu;berkeley.edu;berkeley.edu;berkeley.edu;andrew.cmu.edu;berkeley.edu;ucsd.edu;cs.cmu.edu;berkeley.edu;berkeley.edu;cmu.edu", "position": "PhD student;PhD student;PhD student;Undergrad student;PhD student;PhD student;PhD student;PhD student;Graduate student;Full 
Professor;Associate Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nzheng2024lmsyschatm,\ntitle={{LMSYS}-Chat-1M: A Large-Scale Real-World {LLM} Conversation Dataset},\nauthor={Lianmin Zheng and Wei-Lin Chiang and Ying Sheng and Tianle Li and Siyuan Zhuang and Zhanghao Wu and Yonghao Zhuang and Zhuohan Li and Zi Lin and Eric Xing and Joseph E. Gonzalez and Ion Stoica and Hao Zhang},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=BOfDKxfwt0}\n}", "github": "", "project": "", "reviewers": "M7pJ;kFoy;hTkb;D9AT", "pdf_size": 916611, "rating": "6;8;8;8", "confidence": "4;4;4;4", "soundness": "2;3;4;3", "contribution": "4;4;3;4", "presentation": "3;3;3;3", "wc_summary": "55;47;69;45", "wc_strengths": "94;110;94;81", "wc_weaknesses": "61;70;141;26", "wc_questions": "2;10;2;32", "wc_review": "212;237;306;184", "wc_reply_reviewers": "0;0;10;10", "wc_reply_authors": "196;163;331;178", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 3.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 54.0, 9.433981132056603 ], "wc_strengths_avg": [ 94.75, 10.280442597476044 ], "wc_weaknesses_avg": [ 74.5, 41.76421913552317 ], "wc_questions_avg": [ 11.5, 12.278029157808675 ], "wc_review_avg": [ 234.75, 45.20716204319842 ], "wc_reply_reviewers_avg": [ 5.0, 5.0 ], "wc_reply_authors_avg": [ 217.0, 66.84683986547158 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 160, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1730324882341676130&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "openreview": "https://openreview.net/forum?id=BOfDKxfwt0", "pdf": "https://openreview.net/pdf?id=BOfDKxfwt0", "email": "berkeley.edu;berkeley.edu;stanford.edu;berkeley.edu;berkeley.edu;berkeley.edu;andrew.cmu.edu;berkeley.edu;ucsd.edu;cs.cmu.edu;berkeley.edu;berkeley.edu;cmu.edu", "author_num": 13, "aff_unique_index": "0;0;1;0;0;0;2;0;3;2;0;0;2", "aff_unique_norm": "University of California, Berkeley;Stanford University;Carnegie Mellon University;University of California, San Diego", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.berkeley.edu;https://www.stanford.edu;https://www.cmu.edu;https://www.ucsd.edu", "aff_unique_abbr": "UC Berkeley;Stanford;CMU;UCSD", "aff_campus_unique_index": "0;0;1;0;0;0;0;3;4;0;0", "aff_campus_unique": "Berkeley;Stanford;;San Diego;Pittsburgh", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "BOm1RYdHHu", "title": "SAFHE: Defending Against Backdoor and Gradient Inversion Attacks in Federated Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Federated learning (FL) is an increasingly popular approach in machine learning that enables a set of clients to jointly train a global model without ever sharing their private data, using a central server to aggregate clients' local weight updates. 
However, previous work has shown that the distributed nature of federated learning makes it susceptible to two major attacks: backdoor attacks, where malicious clients submit large weights that incorrectly change model behavior, and gradient inversion attacks, where a malicious eavesdropper is able to reconstruct the clients' training data by viewing the weight updates sent by clients to the central server. Although various solutions have been proposed in the literature that defend against these two attacks separately, present approaches remain largely incompatible, creating a trade-off between defending against the two types of attacks. This poses a major challenge in deploying FL in privacy-sensitive ML applications.\n\nWe present SAFHE (Secure Aggregation with Fully Homomorphic Encryption), a novel scheme to defend against both backdoor attacks and gradient inversion attacks. Our secure aggregation method combines the use of fully homomorphic encryption (FHE) and the gradient norm clipping defense to defend against large malicious client updates, by pre-weighting client updates using a function that can be evaluated in the encrypted domain. This allows the server to reject large-magnitude updates without seeing their cleartext values. We demonstrate that Chebyshev approximations of a product of sigmoids work for this purpose, and perform simulations suggesting that such a scheme can defend against backdoor attacks without significantly impacting model accuracy. Additionally, we show that these approximations can be accurately and efficiently computed in the encrypted domain.", "keywords": "federated learning;fully homomorphic encryption;backdoor attacks;gradient inversion attacks", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "", "author": "Jordan Barkin;Ratip Emin Berker;S\u00edlvia Casacuberta;Janet Li", "authorids": "~Jordan_Barkin1;~Ratip_Emin_Berker1;~S\u00edlvia_Casacuberta1;~Janet_Li1", "gender": "M;M;;F", "homepage": ";;;", "dblp": ";315/8807.html;;", "google_scholar": ";HxaUFCkAAAAJ;;", "orcid": ";;;", "linkedin": "jordan-barkin/;;;janetli19/", "or_profile": "~Jordan_Barkin1;~Ratip_Emin_Berker1;~S\u00edlvia_Casacuberta1;~Janet_Li1", "aff": ";Carnegie Mellon University;;Harvard University", "aff_domain": ";cs.cmu.edu;;harvard.edu", "position": ";PhD student;;Undergrad student", "bibtex": "@misc{\nbarkin2024safhe,\ntitle={{SAFHE}: Defending Against Backdoor and Gradient Inversion Attacks in Federated Learning},\nauthor={Jordan Barkin and Ratip Emin Berker and S{\\'\\i}lvia Casacuberta and Janet Li},\nyear={2024},\nurl={https://openreview.net/forum?id=BOm1RYdHHu}\n}", "github": "", "project": "", "reviewers": "wYb4;uyJ6;cV7D", "site": "https://openreview.net/forum?id=BOm1RYdHHu", "pdf_size": 922413, "rating": "1;5;6", "confidence": "5;5;2", "soundness": "1;2;3", "contribution": "1;2;3", "presentation": "2;3;3", "wc_summary": "88;48;70", "wc_strengths": "13;40;84", "wc_weaknesses": "528;29;62", "wc_questions": "340;8;138", "wc_review": "969;125;354", "wc_reply_reviewers": "680;101;71", "wc_reply_authors": "2094;778;1398", "reply_reviewers": "1;1;1", "reply_authors": "3;1;2", "rating_avg": [ 4.0, 2.160246899469287 ], "confidence_avg": [ 4.0, 1.4142135623730951 ], "soundness_avg": [ 2.0, 0.816496580927726 ], "contribution_avg": [ 2.0, 0.816496580927726 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 68.66666666666667, 16.35712552851373 ], "wc_strengths_avg": [ 45.666666666666664, 
29.261275129806325 ], "wc_weaknesses_avg": [ 206.33333333333334, 227.85131604233104 ], "wc_questions_avg": [ 162.0, 136.59673007311218 ], "wc_review_avg": [ 482.6666666666667, 356.3709054092691 ], "wc_reply_reviewers_avg": [ 284.0, 280.28200084914477 ], "wc_reply_authors_avg": [ 1423.3333333333333, 537.5533048503706 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6546536707079772, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:WGcafUPoGVUJ:scholar.google.com/&scioq=SAFHE:+Defending+Against+Backdoor+and+Gradient+Inversion+Attacks+in+Federated+Learning&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Carnegie Mellon University;Harvard University", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.harvard.edu", "aff_unique_abbr": "CMU;Harvard", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Demystifying Poisoning Backdoor Attacks from a Statistical Perspective", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19218", "id": "BPHcEpGvF8", "author_site": "Ganghua Wang, Xun Xian, Ashish Kundu, Jayanth Srinivasa, Xuan Bi, Mingyi Hong, Jie Ding", "tldr": "", "abstract": "Backdoor attacks pose a significant security risk to machine learning applications due to their stealthy nature and potentially serious consequences. Such attacks involve embedding triggers within a learning model with the intention of causing malicious behavior when an active trigger is present while maintaining regular functionality without it. This paper derives a fundamental understanding of backdoor attacks that applies to both discriminative and generative models, including diffusion models and large language models. We evaluate the effectiveness of any backdoor attack incorporating a constant trigger, by establishing tight lower and upper boundaries for the performance of the compromised model on both clean and backdoor test data. The developed theory answers a series of fundamental but previously underexplored problems, including (1) what are the determining factors for a backdoor attack's success, (2) what is the direction of the most effective backdoor attack, and (3) when will a human-imperceptible trigger succeed. We demonstrate the theory by conducting experiments using benchmark datasets and state-of-the-art backdoor attack scenarios. 
Our code is available \\href{https://github.com/KeyWgh/DemystifyBackdoor}{here}.", "keywords": "backdoor attack;machine learning safety;asymptotic;statistical risk", "primary_area": "societal considerations including fairness, safety, privacy", "supplementary_material": "/attachment/d1d4f7b70b114eb7ce8739d1ff6e5ea4fa6435a9.zip", "author": "Ganghua Wang;Xun Xian;Ashish Kundu;Jayanth Srinivasa;Xuan Bi;Mingyi Hong;Jie Ding", "authorids": "~Ganghua_Wang1;~Xun_Xian1;~Ashish_Kundu1;~Jayanth_Srinivasa1;~Xuan_Bi1;~Mingyi_Hong1;~Jie_Ding2", "gender": "M;M;;M;;M;M", "homepage": "https://gwang.umn.edu;https://jeremyxianx.github.io/;;;;http://people.ece.umn.edu/~mhong/mingyi.html;http://jding.org", "dblp": "200/9632;262/3278;;285/5006;;57/8053;94/1825-2", "google_scholar": ";https://scholar.google.com/citations?hl=en;;HtNfeKYAAAAJ;F3eRk9MAAAAJ;qRnP-p0AAAAJ;ZyqvoqcAAAAJ", "orcid": "0000-0002-0888-167X;;;;;;", "linkedin": ";;;;;;", "or_profile": "~Ganghua_Wang1;~Xun_Xian1;~Ashish_Kundu1;~Jayanth_Srinivasa1;~Xuan_Bi1;~Mingyi_Hong1;~Jie_Ding2", "aff": "University of Minnesota, Minneapolis;University of Minnesota, Minneapolis;;Cisco;University of Minnesota - Twin Cities;University of Minnesota, Minneapolis;University of Minnesota - Twin Cities", "aff_domain": "umn.edu;umn.edu;;cisco.com;umn.edu;umn.edu;umn.edu", "position": "PhD student;PhD student;;Researcher;Assistant Professor;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nwang2024demystifying,\ntitle={Demystifying Poisoning Backdoor Attacks from a Statistical Perspective},\nauthor={Ganghua Wang and Xun Xian and Ashish Kundu and Jayanth Srinivasa and Xuan Bi and Mingyi Hong and Jie Ding},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=BPHcEpGvF8}\n}", "github": "", "project": "", "reviewers": "DhoY;76rk;DvbB;bWko", "pdf_size": 1750496, "rating": "3;6;6;8", "confidence": "5;3;3;4", "soundness": "2;3;3;3", "contribution": "1;2;3;4", "presentation": "2;3;2;3", "wc_summary": "137;95;118;48", "wc_strengths": "20;33;76;63", "wc_weaknesses": "184;155;253;118", "wc_questions": "6;3;1;15", "wc_review": "347;286;448;244", "wc_reply_reviewers": "745;42;65;0", "wc_reply_authors": "1596;806;836;421", "reply_reviewers": "3;1;1;0", "reply_authors": "4;3;3;2", "rating_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 99.5, 33.24530041975858 ], "wc_strengths_avg": [ 48.0, 22.4610774452162 ], "wc_weaknesses_avg": [ 177.5, 49.46968768852296 ], "wc_questions_avg": [ 6.25, 5.356071321407137 ], "wc_review_avg": [ 331.25, 76.71171683647812 ], "wc_reply_reviewers_avg": [ 213.0, 308.03327742307323 ], "wc_reply_authors_avg": [ 914.75, 426.0043280296575 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5488604301969737, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2162895515699683008&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "openreview": "https://openreview.net/forum?id=BPHcEpGvF8", "pdf": "https://openreview.net/pdf?id=BPHcEpGvF8", "email": "umn.edu;umn.edu;;cisco.com;umn.edu;umn.edu;umn.edu", "author_num": 7, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "University of Minnesota;Cisco Systems", 
"aff_unique_dep": ";", "aff_unique_url": "https://www.minnesota.edu;https://www.cisco.com", "aff_unique_abbr": "UMN;Cisco", "aff_campus_unique_index": "0;0;2;0;2", "aff_campus_unique": "Minneapolis;;Twin Cities", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "FreeReg: Image-to-Point Cloud Registration Leveraging Pretrained Diffusion Models and Monocular Depth Estimators", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19217", "id": "BPb5AhT2Vf", "author_site": "Haiping Wang, Yuan Liu, Bing WANG, YUJING SUN, Zhen Dong, Wenping Wang, Bisheng Yang", "tldr": "", "abstract": "Matching cross-modality features between images and point clouds is a fundamental problem for image-to-point cloud registration. However, due to the modality difference between images and points, it is difficult to learn robust and discriminative cross-modality features by existing metric learning methods for feature matching. Instead of applying metric learning on cross-modality data, we propose to unify the modality between images and point clouds by pretrained large-scale models first, and then establish robust correspondence within the same modality. We show that the intermediate features, called diffusion features, extracted by depth-to-image diffusion models are semantically consistent between images and point clouds, which enables the building of coarse but robust cross-modality correspondences. We further extract geometric features on depth maps produced by the monocular depth estimator. By matching such geometric features, we significantly improve the accuracy of the coarse correspondences produced by diffusion features. Extensive experiments demonstrate that without any task-specific training, direct utilization of both features produces accurate image-to-point cloud registration. On three public indoor and outdoor benchmarks, the proposed method averagely achieves a 20.6 percent improvement in Inlier Ratio, a $3.0\\times$ higher Inlier Number, and a 48.6 percent improvement in Registration Recall than existing state-of-the-arts. 
The code and additional results are available at \\url{https://whu-usi3dv.github.io/FreeReg/}.", "keywords": "Image-to-point cloud registration;cross-modality feature extraction;diffusion models", "primary_area": "visualization or interpretation of learned representations", "supplementary_material": "", "author": "Haiping Wang;Yuan Liu;Bing WANG;YUJING SUN;Zhen Dong;Wenping Wang;Bisheng Yang", "authorids": "~Haiping_Wang1;~Yuan_Liu3;~Bing_WANG8;~YUJING_SUN2;~Zhen_Dong4;~Wenping_Wang1;~Bisheng_Yang1", "gender": "M;M;M;;M;M;M", "homepage": "https://hpwang-whu.github.io/;https://liuyuan-pal.github.io/;http://www.cs.ox.ac.uk/people/bing.wang/;https://yujingsun.github.io/;https://dongzhenwhu.github.io/index.html;https://engineering.tamu.edu/cse/profiles/Wang-Wenping.html;", "dblp": ";87/2948-25;06/1909-13;64/8656-1;;;", "google_scholar": "https://scholar.google.com.hk/citations?user=YAdDCr0AAAAJ;yRAHVcgAAAAJ;W7QhPeUAAAAJ;https://scholar.google.com.hk/citations?user=AC9Ky6AAAAAJ;https://scholar.google.com/citations?hl=zh-CN;28shvv0AAAAJ;TJkm8igAAAAJ", "orcid": ";;0000-0003-0977-0426;0000-0003-0819-296X;;0000-0002-2284-3952;", "linkedin": ";;;;;;", "or_profile": "~Haiping_Wang1;~Yuan_Liu3;~Bing_WANG8;~YUJING_SUN2;~Zhen_Dong4;~Wenping_Wang1;~Bisheng_Yang1", "aff": "Wuhan University;The University of Hong Kong;Hong Kong Polytechnic University;the University of Hong Kong, University of Hong Kong;Wuhan University;Texas A&M University - College Station;Wuhan University", "aff_domain": "whu.edu.cn;hku.hk;polyu.edu.hk;cs.hku.hk;whu.edu;tamu.edu;whu.edu.cn", "position": "PhD student;PhD student;Assistant Professor;Research Assistant Professor;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nwang2024freereg,\ntitle={FreeReg: Image-to-Point Cloud Registration Leveraging Pretrained Diffusion Models and Monocular Depth Estimators},\nauthor={Haiping Wang and Yuan Liu and Bing WANG and YUJING SUN and Zhen Dong and Wenping Wang and Bisheng Yang},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=BPb5AhT2Vf}\n}", "github": "", "project": "", "reviewers": "fhKS;BaLi;zNJ2", "pdf_size": 21920377, "rating": "6;6;8", "confidence": "3;4;2", "soundness": "2;2;3", "contribution": "3;2;3", "presentation": "3;3;3", "wc_summary": "24;88;127", "wc_strengths": "24;55;60", "wc_weaknesses": "220;89;92", "wc_questions": "4;27;102", "wc_review": "272;259;381", "wc_reply_reviewers": "213;0;18", "wc_reply_authors": "1352;428;724", "reply_reviewers": "2;0;1", "reply_authors": "4;1;2", "rating_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 79.66666666666667, 42.460439103816256 ], "wc_strengths_avg": [ 46.333333333333336, 15.923427883328248 ], "wc_weaknesses_avg": [ 133.66666666666666, 61.05916984550496 ], "wc_questions_avg": [ 44.333333333333336, 41.84362423223984 ], "wc_review_avg": [ 304.0, 54.705270922157645 ], "wc_reply_reviewers_avg": [ 77.0, 96.44687656943589 ], "wc_reply_authors_avg": [ 834.6666666666666, 385.2525866954764 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.3333333333333335, 1.247219128924647 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 10, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=11736741740449634379&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 3, "openreview": "https://openreview.net/forum?id=BPb5AhT2Vf", "pdf": "https://openreview.net/pdf?id=BPb5AhT2Vf", "email": "whu.edu.cn;hku.hk;polyu.edu.hk;cs.hku.hk;whu.edu;tamu.edu;whu.edu.cn", "author_num": 7, "aff_unique_index": "0;1;2;1;0;3;0", "aff_unique_norm": "Wuhan University;University of Hong Kong;Hong Kong Polytechnic University;Texas A&M University", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.whu.edu.cn/;https://www.hku.hk;https://www.polyu.edu.hk;https://www.tamu.edu", "aff_unique_abbr": "WHU;HKU;PolyU;TAMU", "aff_campus_unique_index": "1;1;1;2", "aff_campus_unique": ";Hong Kong SAR;College Station", "aff_country_unique_index": "0;0;0;0;0;1;0", "aff_country_unique": "China;United States" }, { "id": "BPdagk1mV7", "title": "Implicit Semi-auto-regressive Image-to-Video Diffusion", "track": "main", "status": "Reject", "tldr": "", "abstract": "Diffusion models have demonstrated exceptional performance in various generative domains, particularly in the context of image and video generation. Despite their remarkable success, image-to-video (I2V) generation still remains a formidable challenge for most existing methods. Prior research has primarily concentrated on temporally modeling the entire video sequence, resulting in semantic correspondence but often lacking consistency with the initial image input in detail. In this paper, we present a novel temporal recurrent look-back approach for modeling video dynamics, leveraging prior information from the first frame (provided as a given image) as an implicit semi-auto-regressive process. Conditioned solely on preceding frames, our approach achieves enhanced consistency with the initial frame, thus avoiding unexpected generation results. Furthermore, we introduce a hybrid input initialization strategy to enhance the propagation of information within the look-back module. 
Our extensive experiments demonstrate that our approach is able to generate video clips with greater detail consistency relative to the provided image.", "keywords": "video generation;diffusion model", "primary_area": "generative models", "supplementary_material": "/attachment/7ac911870a6944b6c3b08f1335f53fcfee3a2407.zip", "author": "Tianyi Li;Kai Wang;Ziheng Qin;David Junhao Zhang;Tianle Zhang;Junbo Zhao;Mike Zheng Shou;Yang You", "authorids": "~Tianyi_Li5;~Kai_Wang8;~Ziheng_Qin1;~David_Junhao_Zhang1;~Tianle_Zhang4;~Junbo_Zhao1;~Mike_Zheng_Shou1;~Yang_You1", "gender": "M;M;M;M;M;M;M;", "homepage": "https://scholar.google.com/citations?user=pHXKrL0AAAAJ;https://kaiwang960112.github.io/;;;http://jakezhao.net/;https://www.comp.nus.edu.sg/~youy/;https://scholar.google.com/citations?user=6dCcnNEAAAAJ&hl=en;http://www.columbia.edu/~zs2262/", "dblp": ";78/2022-36;342/2679;;191/6665;33/8167-1.html;307/3295;284/0807", "google_scholar": ";i2II0XIAAAAJ;I04VhPMAAAAJ;;8ipao8MAAAAJ;jF4dPZwAAAAJ;6dCcnNEAAAAJ;h1-3lSoAAAAJ", "orcid": ";0000-0002-1154-5175;0009-0001-8571-1228;0000-0003-1502-9730;;;;", "linkedin": ";;ziheng-qin-635551184/;;;yang-you-0b92914b/;;", "or_profile": "~Tianyi_Li5;~Kai_Wang8;~Ziheng_Qin1;~Tianle_Zhang4;~Junbo_Zhao1;~Yang_You1;~Junhao_Zhang1;~Zheng_Shou1", "aff": "Zhejiang University;National University of Singapore;ByteDance Inc.;University of Electronic Science and Technology of China;Zhejiang University;National University of Singapore;National University of Singapore;National University of Singapore", "aff_domain": "zju.edu.cn;u.nus.edu;bytedance.com;cn.edu;zju.edu.cn;nus.edu.sg;nus.edu;nus.edu.sg", "position": "Undergrad student;PhD student;Intern;Undergrad student;Assistant Professor;Professor;PhD student;Assistant Professor", "bibtex": "@misc{\nli2024implicit,\ntitle={Implicit Semi-auto-regressive Image-to-Video Diffusion},\nauthor={Tianyi Li and Kai Wang and Ziheng Qin and David Junhao Zhang and Tianle Zhang and Junbo Zhao and Mike Zheng Shou and Yang You},\nyear={2024},\nurl={https://openreview.net/forum?id=BPdagk1mV7}\n}", "github": "", "project": "", "reviewers": "sTJV;hgs2;PWSR;7B9z", "site": "https://openreview.net/forum?id=BPdagk1mV7", "pdf_size": 3654334, "rating": "3;5;5;5", "confidence": "5;4;4;3", "soundness": "1;2;3;2", "contribution": "2;2;2;2", "presentation": "2;3;2;3", "wc_summary": "83;59;110;66", "wc_strengths": "30;6;69;75", "wc_weaknesses": "138;175;431;124", "wc_questions": "7;27;10;89", "wc_review": "258;267;620;354", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.0, 0.7071067811865476 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 79.5, 19.653244007033546 ], "wc_strengths_avg": [ 45.0, 28.38133189263675 ], "wc_weaknesses_avg": [ 217.0, 124.949989995998 ], "wc_questions_avg": [ 33.25, 33.07850510527947 ], "wc_review_avg": [ 374.75, 146.47418714572203 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ElPuD3lKsgEJ:scholar.google.com/&scioq=Implicit+Semi-auto-regressive+Image-to-Video+Diffusion&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": 
"0;1;2;3;0;1;1;1", "aff_unique_norm": "Zhejiang University;National University of Singapore;ByteDance;University of Electronic Science and Technology of China", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.zju.edu.cn;https://www.nus.edu.sg;https://www.bytedance.com;https://www.uestc.edu.cn", "aff_unique_abbr": "ZJU;NUS;ByteDance;UESTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;1;1;1", "aff_country_unique": "China;Singapore" }, { "id": "BQvbL2sFQx", "title": "Model-Agnostic Shift-Equivariant Downsampling", "track": "main", "status": "Reject", "tldr": "", "abstract": "The performance of convolutional neural networks (CNNs) are thought to be insensitive to image shifts. However, recent studies have revealed that downsampling layers in CNNs result in inconsistent outputs for shifted input images. In this\nstudy, we present an approach for performing downsampling that ensures absolute shift equivariance. By employing model-agnostic downsampling method that leverages origin selection functions obtained from coordinate-independent statistics of the feature map, we can achieve perfect shift equivariance, while still adhering to the conventional downsampling procedures. Our method allows CNNs to exhibit both improved accuracy and perfect shift invariance for image classification, while also achieving shift equivariance in semantic segmentation benchmarks. Furthermore, we introduce a methodology for achieving shift equivariance without the need for any additional training process. This is accomplished by transferring pretrained weights and replacing existing layers with shift-equivariant\ncounterparts. Additionaly, we show that fine-tuning of the modified CNNs leads superior performance compared to previously proposed models.", "keywords": "Shift equivariance;Shift invariance;Downsampling;Convolutional neural networks", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "", "author": "Myungjoon Kim;Arthur Baucour;Jonghwa Shin", "authorids": "~Myungjoon_Kim1;~Arthur_Baucour1;~Jonghwa_Shin1", "gender": "M;M;M", "homepage": "https://github.com/myungjoon;;https://apmd.kaist.ac.kr", "dblp": ";;", "google_scholar": "https://scholar.google.co.kr/citations?user=dEDl6yQAAAAJ;HQLRKVkAAAAJ;O_qshZoAAAAJ", "orcid": ";0000-0002-8251-5504;0000-0003-0712-464X", "linkedin": ";arthur-baucour/?locale=en_US;", "or_profile": "~Myungjoon_Kim1;~Arthur_Baucour1;~Jonghwa_Shin1", "aff": "Korea Advanced Institute of Science & Technology;;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;;kaist.ac.kr", "position": "Postdoc;;Associate Professor", "bibtex": "@misc{\nkim2024modelagnostic,\ntitle={Model-Agnostic Shift-Equivariant Downsampling},\nauthor={Myungjoon Kim and Arthur Baucour and Jonghwa Shin},\nyear={2024},\nurl={https://openreview.net/forum?id=BQvbL2sFQx}\n}", "github": "", "project": "", "reviewers": "1mgo;pcfU;1rzF;QNhG;gzfx", "site": "https://openreview.net/forum?id=BQvbL2sFQx", "pdf_size": 1113970, "rating": "3;3;3;6;8", "confidence": "4;4;4;4;4", "soundness": "2;3;2;4;4", "contribution": "2;3;2;3;3", "presentation": "1;3;2;3;2", "wc_summary": "78;48;15;73;63", "wc_strengths": "52;31;59;63;28", "wc_weaknesses": "527;64;463;302;92", "wc_questions": "10;124;29;42;45", "wc_review": "667;267;566;480;228", "wc_reply_reviewers": "20;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;0;0;0;0", "reply_authors": "0;0;0;0;0", "rating_avg": [ 
4.6, 2.0591260281974 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.8944271909999159 ], "contribution_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.2, 0.7483314773547882 ], "wc_summary_avg": [ 55.4, 22.650386310171402 ], "wc_strengths_avg": [ 46.6, 14.430523206037957 ], "wc_weaknesses_avg": [ 289.6, 187.89422556321418 ], "wc_questions_avg": [ 50.0, 39.00256401827962 ], "wc_review_avg": [ 441.6, 169.62617722509697 ], "wc_reply_reviewers_avg": [ 4.0, 8.0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.2, 0.4000000000000001 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:V2OEY6K8ONgJ:scholar.google.com/&scioq=Model-Agnostic+Shift-Equivariant+Downsampling&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "id": "BRO4PfCiwb", "title": "OS-net: Orbitally Stable Neural Networks", "track": "main", "status": "Reject", "tldr": "", "abstract": "We introduce OS-net (Orbitally Stable neural NETworks), a new family of neural network architectures specifically designed for periodic dynamical data. OS-net is a special case of Neural Ordinary Differential Equations (NODEs) and takes fully advantage of the adjoint method based backpropagation method. Utilizing ODE theory, we derive conditions on the network weights to ensure stability of the resulting dynamics. We demonstrate the efficacy of our approach by applying OS-net to discover the dynamics underlying the R\\\"{o}ssler and Sprott's systems, two dynamical systems known for their period doubling attractors and chaotic behavior.", "keywords": "neural networks;dynamical systems;chaotic systems;periodic attractor;stable attractors", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/d4e728fe850257d5cf3e0cfdecc4d512a9f6bd73.pdf", "author": "Marieme Ngom;Carlo Graziani", "authorids": "~Marieme_Ngom1;~Carlo_Graziani1", "gender": ";M", "homepage": ";https://www.anl.gov/profile/carlo-j-graziani", "dblp": ";", "google_scholar": ";V6g20IgAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Marieme_Ngom1;~Carlo_Graziani1", "aff": ";Argonne National Laboratory", "aff_domain": ";anl.gov", "position": ";Researcher", "bibtex": "@misc{\nngom2024osnet,\ntitle={{OS}-net: Orbitally Stable Neural Networks},\nauthor={Marieme Ngom and Carlo Graziani},\nyear={2024},\nurl={https://openreview.net/forum?id=BRO4PfCiwb}\n}", "github": "", "project": "", "reviewers": "Yjdz;xpia;2Hj6;YZQH", "site": "https://openreview.net/forum?id=BRO4PfCiwb", "pdf_size": 8640581, "rating": "3;3;3;5", "confidence": "2;2;4;2", "soundness": "2;2;2;2", "contribution": "2;2;2;3", "presentation": "1;2;1;2", "wc_summary": "34;42;71;103", "wc_strengths": "9;15;48;37", "wc_weaknesses": "403;247;342;83", "wc_questions": "2;5;136;17", "wc_review": "448;309;597;240", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 2.5, 0.8660254037844386 ], "soundness_avg": [ 2.0, 0.0 ], 
"contribution_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 1.5, 0.5 ], "wc_summary_avg": [ 62.5, 27.13392710243027 ], "wc_strengths_avg": [ 27.25, 15.880412463157246 ], "wc_weaknesses_avg": [ 268.75, 120.79398784707789 ], "wc_questions_avg": [ 40.0, 55.70906568952669 ], "wc_review_avg": [ 398.5, 136.91694562763223 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ferq88qgI6cJ:scholar.google.com/&scioq=OS-net:+Orbitally+Stable+Neural+Networks&hl=en&as_sdt=0,5", "gs_version_total": 3, "aff_unique_index": "0", "aff_unique_norm": "Argonne National Laboratory", "aff_unique_dep": "", "aff_unique_url": "https://www.anl.gov", "aff_unique_abbr": "ANL", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "BRTyPCq4wL", "title": "Cascaded Contrastive Medical Language-Image Pretraining on Radiology Images", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Due to the concise design and the wonderful generalization performance, contrastive language-image pre-training (CLIP) has been investigated in the medical domain for medical image understanding. However, few studies have been done on CLIP for multilevel medical information alignment. In this paper, we proposed cascaded CLIP (casCLIP) where contrastive alignment is performed on multilevel information. In addition, we propose aligning the report with the entire image series and employ a multi-layer transformer to integrate the image embeddings from a study into a single embedding of image series. Moreover, we introduce support alignment opposition de-alignment method to enhance higher-level alignment. In this study, casCLIP was pre-trained on a dataset of chest X-ray images with reports and the high level disease information extracted from the reports. 
Experimental results on multiple public benchmarks demonstrate the effectiveness of our model for zero-shot classification.", "keywords": "Contrastive learning; medical imaging; multi-modality; clinical language model", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/d2b0dd4a9473a5369c5c84545cbb79023a43fdca.zip", "author": "Chengsheng Mao;Hanyin Wang;Yuan Luo", "authorids": "~Chengsheng_Mao1;~Hanyin_Wang1;~Yuan_Luo3", "gender": "M;F;M", "homepage": ";;https://www.feinberg.northwestern.edu/faculty-profiles/az/profile.html?xid=33821", "dblp": ";;90/6959-4", "google_scholar": "https://scholar.google.com/citations?hl=en;YfIObrUAAAAJ;txsHQx4AAAAJ", "orcid": ";0000-0001-9884-9683;", "linkedin": ";hanyinwang2022/;yuan-luo-16797137/", "or_profile": "~Chengsheng_Mao1;~Hanyin_Wang1;~Yuan_Luo3", "aff": "Northwestern University;Northwestern University, Northwestern University;Northwestern University", "aff_domain": "northwestern.edu;u.northwestern.edu;northwestern.edu", "position": "Assistant Professor;PhD student;Full Professor", "bibtex": "@misc{\nmao2024cascaded,\ntitle={Cascaded Contrastive Medical Language-Image Pretraining on Radiology Images},\nauthor={Chengsheng Mao and Hanyin Wang and Yuan Luo},\nyear={2024},\nurl={https://openreview.net/forum?id=BRTyPCq4wL}\n}", "github": "", "project": "", "reviewers": "V3gc;WmZb;6KtK;cZSY", "site": "https://openreview.net/forum?id=BRTyPCq4wL", "pdf_size": 789886, "rating": "3;3;5;5", "confidence": "3;4;4;4", "soundness": "2;1;2;2", "contribution": "2;1;2;3", "presentation": "3;1;3;2", "wc_summary": "130;49;75;86", "wc_strengths": "87;22;77;90", "wc_weaknesses": "193;400;682;276", "wc_questions": "27;2;169;89", "wc_review": "437;473;1003;541", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 1.75, 0.4330127018922193 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 85.0, 29.248931604419333 ], "wc_strengths_avg": [ 69.0, 27.55902755904134 ], "wc_weaknesses_avg": [ 387.75, 185.16799804501858 ], "wc_questions_avg": [ 71.75, 64.46462208064203 ], "wc_review_avg": [ 613.5, 227.9577811788841 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:t3x3WoH87FAJ:scholar.google.com/&scioq=Cascaded+Contrastive+Medical+Language-Image+Pretraining+on+Radiology+Images&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Northwestern University", "aff_unique_dep": "", "aff_unique_url": "https://www.northwestern.edu", "aff_unique_abbr": "NU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "DAFA: Distance-Aware Fair Adversarial Training", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19216", "id": "BRdEBlwUW6", "author_site": "Hyungyu Lee, Saehyung Lee, Hyemi Jang, Junsung Park, Ho Bae, Sungroh Yoon", "tldr": "", "abstract": "The disparity in accuracy between classes in standard training is amplified during adversarial 
training, a phenomenon termed the robust fairness problem. Existing methodologies aimed to enhance robust fairness by sacrificing the model's performance on easier classes in order to improve its performance on harder ones. However, we observe that under adversarial attacks, the majority of the model's predictions for samples from the worst class are biased towards classes similar to the worst class, rather than towards the easy classes. Through theoretical and empirical analysis, we demonstrate that robust fairness deteriorates as the distance between classes decreases. Motivated by these insights, we introduce the Distance-Aware Fair Adversarial Training (DAFA) methodology, which addresses robust fairness by taking into account the similarities between classes. Specifically, our method assigns distinct adversarial margins and loss weights to each class and adjusts them to encourage a trade-off in robustness among similar classes. Experimental results across various datasets demonstrate that our method not only maintains average robust accuracy but also significantly improves the worst robust accuracy, indicating a marked improvement in robust fairness compared to existing methods.", "keywords": "adversarial robustness;robust fairness;adversarial examples;adversarial training", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "", "author": "Hyungyu Lee;Saehyung Lee;Hyemi Jang;Junsung Park;Ho Bae;Sungroh Yoon", "authorids": "~Hyungyu_Lee1;~Saehyung_Lee1;~Hyemi_Jang1;~Junsung_Park1;~Ho_Bae1;~Sungroh_Yoon1", "gender": "M;;M;M;;M", "homepage": ";http://data.snu.ac.kr;http://data.snu.ac.kr/;https://www.spai.co.kr;http://ailab.snu.ac.kr;https://snu.ac.kr", "dblp": "260/0442;224/0270;;199/1782;99/1474;", "google_scholar": "nS24h74AAAAJ;;;https://scholar.google.com/citations?hl=en;Bphl_fIAAAAJ;", "orcid": ";0000-0002-7736-0528;;0000-0002-5238-3547;0000-0002-2367-197X;", "linkedin": ";;;;;", "or_profile": "~Saehyung_Lee1;~Hyemi_Jang1;~Junsung_Park1;~Ho_Bae1;~Sungroh_Yoon1;~Hyungyu_Lee2", "aff": "Adobe Systems;Seoul National University;Seoul National University;Ewha Womans University;Seoul National University;Seoul National University", "aff_domain": "adobe.com;snu.ac.kr;snu.ac.kr;ewha.ac.kr;snu.ac.kr;snu.ac.kr", "position": "Intern;PhD student;PhD student;Assistant Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nlee2024dafa,\ntitle={{DAFA}: Distance-Aware Fair Adversarial Training},\nauthor={Hyungyu Lee and Saehyung Lee and Hyemi Jang and Junsung Park and Ho Bae and Sungroh Yoon},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=BRdEBlwUW6}\n}", "github": "", "project": "", "reviewers": "nK3C;Wxgz;n9NT;73vn", "pdf_size": 3202744, "rating": "5;6;6;8", "confidence": "3;4;4;4", "soundness": "2;3;3;3", "contribution": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "193;120;107;50", "wc_strengths": "30;12;75;93", "wc_weaknesses": "74;45;97;455", "wc_questions": "92;2;68;56", "wc_review": "389;179;347;654", "wc_reply_reviewers": "0;0;81;125", "wc_reply_authors": "1824;1448;2026;3162", "reply_reviewers": "0;0;1;2", "reply_authors": "3;3;4;6", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 117.5, 
50.92396292512986 ], "wc_strengths_avg": [ 52.5, 32.76049450176233 ], "wc_weaknesses_avg": [ 167.75, 166.86427868180775 ], "wc_questions_avg": [ 54.5, 32.96589146375386 ], "wc_review_avg": [ 392.25, 170.3281758840856 ], "wc_reply_reviewers_avg": [ 51.5, 53.79823417176441 ], "wc_reply_authors_avg": [ 2115.0, 639.0813719707373 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 4.0, 1.224744871391589 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.6622661785325219, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17394704892862092861&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=BRdEBlwUW6", "pdf": "https://openreview.net/pdf?id=BRdEBlwUW6", "email": "adobe.com;snu.ac.kr;snu.ac.kr;ewha.ac.kr;snu.ac.kr;snu.ac.kr", "author_num": 6, "aff_unique_index": "0;1;1;2;1;1", "aff_unique_norm": "Adobe;Seoul National University;Ewha Womans University", "aff_unique_dep": "Adobe Systems Incorporated;;", "aff_unique_url": "https://www.adobe.com;https://www.snu.ac.kr;http://www.ewha.ac.kr", "aff_unique_abbr": "Adobe;SNU;Ewha", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;1", "aff_country_unique": "United States;South Korea" }, { "id": "BRoBig6ov1", "title": "High-Order Tensor Recovery with A Tensor $U_1$ Norm", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Recently, numerous tensor SVD (t-SVD)-based tensor recovery methods have emerged, showing promise in processing visual data. However, these methods often suffer from performance degradation when confronted with high-order tensor data exhibiting non-smooth changes (possibly caused by random slice permutation), commonly observed in real-world scenarios but ignored by the traditional t-SVD-based methods. Our objective in this study is to provide an effective tensor recovery technique for handling non-smooth changes in tensor data and efficiently exploring the correlations of high-order tensor data across its various dimensions. To this end, we introduce a new tensor decomposition and a new tensor norm called the Tensor U1 norm. An optimization algorithm is proposed to solve the resulting tensor completion model iteratively by combining the proximal algorithm with the Alternating Direction Method of Multipliers. Theoretical analysis showed the convergence of the algorithm to the Karush\u2013Kuhn\u2013Tucker (KKT) point of the optimization problem. Numerical experiments demonstrated the effectiveness of the proposed method in high-order tensor completion, especially for tensor data with non-smooth changes. 
This study fills a critical gap in the t-SVD-based tensor recovery by providing a practical and effective solution that enables the exploration of correlations in high-order tensor data across its different dimensions, even in the presence of non-smooth changes.", "keywords": "Tensor SVD; High Order Tensor Recovery; Tensor Completion", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "/attachment/d3e668c152a215584fe8a3bd3cd6287c65eff290.pdf", "author": "Jingjing Zheng;Wenzhe Wang;Xiaoqin Zhang;Yankai Cao;Xianta Jiang", "authorids": "~Jingjing_Zheng3;~Wenzhe_Wang2;~Xiaoqin_Zhang4;~Yankai_Cao1;~Xianta_Jiang2", "gender": "F;M;;M;", "homepage": "https://jzheng20.github.io/;;;https://optimal.chbe.ubc.ca;", "dblp": ";;;155/9335;", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?view_op=list_works;;M-s3mjAAAAAJ;", "orcid": "0000-0003-1955-5308;;;0000-0001-9014-2552;", "linkedin": "jingjing-zheng-978303263/?originalSubdomain=ca;;;;", "or_profile": "~Jingjing_Zheng3;~Wenzhe_Wang2;~Xiaoqin_Zhang4;~Yankai_Cao1;~Xianta_Jiang2", "aff": "The University of British Columbia;;;University of British Columbia;", "aff_domain": "math.ubc.ca;;;ubc.ca;", "position": "PhD student;;;Associate Professor;", "bibtex": "@misc{\nanonymous2024highorder,\ntitle={High-Order Tensor Recovery with A Tensor \\$U\\_1\\$ Norm},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=BRoBig6ov1}\n}", "github": "", "project": "", "reviewers": "Yoac;w1M5;1gNF", "site": "https://openreview.net/forum?id=BRoBig6ov1", "pdf_size": 448595, "rating": "3;5;5", "confidence": "4;3;4", "soundness": "2;2;3", "contribution": "1;2;2", "presentation": "2;2;2", "wc_summary": "125;93;59", "wc_strengths": "132;44;24", "wc_weaknesses": "278;193;193", "wc_questions": "107;135;45", "wc_review": "642;465;321", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "770;779;598", "reply_reviewers": "0;0;0", "reply_authors": "2;2;1", "rating_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 1.6666666666666667, 0.4714045207910317 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 92.33333333333333, 26.948510575210314 ], "wc_strengths_avg": [ 66.66666666666667, 46.91363222869115 ], "wc_weaknesses_avg": [ 221.33333333333334, 40.069384267237695 ], "wc_questions_avg": [ 95.66666666666667, 37.60614606978788 ], "wc_review_avg": [ 476.0, 131.27833027579229 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 715.6666666666666, 83.28398538868215 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.49999999999999983, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:9kQQ67QQUGAJ:scholar.google.com/&scioq=High-Order+Tensor+Recovery+with+A+Tensor+%24U_1%24+Norm&hl=en&as_sdt=0,5", "gs_version_total": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of British Columbia", "aff_unique_dep": "", "aff_unique_url": "https://www.ubc.ca", "aff_unique_abbr": "UBC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "id": "BSePKWwTUj", "title": "Multiobjective Stochastic Linear Bandits under Lexicographic Ordering", "track": "main", "status": "Reject", "tldr": "", 
"abstract": "This paper studies the multiobjective stochastic linear bandit (MOSLB) model under lexicographic ordering, where the agent aims to simultaneously maximize $m$ objectives in a hierarchical manner. This model has various real-world scenarios, including water resource planning and radiation treatment for cancer patients. However, there is no effort on the general MOSLB model except a special case called multiobjective multi-armed bandits. Previous literature provided a suboptimal algorithm for this special case, which enjoys a regret bound of $\\widetilde{O}(T^{2/3})$ under a priority-based regret measure. In this paper, we propose an algorithm achieving the almost optimal regret bound $\\widetilde{O}(d\\sqrt{T})$ for the MOSLB model, and its metric is the general regret. Here, $d$ is the dimension of arm vector and $T$ is the time horizon. The major novelties of our algorithm include a new arm filter and a multiple trade-off approach for exploration and exploitation. Experiments confirm the merits of our algorithms and provide compelling evidence to support our analysis.", "keywords": "multiobjective;bandits;lexicographic ordering", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Bo Xue;Xi Lin;Xiaoyuan Zhang;Qingfu Zhang", "authorids": "~Bo_Xue1;~Xi_Lin2;~Xiaoyuan_Zhang2;~Qingfu_Zhang1", "gender": "M;M;M;M", "homepage": "https://xueb1996.github.io/;https://xi-l.github.io/;;https://www.cs.cityu.edu.hk/~qzhan7/index.html", "dblp": "122/2421-4;43/489-1;;98/1240.html", "google_scholar": "1D4gVmIAAAAJ;QB_MUboAAAAJ;KQj18L8AAAAJ;https://scholar.google.co.uk/citations?user=nhL9PHwAAAAJ", "orcid": "0000-0002-7295-4853;;0000-0002-3852-645X;", "linkedin": ";;;", "or_profile": "~Bo_Xue1;~Xi_Lin2;~Xiaoyuan_Zhang2;~Qingfu_Zhang1", "aff": "City University of Hong Kong;City University of Hong Kong;City University of Hong Kong;City University of Hong Kong", "aff_domain": "cityu.edu.hk;cityu.edu.hk;cityu.edu.hk;cityu.edu.hk", "position": "PhD student;Postdoc;PhD student;Full Professor", "bibtex": "@misc{\nxue2024multiobjective,\ntitle={Multiobjective Stochastic Linear Bandits under Lexicographic Ordering},\nauthor={Bo Xue and Xi Lin and Xiaoyuan Zhang and Qingfu Zhang},\nyear={2024},\nurl={https://openreview.net/forum?id=BSePKWwTUj}\n}", "github": "", "project": "", "reviewers": "1R9s;1nFM;vLB6;chLG", "site": "https://openreview.net/forum?id=BSePKWwTUj", "pdf_size": 442112, "rating": "3;3;5;5", "confidence": "4;4;3;3", "soundness": "1;2;3;3", "contribution": "2;2;2;2", "presentation": "2;2;3;4", "wc_summary": "95;63;69;76", "wc_strengths": "111;22;66;81", "wc_weaknesses": "625;256;352;266", "wc_questions": "10;19;108;27", "wc_review": "841;360;595;450", "wc_reply_reviewers": "0;27;33;0", "wc_reply_authors": "1168;1071;1566;667", "reply_reviewers": "0;1;1;0", "reply_authors": "3;4;4;2", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 75.75, 12.028611723719408 ], "wc_strengths_avg": [ 70.0, 32.101401838549044 ], "wc_weaknesses_avg": [ 374.75, 149.22361575836447 ], "wc_questions_avg": [ 41.0, 39.147158262126766 ], "wc_review_avg": [ 561.5, 181.84952570738258 ], "wc_reply_reviewers_avg": [ 15.0, 15.149257407543116 ], "wc_reply_authors_avg": [ 1118.0, 319.6928213144612 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.25, 0.82915619758885 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], 
"corr_rating_confidence": -1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:SZA6_kszl4QJ:scholar.google.com/&scioq=Multiobjective+Stochastic+Linear+Bandits+under+Lexicographic+Ordering&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "City University of Hong Kong", "aff_unique_dep": "", "aff_unique_url": "https://www.cityu.edu.hk", "aff_unique_abbr": "CityU", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "BSqVfAFJWz", "title": "The Distributional Reward Critic Architecture for Reinforcement Learning Under Confusion Matrix Reward Perturbations", "track": "main", "status": "Reject", "tldr": "", "abstract": "We study reinforcement learning in the presence of an unknown reward perturbation. Existing methodologies for this problem make strong assumptions including reward smoothness, known perturbations, and/or perturbations that do not modify the optimal policy. We study the case of unknown arbitrary perturbations that discretize and shuffle reward space, but have the property that the true reward belongs to the most frequently observed class after perturbation. This class of perturbations generalizes existing classes (and, in the limit, all continuous bounded perturbations) and defeats existing methods. We introduce an adaptive distributional reward critic and show theoretically that it can recover the true rewards under technical conditions. Under the targeted perturbation in discrete and continuous control tasks, we win/tie the highest return in 40/57 settings (compared to 16/57 for the best baseline). Even under the untargeted perturbation, we still win an edge over the baseline designed especially for that setting.", "keywords": "Reinforcement Learning;policy gradient;reward perturbation", "primary_area": "reinforcement learning", "supplementary_material": "", "author": "Xi Chen;Zhihui Zhu;Andrew Perrault", "authorids": "~Xi_Chen42;~Zhihui_Zhu1;~Andrew_Perrault1", "gender": "M;M;M", "homepage": "https://engineering.osu.edu/people/chen.10183;https://zhihuizhu.github.io/;https://aperrault.github.io", "dblp": ";71/8081;151/3622", "google_scholar": ";gmSwszcAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0002-5062-7958", "linkedin": ";;andrew-perrault-2b956733/", "or_profile": "~Xi_Chen42;~Zhihui_Zhu1;~Andrew_Perrault1", "aff": "Ohio State University, Columbus;Ohio State University, Columbus;Ohio State University", "aff_domain": "osu.edu;osu.edu;osu.edu", "position": "PhD student;Assistant Professor;Assistant Professor", "bibtex": "@misc{\nchen2024the,\ntitle={The Distributional Reward Critic Architecture for Reinforcement Learning Under Confusion Matrix Reward Perturbations},\nauthor={Xi Chen and Zhihui Zhu and Andrew Perrault},\nyear={2024},\nurl={https://openreview.net/forum?id=BSqVfAFJWz}\n}", "github": "", "project": "", "reviewers": "Jfuy;XsR2;wJ8T;m2dC", "site": "https://openreview.net/forum?id=BSqVfAFJWz", "pdf_size": 5730338, "rating": "3;5;6;6", "confidence": "4;4;3;3", "soundness": "2;2;3;3", "contribution": "1;2;2;2", "presentation": "2;3;3;3", "wc_summary": "66;99;131;208", "wc_strengths": "29;162;100;93", "wc_weaknesses": "545;401;108;291", "wc_questions": "47;40;48;186", "wc_review": "687;702;387;778", "wc_reply_reviewers": "35;471;230;101", "wc_reply_authors": "2747;2473;1056;2262", "reply_reviewers": "1;3;1;1", "reply_authors": "5;4;3;5", "rating_avg": [ 5.0, 
1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "contribution_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 126.0, 52.62603918213872 ], "wc_strengths_avg": [ 96.0, 47.090338711884414 ], "wc_weaknesses_avg": [ 336.25, 159.62044825146933 ], "wc_questions_avg": [ 80.25, 61.132540434698114 ], "wc_review_avg": [ 638.5, 149.24560295030471 ], "wc_reply_reviewers_avg": [ 209.25, 166.60188324265727 ], "wc_reply_authors_avg": [ 2134.5, 645.9792953338365 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 4.25, 0.82915619758885 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8164965809277259, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:l8lSyEEwV9sJ:scholar.google.com/&scioq=The+Distributional+Reward+Critic+Architecture+for+Reinforcement+Learning+Under+Confusion+Matrix+Reward+Perturbations&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Ohio State University", "aff_unique_dep": "", "aff_unique_url": "https://www.osu.edu", "aff_unique_abbr": "OSU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Columbus;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "What Makes Good Data for Alignment? A Comprehensive Study of Automatic Data Selection in Instruction Tuning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2024/poster/19215", "id": "BTKAeLqLMw", "author_site": "Wei Liu, Weihao Zeng, Keqing He, Yong Jiang, Junxian He", "tldr": "", "abstract": "Instruction tuning is a standard technique employed to align large language models to end tasks and user preferences after the initial pretraining phase. Recent research indicates the critical role of data engineering in instruction tuning -- when appropriately selected, only limited data is necessary to achieve superior performance. However, we still lack a principled understanding of what makes good instruction tuning data for alignment, and how we should select data automatically and effectively. In this work, we delve deeply into automatic data selection strategies for alignment. We start with controlled studies to measure data across three dimensions: complexity, quality, and diversity, along which we examine existing methods and introduce novel techniques for enhanced data measurement. Subsequently, we propose a simple strategy to select data samples based on the measurement. We present Deita (short for Data-Efficient Instruction Tuning for Alignment), a series of models fine-tuned from LLaMA models using data samples automatically selected with our proposed approach. When assessed through both automatic metrics and human evaluation, Deita performs better or on par with the state-of-the-art open-source alignment models such as Vicuna and WizardLM with only 6K training data samples -- 10x less than the data used in the baselines. 
We anticipate this work to provide clear guidelines and tools on automatic data selection, aiding researchers and practitioners in achieving data-efficient alignment.", "keywords": "data selection;instruction tuning;large language models", "primary_area": "general machine learning (i.e., none of the above)", "supplementary_material": "", "author": "Wei Liu;Weihao Zeng;Keqing He;Yong Jiang;Junxian He", "authorids": "~Wei_Liu25;~Weihao_Zeng2;~Keqing_He1;~Yong_Jiang1;~Junxian_He1", "gender": "M;M;;M;M", "homepage": "https://vpeterv.github.io/;https://zeng-wh.github.io/;https://helicqin.github.io/about/index.html;http://jiangyong.site/;https://jxhe.github.io", "dblp": "49/3283-131;174/3836;79/2314;;188/6127.html", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;811USNoAAAAJ;sxXZWQQAAAAJ;BIFGeoUAAAAJ", "orcid": "0000-0003-2195-2310;;;;", "linkedin": ";;;;", "or_profile": "~Wei_Liu25;~Weihao_Zeng2;~Keqing_He1;~Yong_Jiang1;~Junxian_He1", "aff": "ShanghaiTech University;Beijing University of Posts and Telecommunications;Meituan Group;Tongyi Lab;Hong Kong University of Science and Technology", "aff_domain": "shanghaitech.edu.cn;bupt.edu.cn;meituan.com;alibaba-inc.com;ust.hk", "position": "MS student;MS student;Researcher;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nliu2024what,\ntitle={What Makes Good Data for Alignment? A Comprehensive Study of Automatic Data Selection in Instruction Tuning},\nauthor={Wei Liu and Weihao Zeng and Keqing He and Yong Jiang and Junxian He},\nbooktitle={The Twelfth International Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=BTKAeLqLMw}\n}", "github": "", "project": "", "reviewers": "RtKh;zdke;NSy4", "pdf_size": 1834469, "rating": "5;6;8", "confidence": "2;3;4", "soundness": "2;3;2", "contribution": "2;2;3", "presentation": "3;4;3", "wc_summary": "101;57;279", "wc_strengths": "44;66;59", "wc_weaknesses": "265;77;251", "wc_questions": "179;35;95", "wc_review": "589;235;684", "wc_reply_reviewers": "135;26;0", "wc_reply_authors": "1042;728;1297", "reply_reviewers": "1;1;0", "reply_authors": "3;1;3", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 145.66666666666666, 95.97684906036224 ], "wc_strengths_avg": [ 56.333333333333336, 9.177266598624136 ], "wc_weaknesses_avg": [ 197.66666666666666, 85.5154307063286 ], "wc_questions_avg": [ 103.0, 59.0592922409336 ], "wc_review_avg": [ 502.6666666666667, 193.20167931177224 ], "wc_reply_reviewers_avg": [ 53.666666666666664, 58.482665997902515 ], "wc_reply_authors_avg": [ 1022.3333333333334, 232.70916517308802 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.9428090415820634 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.9819805060619659, "gs_citation": 169, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16674966283854801761&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "openreview": "https://openreview.net/forum?id=BTKAeLqLMw", "pdf": "https://openreview.net/pdf?id=BTKAeLqLMw", "email": "shanghaitech.edu.cn;bupt.edu.cn;meituan.com;alibaba-inc.com;ust.hk", "author_num": 5, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "ShanghaiTech University;Beijing University of 
Posts and Telecommunications;Meituan Group;Tongyi Lab;Hong Kong University of Science and Technology", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.shanghaitech.edu.cn;http://www.bupt.edu.cn/;https://www.meituan.com;;https://www.ust.hk", "aff_unique_abbr": "ShanghaiTech;BUPT;Meituan;;HKUST", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Beijing;Hong Kong SAR", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China;" }, { "id": "BTcZwitfgX", "title": "Rethinking The Dependence Between Gradients and The Initial Point in Deep Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Despite the considerable advancements in Deep Neural Networks (DNNs), their intrinsic opacity remains a challenge rooted in their foundational design. In this study, we elucidate a novel phenomenon wherein the representation of cumulative gradients (the aggregate changes in iterative gradients) exhibits a certain independence from the initial computation point of the gradients. This implies that learned gradients can be assigned to other arbitrarily initialized yet well-trained neural networks while retaining a representation comparable to that of the original network. This occurrence is counterintuitive and cannot be well explained by existing optimization theories. Additionally, we observe that the learned model weights can also be reassigned to different neural networks. In essence, these learned gradients can be viewed as a neural network with analogous representations. Furthermore, this reassignment of gradients and model weights can potentially mitigate catastrophic forgetting when learning multiple tasks. We provide a theoretical framework to support this claim. 
Our extensive experiments clearly illustrate this phenomenon and its potential to mitigate catastrophic forgetting.", "keywords": "black-box neural network", "primary_area": "learning theory", "supplementary_material": "", "author": "Hui Xu;Jie Shao", "authorids": "~Hui_Xu2;~Jie_Shao4", "gender": "M;M", "homepage": "http://cfm.uestc.edu.cn/~shaojie/;", "dblp": ";", "google_scholar": "ikbw5okAAAAJ;", "orcid": "0000-0003-2615-1555;0000-0003-2081-555X", "linkedin": ";", "or_profile": "~Jie_Shao4;~Hui.kim_Xu1", "aff": "University of Electronic Science and Technology of China;University of Electronic Science and Technology of China", "aff_domain": "uestc.edu.cn;uestc.edu.cn", "position": "Professor;Associate Professor", "bibtex": "@misc{\nxu2024rethinking,\ntitle={Rethinking The Dependence Between Gradients and The Initial Point in Deep Learning},\nauthor={Hui Xu and Jie Shao},\nyear={2024},\nurl={https://openreview.net/forum?id=BTcZwitfgX}\n}", "github": "", "project": "", "reviewers": "izxw;gRM6;vmiY;gVxp", "site": "https://openreview.net/forum?id=BTcZwitfgX", "pdf_size": 409432, "rating": "1;3;3;3", "confidence": "4;4;4;5", "soundness": "1;2;1;2", "contribution": "1;3;1;1", "presentation": "1;2;1;2", "wc_summary": "76;167;233;80", "wc_strengths": "9;147;24;52", "wc_weaknesses": "359;244;746;252", "wc_questions": "12;99;129;139", "wc_review": "456;657;1132;523", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 2.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 1.5, 0.5 ], "contribution_avg": [ 1.5, 0.8660254037844386 ], "presentation_avg": [ 1.5, 0.5 ], "wc_summary_avg": [ 139.0, 65.32610504231826 ], "wc_strengths_avg": [ 58.0, 53.65165421494476 ], "wc_weaknesses_avg": [ 400.25, 204.71733561181378 ], "wc_questions_avg": [ 94.75, 49.991874339736455 ], "wc_review_avg": [ 692.0, 264.1410607989602 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:TNZ5w7_MWdkJ:scholar.google.com/&scioq=Rethinking+The+Dependence+Between+Gradients+and+The+Initial+Point+in+Deep+Learning&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "University of Electronic Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "https://www.uestc.edu.cn", "aff_unique_abbr": "UESTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "BTd5Tak69u", "title": "Unsupervised Learning of Object-Centric Representation from Multi-Viewpoint Scenes", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Objects in a 2D image are influenced by factors like perspective, illumination, and occlusion in the corresponding 3D scene. This results in the challenge of identifying objects across different viewpoints. Humans can effortlessly identify objects from different viewpoints by recognizing their invariant characteristics in 3D dimensions. Motivated by this observation, we propose an object-centric learning method named Learning Object-centric Representation from Multi-viewpoint (LORM), which learns the representations of objects from multi-viewpoint scenes without any supervision. 
LORM leverages a novel slot attention encoder to decompose the representation of a scene into two distinct components: a viewpoint representation and several object representations. The former encompasses the viewpoint-dependent attributes (i.e., camera position and lighting) of the image observed from each viewpoint, while the latter captures the viewpoint-independent features (i.e., appearance, shape, scale, rotation and position) of the object across various perspectives. We propose a mixture patch decoder to enable LORM to simultaneously handle complex scenes and reconstruct an individual object's 2D appearance and shape at a specific viewpoint through the corresponding object representation and viewpoint representation. Extensive experiments are conducted on three complex simulation datasets, and the results demonstrate that our proposed method outperforms compared methods in individual object reconstruction while achieving comparable performance in scene decomposition.", "keywords": "Object-Centric Learning;Mulit-Viewpoints Learning", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/65b1c01b47c8a3004ec9b64cc8853e82ca0289a8.zip", "author": "Tonglin Chen;Yinxuan Huang;Zhimeng Shen;Jinghao Huang;Lin Zhu;Dakun Yang;Bin Li;Xiangyang Xue", "authorids": "~Tonglin_Chen1;~Yinxuan_Huang1;~Zhimeng_Shen2;~Jinghao_Huang1;~Lin_Zhu11;~Dakun_Yang1;~Bin_Li4;~Xiangyang_Xue2", "gender": "M;;;M;M;;M;M", "homepage": ";;;https://1827406014.github.io/;https://dblp.org/pid/72/527.html;;https://aimpressionist.github.io/publications;http://homepage.fudan.edu.cn//xyxue", "dblp": ";;;;72/527.html;145/5194.html;89/6764-15;84/3791", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;;;;;8t97oL8AAAAJ;", "orcid": ";;;;;;0000-0002-9633-0033;0000-0002-4897-9209", "linkedin": ";;;;;;;", "or_profile": "~Tonglin_Chen1;~Yinxuan_Huang1;~Zhimeng_Shen2;~Jinghao_Huang1;~Lin_Zhu11;~Dakun_Yang1;~Bin_Li4;~Xiangyang_Xue2", "aff": "Fudan University;;;Fudan University;;;Fudan University;Fudan University", "aff_domain": "fudan.edu.cn;;;fudan.edu.cn;;;fudan.edu.cn;fudan.edu.cn", "position": "PhD student;;;MS student;;;Full Professor;Full Professor", "bibtex": "@misc{\nchen2024unsupervised,\ntitle={Unsupervised Learning of Object-Centric Representation from Multi-Viewpoint Scenes},\nauthor={Tonglin Chen and Yinxuan Huang and Zhimeng Shen and Jinghao Huang and Lin Zhu and Dakun Yang and Bin Li and Xiangyang Xue},\nyear={2024},\nurl={https://openreview.net/forum?id=BTd5Tak69u}\n}", "github": "", "project": "", "reviewers": "LpCd;AcUg;UJby", "site": "https://openreview.net/forum?id=BTd5Tak69u", "pdf_size": 4390086, "rating": "3;3;5", "confidence": "4;3;4", "soundness": "3;2;3", "contribution": "2;2;2", "presentation": "2;1;3", "wc_summary": "31;59;40", "wc_strengths": "19;42;42", "wc_weaknesses": "246;565;126", "wc_questions": "2;33;10", "wc_review": "298;699;218", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.0, 0.816496580927726 ], "wc_summary_avg": [ 43.333333333333336, 11.671427600007732 ], "wc_strengths_avg": [ 34.333333333333336, 10.842303978193728 ], "wc_weaknesses_avg": [ 312.3333333333333, 185.25717859835342 ], 
"wc_questions_avg": [ 15.0, 13.140268896284683 ], "wc_review_avg": [ 405.0, 210.43922321341776 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 3, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Qp5scWleTboJ:scholar.google.com/&scioq=Unsupervised+Learning+of+Object-Centric+Representation+from+Multi-Viewpoint+Scenes&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Fudan University", "aff_unique_dep": "", "aff_unique_url": "https://www.fudan.edu.cn", "aff_unique_abbr": "Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "BUDxvMRkc4", "title": "BLG: BALANCED LANGUAGE DISTRIBUTION AS GUIDANCE FOR ROBUST LONG-TAILED VISION CLASSIFICATION", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recently, pre-trained contrastive visual-linguistic models such as CLIP have shown promising multi-modal capabilities in processing various downstream vision tasks. However, their effectiveness in handling the long-tailed vision recognition problem remains under-explored. In this work, we observe that \\textit{textual features from fine-tuned CLIP are relatively balanced and discriminative than the visual features}. Based on this observation, we propose to leverage balanced text features as prototypes to guide disentangled robust representation learning of biased visual features. Specifically, we first fine-tune CLIP via contrastive learning to help the encoders adapt to the target imbalanced dataset. Then we freeze the vision encoder and employ a linear adapter to refine the biased vision representation. For final vision recognition, a linear classifier initialized by fine-tuned textual features is integrated into the framework, where we consider the weights of the classifier as prototypes. For robust vision representation learning, we introduce a principled approach where we minimize the optimal transport distance between refined visual features and prototypes to help disentangle the biased vision features and continuously optimize prototypes moving towards the class center. We also design a supervised contrastive learning loss based on the transport plan to introduce more supervised signals and class-level information for further robust representation learning. 
Extensive experiments on long-tailed vision recognition benchmarks demonstrate the superiority of our method in using vision-language information for imbalanced visual recognition, achieving state-of-the-art (SOTA) performance.", "keywords": "Long-tailed vision recognition;multi-modality;optimal transport", "primary_area": "representation learning for computer vision, audio, language, and other modalities", "supplementary_material": "/attachment/b805fb2185572ba7feeabfb2e6cf778a213f331a.zip", "author": "Zhuo Li;He Zhao;Zhen Li;Dan dan Guo;Xiang Wan", "authorids": "~Zhuo_Li5;~He_Zhao1;~Zhen_Li6;~Dan_dan_Guo1;~Xiang_Wan1", "gender": "M;;;F;M", "homepage": ";;;https://github.com/Dan123dan;http://www.sribd.cn/teacher/28", "dblp": ";;;121/1618;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;;https://scholar.google.com.hk/citations?user=QLOY4JkAAAAJ;", "orcid": "0009-0000-6451-4877;;;;", "linkedin": ";;;;", "or_profile": "~Zhuo_Li5;~He_Zhao1;~Zhen_Li6;~Dan_dan_Guo1;~Xiang_Wan1", "aff": "The Chinese University of Hong Kong, Shenzhen;;;Jilin University;Shenzhen Research Institute of Big Data", "aff_domain": "link.cuhk.edu.cn;;;jlu.edu.cn;sribd.cn", "position": "PhD student;;;Lecturer;Principal Researcher", "bibtex": "@misc{\nli2024blg,\ntitle={{BLG}: {BALANCED} {LANGUAGE} {DISTRIBUTION} {AS} {GUIDANCE} {FOR} {ROBUST} {LONG}-{TAILED} {VISION} {CLASSIFICATION}},\nauthor={Zhuo Li and He Zhao and Zhen Li and Dan dan Guo and Xiang Wan},\nyear={2024},\nurl={https://openreview.net/forum?id=BUDxvMRkc4}\n}", "github": "", "project": "", "reviewers": "pMKo;nBTe;V37f", "site": "https://openreview.net/forum?id=BUDxvMRkc4", "pdf_size": 1696887, "rating": "3;5;6", "confidence": "5;4;2", "soundness": "2;3;3", "contribution": "1;2;3", "presentation": "2;3;2", "wc_summary": "57;105;63", "wc_strengths": "22;33;79", "wc_weaknesses": "221;92;100", "wc_questions": "4;163;70", "wc_review": "304;393;312", "wc_reply_reviewers": "118;264;82", "wc_reply_authors": "2089;2250;1172", "reply_reviewers": "2;1;1", "reply_authors": "6;6;3", "rating_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 1.247219128924647 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.0, 0.816496580927726 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 75.0, 21.354156504062622 ], "wc_strengths_avg": [ 44.666666666666664, 24.689178916188272 ], "wc_weaknesses_avg": [ 137.66666666666666, 59.016005362010816 ], "wc_questions_avg": [ 79.0, 65.22269543648132 ], "wc_review_avg": [ 336.3333333333333, 40.20226638166339 ], "wc_reply_reviewers_avg": [ 154.66666666666666, 78.69491018413382 ], "wc_reply_authors_avg": [ 1837.0, 474.7975006954719 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 5.0, 1.4142135623730951 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.9285714285714286, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Zt0xXu-bXz8J:scholar.google.com/&scioq=BLG:+BALANCED+LANGUAGE+DISTRIBUTION+AS+GUIDANCE+FOR+ROBUST+LONG-TAILED+VISION+CLASSIFICATION&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "Chinese University of Hong Kong;Jilin University;Shenzhen Research Institute of Big Data", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cuhk.edu.cn;http://www.jlu.edu.cn;http://www.sribd.cn", "aff_unique_abbr": "CUHK;JLU;", "aff_campus_unique_index": "0", 
"aff_campus_unique": "Shenzhen;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "BUNkXMwfXL", "title": "Why Diffusion Models Are Stable and How to Make Them Faster: An Empirical Investigation and Optimization", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Diffusion models, a potent generative framework, have garnered considerable attention in recent years. While many posit that the superiority of diffusion models stems from their stable training process compared to Generative Adversarial Networks (GANs), these assertions often rest on intuition and lack empirical substantiation. \nIn this paper, we aim to provide direct evidence to explain why diffusion models exhibit remarkable stability during training. We start by conducting a consistency experiment, where we compare the generation results of models with different hyper-parameters, such as initialization and model structure, under the same sampling conditions. Our results show that diffusion models produce consistent generation results across different hyper-parameters, indicating that they are stable in learning the mapping between noise and data. We then compare the loss landscapes of diffusion models and GANs, and find that diffusion models have much smoother loss landscapes, implying better convergence stability. Based on these analyses, we propose two optimization methods for diffusion models, namely the curriculum learning based timestep schedule (CLTS) and the momentum decay with learning rate compensation (MDLRC), which optimize the sampling probability of timesteps and the momentum selection, respectively, to accelerate convergence. For example, on ImageNet128, our methods achieve a 2.6x speedup in training, demonstrating the effectiveness of our methods.", "keywords": "diffusion model;curriculum learning;momentum decay;loss landscape", "primary_area": "generative models", "supplementary_material": "", "author": "Tianshuo Xu;Peng Mi;Ruilin Wang;Yingcong Chen", "authorids": "~Tianshuo_Xu1;~Peng_Mi1;~Ruilin_Wang1;yingcongchen@hkust-gz.edu.cn", "gender": "M;M;;", "homepage": ";https://www.github.com/Mi-Peng;https://github.com/Wangruiln;", "dblp": "304/1328;;;", "google_scholar": "ZOi14IUAAAAJ;PTM4HCsAAAAJ;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Tianshuo_Xu1;~Peng_Mi1;~Ruilin_Wang1;yingcongchen@hkust-gz.edu.cn", "aff": "Hong Kong University of Science and Technology (Guang Zhou);;Xiamen University;", "aff_domain": "hkust-gz.edu.cn;;xmu.edu.cn;", "position": "PhD student;;MS student;", "bibtex": "@misc{\nxu2024why,\ntitle={Why Diffusion Models Are Stable and How to Make Them Faster: An Empirical Investigation and Optimization},\nauthor={Tianshuo Xu and Peng Mi and Ruilin Wang and Yingcong Chen},\nyear={2024},\nurl={https://openreview.net/forum?id=BUNkXMwfXL}\n}", "github": "", "project": "", "reviewers": "fHcz;bVYL;sw1d", "site": "https://openreview.net/forum?id=BUNkXMwfXL", "pdf_size": 19071151, "rating": "3;3;5", "confidence": "4;4;4", "soundness": "3;1;2", "contribution": "2;1;3", "presentation": "3;3;3", "wc_summary": "53;192;108", "wc_strengths": "30;8;62", "wc_weaknesses": "95;6;152", "wc_questions": "45;289;186", "wc_review": "223;495;508", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.0, 0.816496580927726 ], "contribution_avg": [ 2.0, 0.816496580927726 ], "presentation_avg": [ 3.0, 
0.0 ], "wc_summary_avg": [ 117.66666666666667, 57.156704671358455 ], "wc_strengths_avg": [ 33.333333333333336, 22.17105219775452 ], "wc_weaknesses_avg": [ 84.33333333333333, 60.07957685899224 ], "wc_questions_avg": [ 173.33333333333334, 100.01444340138522 ], "wc_review_avg": [ 408.6666666666667, 131.39338728498564 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 3, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:O9sHkLUgSYkJ:scholar.google.com/&scioq=Why+Diffusion+Models+Are+Stable+and+How+to+Make+Them+Faster:+An+Empirical+Investigation+and+Optimization&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Hong Kong University of Science and Technology;Xiamen University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ust.hk;https://www.xmu.edu.cn", "aff_unique_abbr": "HKUST;XMU", "aff_campus_unique_index": "0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "BURvGotSLz", "title": "Is Training Necessary for Representation Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "The field of neural network-based encoders is currently experiencing rapid growth. However, in the pursuit of higher performance, models are becoming increasingly complex and specialized for specific datasets and tasks, resulting in a loss of generality.\nIn response to this trend, we explore the finite element method (FEM) as a general solution for feature extraction and introduce LagrangeEmbedding, an untrainable encoder with a universal architecture across various types of raw data and recognition tasks. Our experimental results demonstrate its successful application and good performance in diverse domains, including data fitting, computer vision, and natural language processing.\nLagrangeEmbedding is explainable, it adheres to the error-bound formula in FEM, which governs the relationship between mean absolute error (MAE) and the number of model parameters. \nAs the encoder has no trainable parameters, neural networks utilizing it only need to train a linear layer. 
This reduces gradient computation and significantly accelerates training convergence.\nOur research promises to advance machine learning by opening up new avenues for unsupervised representation learning.", "keywords": "unsupervised representation learning;universal encoder;finite element method;multi-scale mesh;multivariate Lagrange interpolation", "primary_area": "unsupervised, self-supervised, semi-supervised, and supervised representation learning", "supplementary_material": "/attachment/ce7a5cb197787a608b6f1bb42a52e03867ce1da0.zip", "author": "Zheng Li;Jerry Cheng;Huanying Gu", "authorids": "~Zheng_Li12;~Jerry_Cheng2;~Huanying_Gu1", "gender": "M;M;F", "homepage": ";https://www.nyit.edu/bio/jcheng18;", "dblp": ";;98/3698", "google_scholar": "https://scholar.google.com.hk/citations?user=JccDEo4AAAAJ;n_uoQ6MAAAAJ;sTEfizUAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Zheng_Li12;~Jerry_Cheng2;~Huanying_Gu1", "aff": "New York Institute of Technology;New York Institute of Technology;New York Institute of Technology", "aff_domain": "nyit.edu;nyit.edu;nyit.edu", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@misc{\nli2024is,\ntitle={Is Training Necessary for Representation Learning},\nauthor={Zheng Li and Jerry Cheng and Huanying Gu},\nyear={2024},\nurl={https://openreview.net/forum?id=BURvGotSLz}\n}", "github": "", "project": "", "reviewers": "RtZ7;YmUG;P3jk", "site": "https://openreview.net/forum?id=BURvGotSLz", "pdf_size": 5549026, "rating": "5;6;6", "confidence": "3;3;3", "soundness": "2;2;3", "contribution": "2;3;3", "presentation": "2;4;2", "wc_summary": "71;58;88", "wc_strengths": "49;70;22", "wc_weaknesses": "313;524;94", "wc_questions": "14;44;162", "wc_review": "447;696;366", "wc_reply_reviewers": "387;206;0", "wc_reply_authors": "2045;2343;928", "reply_reviewers": "1;1;0", "reply_authors": "5;9;3", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.9428090415820634 ], "wc_summary_avg": [ 72.33333333333333, 12.283683848458853 ], "wc_strengths_avg": [ 47.0, 19.6468827043885 ], "wc_weaknesses_avg": [ 310.3333333333333, 175.55689169674378 ], "wc_questions_avg": [ 73.33333333333333, 63.88183535942662 ], "wc_review_avg": [ 503.0, 140.42079618062277 ], "wc_reply_reviewers_avg": [ 197.66666666666666, 158.10193617480533 ], "wc_reply_authors_avg": [ 1772.0, 609.0719716639953 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 5.666666666666667, 2.494438257849294 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:fQ5dqJjmjrsJ:scholar.google.com/&scioq=Is+Training+Necessary+for+Representation+Learning&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "New York Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.nyit.edu", "aff_unique_abbr": "NYIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "BUSZQWbRaR", "title": "Generalized Convergence Analysis of Tsetlin Machines: A Probabilistic Approach to Concept Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Tsetlin Machines (TMs) have garnered increasing interest for 
their ability to learn concepts via propositional formulas and their proven efficiency across various application domains. Despite this, the convergence proof for the TMs, particularly for the AND operator (\\emph{conjunction} of literals), in the generalized case (inputs greater than two bits) remains an open problem. This paper aims to fill this gap by presenting a comprehensive convergence analysis of Tsetlin automaton-based Machine Learning algorithms. We introduce a novel framework, referred to as Probabilistic Concept Learning (PCL), which simplifies the TM structure while incorporating dedicated feedback mechanisms and dedicated inclusion/exclusion probabilities for literals. Given $n$ features, PCL aims to learn a set of conjunction clauses $C_i$ each associated with a distinct inclusion probability $p_i$. Most importantly, we establish a theoretical proof confirming that, for any clause $C_k$, PCL converges to a conjunction of literals when $0.5